From 3c167c8a9a6bd148f83bea0d68765ed389345e0a Mon Sep 17 00:00:00 2001
From: michelle0927
Date: Thu, 30 Jan 2025 14:48:35 -0500
Subject: [PATCH 1/3] new components

---
Review notes (this area is not part of the commit message):
  * get-website-content.mjs: the trailing period of the $summary now sits
    outside the backtick-quoted URL.
  * zenscrape.app.mjs: _makeRequest merges caller-supplied `headers` with
    the `apikey` header instead of letting `...opts` replace the whole
    headers object. Hunk size and diffstat are adjusted for the 3 added lines.
  * The `index` blob ids of those two files are the ones from the original
    series and no longer describe the edited content.
  * Follow-up: `md5` is imported only by website-content-updated.mjs, which
    PATCH 3/3 deletes, while package.json and pnpm-lock.yaml keep the entry.

 .../get-credit-status/get-credit-status.mjs   | 19 ++++
 .../get-website-content.mjs                   | 56 +++++++++++
 components/zenscrape/package.json             |  8 +-
 .../website-content-updated.mjs               | 92 +++++++++++++++++++
 components/zenscrape/zenscrape.app.mjs        | 67 ++++++++++++-
 5 files changed, 236 insertions(+), 6 deletions(-)
 create mode 100644 components/zenscrape/actions/get-credit-status/get-credit-status.mjs
 create mode 100644 components/zenscrape/actions/get-website-content/get-website-content.mjs
 create mode 100644 components/zenscrape/sources/website-content-updated/website-content-updated.mjs

diff --git a/components/zenscrape/actions/get-credit-status/get-credit-status.mjs b/components/zenscrape/actions/get-credit-status/get-credit-status.mjs
new file mode 100644
index 0000000000000..22ff1bf4b05d8
--- /dev/null
+++ b/components/zenscrape/actions/get-credit-status/get-credit-status.mjs
@@ -0,0 +1,19 @@
+import zenscrape from "../../zenscrape.app.mjs";
+
+export default {
+  key: "zenscrape-get-credit-status",
+  name: "Get Credit Status",
+  description: "Retrieve the number of remaining credits in Zenscrape. [See the documentation](https://app.zenscrape.com/documentation)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    zenscrape,
+  },
+  async run({ $ }) {
+    const response = await this.zenscrape.getStatus({
+      $,
+    });
+    $.export("$summary", "Successfully retrieved credit status.");
+    return response;
+  },
+};
diff --git a/components/zenscrape/actions/get-website-content/get-website-content.mjs b/components/zenscrape/actions/get-website-content/get-website-content.mjs
new file mode 100644
index 0000000000000..dc1195f15d070
--- /dev/null
+++ b/components/zenscrape/actions/get-website-content/get-website-content.mjs
@@ -0,0 +1,56 @@
+import zenscrape from "../../zenscrape.app.mjs";
+
+export default {
+  key: "zenscrape-get-website-content",
+  name: "Get Website Content",
+  description: "Retrieve the content of a website. [See the documentation](https://app.zenscrape.com/documentation)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    zenscrape,
+    url: {
+      propDefinition: [
+        zenscrape,
+        "url",
+      ],
+    },
+    premium: {
+      propDefinition: [
+        zenscrape,
+        "premium",
+      ],
+    },
+    location: {
+      propDefinition: [
+        zenscrape,
+        "location",
+      ],
+    },
+    keepHeaders: {
+      propDefinition: [
+        zenscrape,
+        "keepHeaders",
+      ],
+    },
+    render: {
+      propDefinition: [
+        zenscrape,
+        "render",
+      ],
+    },
+  },
+  async run({ $ }) {
+    const response = await this.zenscrape.getContent({
+      $,
+      params: {
+        url: this.url,
+        premium: this.premium,
+        location: this.location,
+        keep_headers: this.keepHeaders,
+        render: this.render,
+      },
+    });
+    $.export("$summary", `Successfully scraped website \`${this.url}\`.`);
+    return response;
+  },
+};
diff --git a/components/zenscrape/package.json b/components/zenscrape/package.json
index 71b652aa0ff86..71477b31cfd7b 100644
--- a/components/zenscrape/package.json
+++ b/components/zenscrape/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/zenscrape",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream Zenscrape Components",
   "main": "zenscrape.app.mjs",
   "keywords": [
@@ -11,5 +11,9 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3",
+    "md5": "^2.3.0"
   }
-}
\ No newline at end of file
+}
diff --git a/components/zenscrape/sources/website-content-updated/website-content-updated.mjs b/components/zenscrape/sources/website-content-updated/website-content-updated.mjs
new file mode 100644
index 0000000000000..1516af999ad88
--- /dev/null
+++ b/components/zenscrape/sources/website-content-updated/website-content-updated.mjs
@@ -0,0 +1,92 @@
+import zenscrape from "../../zenscrape.app.mjs";
+import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform";
+import md5 from "md5";
+
+export default {
+  key: "zenscrape-website-content-updated",
+  name: "Website Content Updated",
+  description: "Emit new event when the content of a URL has updated. [See the documentation](https://app.zenscrape.com/documentation)",
+  version: "0.0.1",
+  type: "source",
+  dedupe: "unique",
+  props: {
+    zenscrape,
+    db: "$.service.db",
+    timer: {
+      type: "$.interface.timer",
+      default: {
+        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
+      },
+    },
+    url: {
+      propDefinition: [
+        zenscrape,
+        "url",
+      ],
+    },
+    premium: {
+      propDefinition: [
+        zenscrape,
+        "premium",
+      ],
+    },
+    location: {
+      propDefinition: [
+        zenscrape,
+        "location",
+      ],
+    },
+    keepHeaders: {
+      propDefinition: [
+        zenscrape,
+        "keepHeaders",
+      ],
+    },
+    render: {
+      propDefinition: [
+        zenscrape,
+        "render",
+      ],
+    },
+  },
+  methods: {
+    _getContentHash() {
+      return this.db.get("contentHash");
+    },
+    _setContentHash(contentHash) {
+      this.db.set("contentHash", contentHash);
+    },
+    generateMeta() {
+      const ts = Date.now();
+      return {
+        id: ts,
+        summary: "Website Content Updated",
+        ts,
+      };
+    },
+  },
+  async run() {
+    const contentHash = this._getContentHash();
+
+    const content = await this.zenscrape.getContent({
+      params: {
+        url: this.url,
+        premium: this.premium,
+        location: this.location,
+        keep_headers: this.keepHeaders,
+        render: this.render,
+      },
+    });
+
+    const newContentHash = md5(JSON.stringify(content));
+
+    if (newContentHash === contentHash) {
+      return;
+    }
+
+    this._setContentHash(newContentHash);
+
+    const meta = this.generateMeta();
+    this.$emit(content, meta);
+  },
+};
diff --git a/components/zenscrape/zenscrape.app.mjs b/components/zenscrape/zenscrape.app.mjs
index 4ac729c665b47..0b59e381202fe 100644
--- a/components/zenscrape/zenscrape.app.mjs
+++ b/components/zenscrape/zenscrape.app.mjs
@@ -1,11 +1,70 @@
+import { axios } from "@pipedream/platform";
+
 export default {
   type: "app",
   app: "zenscrape",
-  propDefinitions: {},
+  propDefinitions: {
+    url: {
+      type: "string",
+      label: "URL",
+      description: "The target site you want to scrape",
+    },
+    premium: {
+      type: "boolean",
+      label: "Premium",
+      description: "Uses residential proxies, unlocks sites that are hard to scrape. Counts as 20 credits towards your quota.",
+      optional: true,
+    },
+    location: {
+      type: "string",
+      label: "Location",
+      description: "If premium=`false` possible locations are 'na' (North America) and 'eu' (Europe). If premium=`true` you can choose a location from Zenscrape's [list of 230+ countries](https://app.zenscrape.com/documentation#proxyLocationList)",
+      optional: true,
+    },
+    keepHeaders: {
+      type: "boolean",
+      label: "Keep Headers",
+      description: "Allow to pass through forward headers (e.g. user agents, cookies)",
+      optional: true,
+    },
+    render: {
+      type: "boolean",
+      label: "Render",
+      description: "Use a headless browser to fetch content that relies on javascript. Counts as 5 credits towards your quota.",
+      optional: true,
+    },
+  },
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    _baseUrl() {
+      return "https://app.zenscrape.com/api/v1";
+    },
+    // Merge caller headers so they cannot replace the apikey header
+    _makeRequest({
+      $ = this,
+      path,
+      headers,
+      ...opts
+    }) {
+      return axios($, {
+        url: `${this._baseUrl()}${path}`,
+        headers: {
+          ...headers,
+          apikey: this.$auth.api_key,
+        },
+        ...opts,
+      });
+    },
+    getContent(opts = {}) {
+      return this._makeRequest({
+        path: "/get",
+        ...opts,
+      });
+    },
+    getStatus(opts = {}) {
+      return this._makeRequest({
+        path: "/status",
+        ...opts,
+      });
     },
   },
 };

From 0e6030b5ee9bfaa9f42fd6e985af2a6d756cd1ea Mon Sep 17 00:00:00 2001
From: michelle0927
Date: Thu, 30 Jan 2025 14:51:18 -0500
Subject: [PATCH 2/3] pnpm-lock.yaml

---
 pnpm-lock.yaml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 3f5858a965ed2..ba9decf4d2cf7 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -292,8 +292,7 @@ importers:
         specifier: ^3.0.1
         version: 3.0.3
 
-  components/adobe_document_generation_api:
-    specifiers: {}
+  components/adobe_document_generation_api: {}
 
   components/adobe_pdf_services:
     dependencies:
@@ -10501,8 +10500,7 @@ importers:
 
   components/syncro: {}
 
-  components/synthflow:
-    specifiers: {}
+  components/synthflow: {}
 
   components/t2m_url_shortener: {}
 
@@ -12216,7 +12214,14 @@ importers:
         specifier: ^1.5.1
         version: 1.6.6
 
-  components/zenscrape: {}
+  components/zenscrape:
+    dependencies:
+      '@pipedream/platform':
+        specifier: ^3.0.3
+        version: 3.0.3
+      md5:
+        specifier: ^2.3.0
+        version: 2.3.0
 
   components/zenserp: {}
 
@@ -31754,6 +31759,8 @@ snapshots:
       '@putout/operator-filesystem': 5.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))
       '@putout/operator-json': 2.2.0
       putout: 36.13.1(eslint@8.57.1)(typescript@5.6.3)
+    transitivePeerDependencies:
+      - supports-color
 
   '@putout/operator-regexp@1.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))':
     dependencies:

From 8f4ddc376ed42c786ad158bb93a26c26fe920174 Mon Sep 17 00:00:00 2001
From: michelle0927
Date: Mon, 3 Feb 2025 16:23:20 -0500
Subject: [PATCH 3/3] remove source

---
 .../website-content-updated.mjs               | 92 -------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 components/zenscrape/sources/website-content-updated/website-content-updated.mjs

diff --git a/components/zenscrape/sources/website-content-updated/website-content-updated.mjs b/components/zenscrape/sources/website-content-updated/website-content-updated.mjs
deleted file mode 100644
index 1516af999ad88..0000000000000
--- a/components/zenscrape/sources/website-content-updated/website-content-updated.mjs
+++ /dev/null
@@ -1,92 +0,0 @@
-import zenscrape from "../../zenscrape.app.mjs";
-import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform";
-import md5 from "md5";
-
-export default {
-  key: "zenscrape-website-content-updated",
-  name: "Website Content Updated",
-  description: "Emit new event when the content of a URL has updated. [See the documentation](https://app.zenscrape.com/documentation)",
-  version: "0.0.1",
-  type: "source",
-  dedupe: "unique",
-  props: {
-    zenscrape,
-    db: "$.service.db",
-    timer: {
-      type: "$.interface.timer",
-      default: {
-        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
-      },
-    },
-    url: {
-      propDefinition: [
-        zenscrape,
-        "url",
-      ],
-    },
-    premium: {
-      propDefinition: [
-        zenscrape,
-        "premium",
-      ],
-    },
-    location: {
-      propDefinition: [
-        zenscrape,
-        "location",
-      ],
-    },
-    keepHeaders: {
-      propDefinition: [
-        zenscrape,
-        "keepHeaders",
-      ],
-    },
-    render: {
-      propDefinition: [
-        zenscrape,
-        "render",
-      ],
-    },
-  },
-  methods: {
-    _getContentHash() {
-      return this.db.get("contentHash");
-    },
-    _setContentHash(contentHash) {
-      this.db.set("contentHash", contentHash);
-    },
-    generateMeta() {
-      const ts = Date.now();
-      return {
-        id: ts,
-        summary: "Website Content Updated",
-        ts,
-      };
-    },
-  },
-  async run() {
-    const contentHash = this._getContentHash();
-
-    const content = await this.zenscrape.getContent({
-      params: {
-        url: this.url,
-        premium: this.premium,
-        location: this.location,
-        keep_headers: this.keepHeaders,
-        render: this.render,
-      },
-    });
-
-    const newContentHash = md5(JSON.stringify(content));
-
-    if (newContentHash === contentHash) {
-      return;
-    }
-
-    this._setContentHash(newContentHash);
-
-    const meta = this.generateMeta();
-    this.$emit(content, meta);
-  },
-};