Skip to content

Commit

Permalink
feat: Add screenshot storage support to broken link checker (#123)
Browse files Browse the repository at this point in the history
* get-cloud-region (#100)

Add functionality to synthetics-sdk-api to extract cloud region during GCF execution

* storage proto api (#101)

* expose resolveProjectId (#104)

* update to capture_condition (#109)

* chore(deps): bump ip from 1.1.8 to 1.1.9 (#105)

* chore(deps): bump ip from 1.1.8 to 1.1.9

Bumps [ip](https://github.com/indutny/node-ip) from 1.1.8 to 1.1.9.
- [Commits](indutny/node-ip@v1.1.8...v1.1.9)

---
updated-dependencies:
- dependency-name: ip
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>

* Empty-Commit

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Adam Weidman <adamfweidman@google.com>

* add samples tags (#108)

* blc-api-integration-def (#102)

* resolveProjectId present (#106)

* take-screenshots (#107)

* rebase-capture-condition (#110)

* refactor-integrations (#112)

* sanitize strings (#113)

* Take and populate screenshot (#114)

* screenshots-prop

* broken_links.spec working

* fix naming

* pass-args

* response to comments

* change default (#118)

* update synthetics-sdk-api to point to new npm pkg

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
  • Loading branch information
adamfweidman and dependabot[bot] authored Apr 19, 2024
1 parent 6c7fedb commit cd34e35
Show file tree
Hide file tree
Showing 17 changed files with 2,210 additions and 581 deletions.
803 changes: 714 additions & 89 deletions package-lock.json

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions packages/synthetics-sdk-broken-links/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,24 @@
"@types/chai": "^4.3.4",
"@types/express": "^4.17.17",
"@types/node": "^18.15.10",
"@types/proxyquire": "^1.3.31",
"@types/sinon": "^10.0.16",
"@types/supertest": "^2.0.12",
"chai": "^4.3.7",
"chai-exclude": "^2.1.0",
"express": "^4.18.2",
"sinon": "^15.2.0",
"proxyquire": "^2.1.3",
"node-mocks-http": "^1.13.0",
"sinon": "^16.1.1",
"supertest": "^6.3.3",
"synthetics-sdk-broken-links": "file:./"
},
"engines": {
"node": ">=18"
},
"dependencies": {
"@google-cloud/synthetics-sdk-api": "^0.5.1",
"@google-cloud/storage": "^7.7.0",
"@google-cloud/synthetics-sdk-api": "^0.6.0",
"puppeteer": "21.3.6"
}
}
87 changes: 65 additions & 22 deletions packages/synthetics-sdk-broken-links/src/broken_links.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import puppeteer, { Browser, Page } from 'puppeteer';
// Internal Project Files
import {
BaseError,
BrokenLinksResultV1_BrokenLinkCheckerOptions,
BrokenLinksResultV1_SyntheticLinkResult,
instantiateMetadata,
getRuntimeMetadata,
instantiateMetadata,
SyntheticResult,
} from '@google-cloud/synthetics-sdk-api';
import {
Expand All @@ -32,10 +33,19 @@ import {
checkLinks,
closeBrowser,
closePagePool,
retrieveLinksFromPage,
openNewPage,
retrieveLinksFromPage,
} from './navigation_func';
import { setDefaultOptions, validateInputOptions } from './options_func';
import { processOptions } from './options_func';
import {
createStorageClientIfStorageSelected,
getOrCreateStorageBucket,
StorageParameters,
} from './storage_func';

// External Dependencies
import { Bucket } from '@google-cloud/storage';
import puppeteer, { Browser, Page } from 'puppeteer';

export interface BrokenLinkCheckerOptions {
origin_uri: string;
Expand All @@ -48,6 +58,7 @@ export interface BrokenLinkCheckerOptions {
wait_for_selector?: string;
per_link_options?: { [key: string]: PerLinkOption };
total_synthetic_timeout_millis?: number;
screenshot_options?: ScreenshotOptions;
}

export interface PerLinkOption {
Expand All @@ -70,6 +81,17 @@ export enum StatusClass {
STATUS_CLASS_ANY = 'STATUS_CLASS_ANY',
}

/**
 * Optional screenshot settings, supplied through the `screenshot_options`
 * field of `BrokenLinkCheckerOptions`.
 */
export interface ScreenshotOptions {
// Where captured screenshots are written.
// NOTE(review): presumably a Cloud Storage bucket name, optionally followed
// by `/folder/...` — confirm against storage_func.
storage_location?: string;
// Selects which link results get a screenshot; see `CaptureCondition`.
capture_condition?: CaptureCondition;
}

/**
 * User-facing choice of when a screenshot is captured.
 *
 * NOTE(review): semantics presumed from the member names — NONE: never,
 * FAILING: only for links that fail, ALL: for every link. Confirm against the
 * capture-condition enum exported by the synthetics SDK API package.
 */
export enum CaptureCondition {
NONE = 'NONE',
FAILING = 'FAILING',
ALL = 'ALL',
}

let synthetics_sdk_broken_links_package;
try {
synthetics_sdk_broken_links_package = require('../package.json');
Expand All @@ -79,7 +101,11 @@ try {
instantiateMetadata(synthetics_sdk_broken_links_package);

export async function runBrokenLinks(
inputOptions: BrokenLinkCheckerOptions
inputOptions: BrokenLinkCheckerOptions,
args: {
executionId: string | undefined;
checkId: string | undefined;
}
): Promise<SyntheticResult> {
// init
const startTime = new Date().toISOString();
Expand All @@ -96,6 +122,30 @@ export async function runBrokenLinks(
const [timeLimitPromise, timeLimitTimeout, timeLimitresolver] =
getTimeLimitPromise(startTime, adjusted_synthetic_timeout_millis);

const errors: BaseError[] = [];

// Initialize Storage Client with Error Handling. Set to `null` if
// capture_condition is 'None'
const storageClient = createStorageClientIfStorageSelected(
errors,
options.screenshot_options!.capture_condition
);

// // Bucket Validation
const bucket: Bucket | null = await getOrCreateStorageBucket(
storageClient,
options.screenshot_options!.storage_location,
errors
);

const storageParams: StorageParameters = {
storageClient: storageClient,
bucket: bucket,
checkId: args.checkId || '_',
executionId: args.executionId || '_',
screenshotNumber: 1,
};

const followed_links: BrokenLinksResultV1_SyntheticLinkResult[] = [];

const checkLinksPromise = async () => {
Expand All @@ -109,7 +159,8 @@ export async function runBrokenLinks(
originPage,
options,
startTime,
adjusted_synthetic_timeout_millis
adjusted_synthetic_timeout_millis,
storageParams
)
);

Expand All @@ -131,7 +182,8 @@ export async function runBrokenLinks(
linksToFollow,
options,
startTime,
adjusted_synthetic_timeout_millis
adjusted_synthetic_timeout_millis,
storageParams
))
);
return true;
Expand All @@ -149,7 +201,9 @@ export async function runBrokenLinks(
startTime,
runtime_metadata,
options,
followed_links
followed_links,
storageParams,
errors
);
} catch (err) {
const errorMessage =
Expand All @@ -176,7 +230,8 @@ async function checkOriginLink(
originPage: Page,
options: BrokenLinksResultV1_BrokenLinkCheckerOptions,
startTime: string,
adjusted_synthetic_timeout_millis: number
adjusted_synthetic_timeout_millis: number,
storageParams: StorageParameters
): Promise<BrokenLinksResultV1_SyntheticLinkResult> {
let originLinkResult: BrokenLinksResultV1_SyntheticLinkResult;

Expand All @@ -193,6 +248,7 @@ async function checkOriginLink(
originPage,
{ target_uri: options.origin_uri, anchor_text: '', html_element: '' },
options,
storageParams,
true
);

Expand Down Expand Up @@ -263,16 +319,3 @@ async function scrapeLinks(
options.link_order
);
}

/**
 * Converts caller-supplied options into their fully populated form: the input
 * is passed through `validateInputOptions` first, and the validated result is
 * then handed to `setDefaultOptions`.
 *
 * @param inputOptions - Options as provided by the caller of the checker.
 * @returns The options after validation and default-filling.
 */
function processOptions(
  inputOptions: BrokenLinkCheckerOptions
): BrokenLinksResultV1_BrokenLinkCheckerOptions {
  return setDefaultOptions(validateInputOptions(inputOptions));
}
15 changes: 13 additions & 2 deletions packages/synthetics-sdk-broken-links/src/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import { runBrokenLinks, BrokenLinkCheckerOptions } from './broken_links';
// Standard Libraries
import { Request, Response } from 'express';

// Internal Project Files
import { runBrokenLinks, BrokenLinkCheckerOptions } from './broken_links';

const syntheticExecutionIdHeader = 'Synthetic-Execution-Id';
const checkIdHeader = 'Check-Id';

/**
* Middleware for easy invocation of SyntheticSDK broken links, and may be used to
* register a GoogleCloudFunction http function, or express js compatible handler.
Expand All @@ -26,5 +32,10 @@ import { Request, Response } from 'express';
export function runBrokenLinksHandler(options: BrokenLinkCheckerOptions) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return async (req: Request, res: Response): Promise<any> =>
res.send(await runBrokenLinks(options));
res.send(
await runBrokenLinks(options, {
executionId: req.get(syntheticExecutionIdHeader),
checkId: req.get(checkIdHeader),
})
);
}
4 changes: 2 additions & 2 deletions packages/synthetics-sdk-broken-links/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
// limitations under the License.

export {
runBrokenLinks,
BrokenLinkCheckerOptions,
LinkOrder,
PerLinkOption,
runBrokenLinks,
StatusClass,
LinkOrder,
} from './broken_links';
export * from './handlers';
export * from '@google-cloud/synthetics-sdk-api';
107 changes: 105 additions & 2 deletions packages/synthetics-sdk-broken-links/src/link_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import { HTTPResponse } from 'puppeteer';
// Standard Libraries
import * as path from 'path';

// Internal Project Files
import {
BaseError,
BrokenLinksResultV1,
BrokenLinksResultV1_BrokenLinkCheckerOptions,
BrokenLinksResultV1_BrokenLinkCheckerOptions_LinkOrder,
BrokenLinksResultV1_BrokenLinkCheckerOptions_ScreenshotOptions_CaptureCondition as ApiCaptureCondition,
BrokenLinksResultV1_SyntheticLinkResult,
GenericResultV1,
getRuntimeMetadata,
Expand All @@ -25,6 +30,10 @@ import {
SyntheticResult,
} from '@google-cloud/synthetics-sdk-api';

// External Dependencies
import { HTTPResponse } from 'puppeteer';
import { StorageParameters } from './storage_func';

/**
* Represents an intermediate link with its properties.
*/
Expand Down Expand Up @@ -153,6 +162,8 @@ function parseFollowedLinks(
options: {} as BrokenLinksResultV1_BrokenLinkCheckerOptions,
origin_link_result: {} as BrokenLinksResultV1_SyntheticLinkResult,
followed_link_results: [],
execution_data_storage_path: '',
errors: [],
};

for (const link of followed_links) {
Expand Down Expand Up @@ -216,12 +227,21 @@ export function createSyntheticResult(
start_time: string,
runtime_metadata: { [key: string]: string },
options: BrokenLinksResultV1_BrokenLinkCheckerOptions,
followed_links: BrokenLinksResultV1_SyntheticLinkResult[]
followed_links: BrokenLinksResultV1_SyntheticLinkResult[],
storageParams: StorageParameters,
errors: BaseError[]
): SyntheticResult {
// Create BrokenLinksResultV1 by parsing followed links and setting options
const broken_links_result: BrokenLinksResultV1 =
parseFollowedLinks(followed_links);
broken_links_result.options = options;
broken_links_result.errors = errors;
broken_links_result.execution_data_storage_path = storageParams.bucket
? 'gs://' +
storageParams.bucket.name +
'/' +
getStoragePathToExecution(storageParams, options)
: '';

// Create SyntheticResult object
const synthetic_result: SyntheticResult = {
Expand Down Expand Up @@ -264,6 +284,89 @@ export function shuffleAndTruncate(
return linksToFollow.slice(0, link_limit! - 1);
}

/**
 * Decides, from the configured capture condition and the outcome of a link
 * check, whether a screenshot should be captured for that link.
 *
 * @param options - Checker options; only
 *   `screenshot_options.capture_condition` is read.
 * @param passed - Whether the link navigation succeeded.
 * @returns `true` when the condition is ALL, or when it is FAILING and the
 *   link did not pass; `false` in every other case.
 */
export function shouldTakeScreenshot(
  options: BrokenLinksResultV1_BrokenLinkCheckerOptions,
  passed: boolean
): boolean {
  const condition = options.screenshot_options!.capture_condition;

  // ALL captures unconditionally, regardless of the link outcome.
  if (condition === ApiCaptureCondition.ALL) {
    return true;
  }

  // Otherwise only a failing link under the FAILING condition qualifies.
  return condition === ApiCaptureCondition.FAILING && !passed;
}

/**
 * Sanitizes a string so it can be used as (part of) a storage object name.
 *
 * Sanitization rules, applied in this order:
 * 1. `null`, `undefined` and the empty string become `_`.
 * 2. A leading ".well-known/acme-challenge/" prefix is replaced by `_`.
 * 3. Each carriage return, line feed, control character in
 *    U+007F–U+009F, and each of `# [ ] * ? : " < > |` and the path separator
 *    `/` is replaced by `_` (replaced, not removed, so the length is kept).
 * 4. Leading/trailing whitespace is trimmed and every remaining run of
 *    whitespace is collapsed into a single `_`.
 * 5. If the result is empty, "." or "..", `_` is returned instead. This check
 *    runs on the sanitized value, so inputs such as " . " or "   " cannot
 *    produce a forbidden or empty name.
 *
 * @param inputString - The original object name; may be null or undefined.
 * @returns The sanitized, never-empty object name.
 */
export function sanitizeObjectName(
  inputString: string | null | undefined
): string {
  const fallbackName = '_';
  if (!inputString) return fallbackName;

  /*eslint no-useless-escape: "off"*/
  // CR/LF, control characters, special characters and the path separator.
  const invalidCharactersRegex = /[\r\n\u007F-\u009F#\[\]*?:"<>|/]/g;
  const wellKnownPrefixRegex = /^\.well-known\/acme-challenge\//;

  const sanitized = inputString
    .replace(wellKnownPrefixRegex, '_') // must run before '/' is replaced
    .replace(invalidCharactersRegex, '_')
    .trim()
    .replace(/\s+/g, '_');

  // Trimming can expose a bare "." / ".." or leave nothing at all, so the
  // reserved-name check has to look at the sanitized value, not the input.
  if (sanitized === '' || sanitized === '.' || sanitized === '..') {
    return fallbackName;
  }
  return sanitized;
}

/**
 * Builds the object-path prefix under which data for one execution is stored:
 * `<folder>/<checkId>/<executionId>`, where `<folder>` is whatever follows the
 * first '/' in `options.screenshot_options.storage_location` (empty when the
 * location contains no '/').
 *
 * The path is always joined with forward slashes (`path.posix`), because the
 * result is used as part of a `gs://` storage path rather than a local file
 * path and must not pick up the host platform's separator.
 *
 * @param storageParams - Supplies the `checkId` and `executionId` segments.
 * @param options - Checker options; only `screenshot_options.storage_location`
 *   is read.
 * @returns The joined path, or '' if it cannot be built (for example when
 *   `screenshot_options` or `storage_location` is missing).
 */
export function getStoragePathToExecution(
  storageParams: StorageParameters,
  options: BrokenLinksResultV1_BrokenLinkCheckerOptions
): string {
  try {
    const storageLocation = options.screenshot_options!.storage_location;

    // Everything after the first '/' is the folder portion. With no '/'
    // present the location has no folder portion at all.
    const firstSlashIndex = storageLocation.indexOf('/');
    const folderPrefix =
      firstSlashIndex === -1
        ? ''
        : storageLocation.substring(firstSlashIndex + 1);

    // join() skips empty segments and collapses duplicate slashes, so no
    // manual trailing-slash handling is required.
    return path.posix.join(
      folderPrefix,
      storageParams.checkId,
      storageParams.executionId
    );
  } catch (err) {
    // Deliberate best-effort: a missing location yields an empty path.
    return '';
  }
}

export function getTimeLimitPromise(
startTime: string,
totalTimeoutMillis: number,
Expand Down
Loading

0 comments on commit cd34e35

Please sign in to comment.