From d381d090e3d11f9cd6b991d98f32c2d33cae15c8 Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Sat, 12 Aug 2023 19:42:15 +0200 Subject: [PATCH] perf: use modern pdfjs bundle --- src/image.ts | 4 ++-- src/index.node.ts | 6 +++--- src/index.web.ts | 6 +++--- src/utils.ts | 29 +++++++---------------------- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/src/image.ts b/src/image.ts index 184de96..6601c6a 100644 --- a/src/image.ts +++ b/src/image.ts @@ -1,4 +1,4 @@ -import { getDocumentProxy, getResolvedPDFJSInstance } from './utils' +import { getDocumentProxy, getResolvedPDFJS } from './utils' export async function getImagesFromPage( data: ArrayBuffer, @@ -7,7 +7,7 @@ export async function getImagesFromPage( const pdf = await getDocumentProxy(data) const page = await pdf.getPage(pageNumber) const operatorList = await page.getOperatorList() - const { OPS } = getResolvedPDFJSInstance() + const { OPS } = getResolvedPDFJS() const images: ArrayBuffer[] = [] for (const op of operatorList.fnArray) { diff --git a/src/index.node.ts b/src/index.node.ts index e2386f4..5a847cf 100644 --- a/src/index.node.ts +++ b/src/index.node.ts @@ -1,13 +1,13 @@ import { decodePDFText as _decodePDFText } from './text' import { getImagesFromPage as _getImagesFromPage } from './image' -import { resolvePDFJSNodeImports } from './utils' +import { resolvePDFJSImports } from './utils' export const decodePDFText: typeof _decodePDFText = async (...args) => { - await resolvePDFJSNodeImports() + await resolvePDFJSImports() return await _decodePDFText(...args) } export const getImagesFromPage: typeof _getImagesFromPage = async (...args) => { - await resolvePDFJSNodeImports() + await resolvePDFJSImports() return await _getImagesFromPage(...args) } diff --git a/src/index.web.ts b/src/index.web.ts index 6b3a337..5a847cf 100644 --- a/src/index.web.ts +++ b/src/index.web.ts @@ -1,13 +1,13 @@ import { decodePDFText as _decodePDFText } from './text' import { getImagesFromPage as _getImagesFromPage } from './image' -import { resolvePDFJSWebImports } from './utils' +import { resolvePDFJSImports } from './utils' export const decodePDFText: typeof _decodePDFText = async (...args) => { - await resolvePDFJSWebImports() + await resolvePDFJSImports() return await _decodePDFText(...args) } export const getImagesFromPage: typeof _getImagesFromPage = async (...args) => { - await resolvePDFJSWebImports() + await resolvePDFJSImports() return await _getImagesFromPage(...args) } diff --git a/src/utils.ts b/src/utils.ts index 2784020..9bf2747 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,9 +1,9 @@ import type * as PDFJS from 'pdfjs-dist' -let instance: typeof PDFJS | undefined +let resolvedModule: typeof PDFJS | undefined export async function getDocumentProxy(data: ArrayBuffer) { - const { getDocument } = getResolvedPDFJSInstance() + const { getDocument } = getResolvedPDFJS() const pdf = await getDocument({ data, useWorkerFetch: false, @@ -14,32 +14,17 @@ export async function getDocumentProxy(data: ArrayBuffer) { return pdf } -export function getResolvedPDFJSInstance() { - return instance! +export function getResolvedPDFJS() { + return resolvedModule! } -export async function resolvePDFJSWebImports() { - if (instance) +export async function resolvePDFJSImports() { + if (resolvedModule) return try { const { default: mod } = await import('pdfjs-dist') - instance = mod - } - catch (error) { - throw new Error( - 'PDF.js is not available. Please add the package as a dependency.', - ) - } -} - -export async function resolvePDFJSNodeImports() { - if (instance) - return - - try { - const { default: mod } = await import('pdfjs-dist/legacy/build/pdf') - instance = mod + resolvedModule = mod } catch (error) { throw new Error(