diff --git a/README.md b/README.md index a1e4d13..288a3f8 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This library is also intended as a modern alternative to the unmaintained [`pdf- - 🏗️ Conditional exports for Browser, Node and worker environments - 💬 Extract text from PDFs +- 🧱 Use a custom PDF.js build ## Installation @@ -38,8 +39,30 @@ const { totalPages, info, metadata, text } = await decodePDFText( ) ``` +## Config + +```ts +interface UnPDFConfiguration { + /** + * By default, UnPDF will use the latest version of PDF.js. If you want to + * use an older version or the legacy build, provide a function returning a + * promise that resolves to the PDF.js module. + * + * @example + * () => import('pdfjs-dist/legacy/build/pdf.js') + */ + pdfjs?: () => Promise<typeof PDFJS> +} +``` + ## Methods +### `defineUnPDFConfig` + +```ts +function defineUnPDFConfig({ pdfjs }: UnPDFConfiguration): Promise<void> +``` + ### `decodePDFText` ```ts diff --git a/src/index.node.ts b/src/index.node.ts index 5a847cf..394f9cb 100644 --- a/src/index.node.ts +++ b/src/index.node.ts @@ -2,6 +2,8 @@ import { decodePDFText as _decodePDFText } from './text' import { getImagesFromPage as _getImagesFromPage } from './image' import { resolvePDFJSImports } from './utils' +export { defineUnPDFConfig } from './utils' + export const decodePDFText: typeof _decodePDFText = async (...args) => { await resolvePDFJSImports() return await _decodePDFText(...args) diff --git a/src/index.web.ts b/src/index.web.ts index 5a847cf..394f9cb 100644 --- a/src/index.web.ts +++ b/src/index.web.ts @@ -2,6 +2,8 @@ import { decodePDFText as _decodePDFText } from './text' import { getImagesFromPage as _getImagesFromPage } from './image' import { resolvePDFJSImports } from './utils' +export { defineUnPDFConfig } from './utils' + export const decodePDFText: typeof _decodePDFText = async (...args) => { await resolvePDFJSImports() return await _decodePDFText(...args) diff --git a/src/index.worker.ts b/src/index.worker.ts index f0d151f..3d352c3 100644 
--- a/src/index.worker.ts +++ b/src/index.worker.ts @@ -1,6 +1,8 @@ import type { decodePDFText as _decodePDFText } from './text' import type { getImagesFromPage as _getImagesFromPage } from './image' +export { defineUnPDFConfig } from './utils' + export const decodePDFText: typeof _decodePDFText = async () => { throw new Error('Not implemented in worker context yet') } diff --git a/src/types.ts b/src/types.ts index c5257fb..b5231c0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,3 +1,5 @@ +import type * as PDFJS from 'pdfjs-dist' + export interface PDFContent { totalPages: number info?: Record @@ -5,6 +7,14 @@ export interface PDFContent { text: string | string[] } -export type Prettify = { - [K in keyof T]: T[K]; -} & Record +export interface UnPDFConfiguration { + /** + * By default, UnPDF will use the latest version of PDF.js. If you want to + * use an older version or the legacy build, set a promise that resolves to + * the PDF.js module. + * + * @example + * () => import('pdfjs-dist/legacy/build/pdf.js') + */ + pdfjs?: () => Promise +} diff --git a/src/utils.ts b/src/utils.ts index 9bf2747..792837c 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,5 @@ import type * as PDFJS from 'pdfjs-dist' +import type { UnPDFConfiguration } from './types' let resolvedModule: typeof PDFJS | undefined @@ -14,6 +15,17 @@ export async function getDocumentProxy(data: ArrayBuffer) { return pdf } +export async function defineUnPDFConfig({ pdfjs }: UnPDFConfiguration) { + if (pdfjs) { + try { + resolvedModule = await pdfjs() + } + catch (error) { + throw new Error('Resolving the PDF.js module failed. Please check your configuration.') + } + } +} + export function getResolvedPDFJS() { return resolvedModule! } @@ -27,8 +39,6 @@ export async function resolvePDFJSImports() { resolvedModule = mod } catch (error) { - throw new Error( - 'PDF.js is not available. Please add the package as a dependency.', - ) + throw new Error('PDF.js is not available. 
Please add the package as a dependency.') } }