Skip to content

Commit

Permalink
feat: defineUnPDFConfig and custom PDF.js mod
Browse files Browse the repository at this point in the history
  • Loading branch information
johannschopplich committed Aug 12, 2023
1 parent d381d09 commit 289b22f
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 6 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This library is also intended as a modern alternative to the unmaintained [`pdf-

- 🏗️ Conditional exports for Browser, Node and worker environments
- 💬 Extract text from PDFs
- 🧱 Use custom PDF.js build

## Installation

Expand Down Expand Up @@ -38,8 +39,30 @@ const { totalPages, info, metadata, text } = await decodePDFText(
)
```

## Config

```ts
interface UnPDFConfiguration {
/**
* By default, UnPDF will use the latest version of PDF.js. If you want to
* use an older version or the legacy build, provide a function that returns
* a promise resolving to the PDF.js module.
*
* @example
* () => import('pdfjs-dist/legacy/build/pdf.js')
*/
pdfjs?: () => Promise<typeof PDFJS>
}
```

## Methods

### `defineUnPDFConfig`

```ts
function defineUnPDFConfig({ pdfjs }: UnPDFConfiguration): Promise<void>
```

### `decodePDFText`

```ts
Expand Down
2 changes: 2 additions & 0 deletions src/index.node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { decodePDFText as _decodePDFText } from './text'
import { getImagesFromPage as _getImagesFromPage } from './image'
import { resolvePDFJSImports } from './utils'

export { defineUnPDFConfig } from './utils'

export const decodePDFText: typeof _decodePDFText = async (...args) => {
await resolvePDFJSImports()
return await _decodePDFText(...args)
Expand Down
2 changes: 2 additions & 0 deletions src/index.web.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { decodePDFText as _decodePDFText } from './text'
import { getImagesFromPage as _getImagesFromPage } from './image'
import { resolvePDFJSImports } from './utils'

export { defineUnPDFConfig } from './utils'

export const decodePDFText: typeof _decodePDFText = async (...args) => {
await resolvePDFJSImports()
return await _decodePDFText(...args)
Expand Down
2 changes: 2 additions & 0 deletions src/index.worker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import type { decodePDFText as _decodePDFText } from './text'
import type { getImagesFromPage as _getImagesFromPage } from './image'

export { defineUnPDFConfig } from './utils'

/**
 * Worker-context stand-in for `decodePDFText`.
 *
 * Text decoding is not available in this entry point: every call yields a
 * promise that rejects with an `Error`, regardless of the arguments passed.
 */
export const decodePDFText: typeof _decodePDFText = () =>
  Promise.reject(new Error('Not implemented in worker context yet'))
Expand Down
16 changes: 13 additions & 3 deletions src/types.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import type * as PDFJS from 'pdfjs-dist'

/**
 * Shape of decoded PDF content.
 *
 * NOTE(review): presumably the result type of `decodePDFText` (the README
 * destructures the same four keys from its result) — confirm against the
 * implementation in `./text`.
 */
export interface PDFContent {
// Page count — presumably of the decoded document; confirm against the producer.
totalPages: number
// Optional free-form key/value record; values are untyped (`any`).
info?: Record<string, any>
// Optional and untyped (`any`); the shape is not visible from this file.
metadata?: any
// Either a single string or a list of strings.
// NOTE(review): presumably merged text vs. one entry per page — confirm.
text: string | string[]
}

/**
 * Re-maps every property of `T` onto a fresh object type with the same keys
 * and value types, intersected with an empty record.
 *
 * NOTE(review): judging by the name, this is meant to make composed types
 * display as a single flat object in editor tooltips — confirm intent.
 */
export type Prettify<T> = {
[K in keyof T]: T[K];
} & Record<never, never>
/** Options accepted by `defineUnPDFConfig`. */
export interface UnPDFConfiguration {
/**
* By default, UnPDF will use the latest version of PDF.js. If you want to
* use an older version or the legacy build, provide a function that returns
* a promise resolving to the PDF.js module.
*
* @example
* () => import('pdfjs-dist/legacy/build/pdf.js')
*/
pdfjs?: () => Promise<typeof PDFJS>
}
16 changes: 13 additions & 3 deletions src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type * as PDFJS from 'pdfjs-dist'
import type { UnPDFConfiguration } from './types'

let resolvedModule: typeof PDFJS | undefined

Expand All @@ -14,6 +15,17 @@ export async function getDocumentProxy(data: ArrayBuffer) {
return pdf
}

/**
 * Registers a custom PDF.js module.
 *
 * When `pdfjs` is provided, it is called and awaited, and the module it
 * resolves to is stored in the module-level `resolvedModule`. When `pdfjs`
 * is omitted, the call does nothing.
 *
 * @param config - Configuration object; only its `pdfjs` member is read.
 * @returns A promise that settles once the custom module has been stored.
 * @throws Error when calling or awaiting `pdfjs` fails. The underlying
 * failure is attached to the thrown error as `cause` so the real reason
 * (missing package, wrong path, …) stays available for debugging.
 */
export async function defineUnPDFConfig({ pdfjs }: UnPDFConfiguration): Promise<void> {
  if (!pdfjs)
    return

  try {
    resolvedModule = await pdfjs()
  }
  catch (error) {
    // Assign `cause` by hand instead of using the two-argument `Error`
    // constructor, which requires the ES2022 lib typings.
    const failure: Error & { cause?: unknown } = new Error('Resolving the PDF.js module failed. Please check your configuration.')
    failure.cause = error
    throw failure
  }
}

/**
 * Returns whatever PDF.js module is currently stored in the module-level
 * `resolvedModule`.
 *
 * The value is handed back with a non-null assertion and no runtime check,
 * so if nothing has been stored yet the caller receives `undefined`.
 */
export function getResolvedPDFJS() {
  const currentModule = resolvedModule
  return currentModule!
}
Expand All @@ -27,8 +39,6 @@ export async function resolvePDFJSImports() {
resolvedModule = mod
}
catch (error) {
throw new Error(
'PDF.js is not available. Please add the package as a dependency.',
)
throw new Error('PDF.js is not available. Please add the package as a dependency.')
}
}

0 comments on commit 289b22f

Please sign in to comment.