Skip to content

Commit

Permalink
perf: use modern pdfjs bundle
Browse files Browse the repository at this point in the history
  • Loading branch information
johannschopplich committed Aug 12, 2023
1 parent a6c1ee8 commit d381d09
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 30 deletions.
4 changes: 2 additions & 2 deletions src/image.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { getDocumentProxy, getResolvedPDFJSInstance } from './utils'
import { getDocumentProxy, getResolvedPDFJS } from './utils'

export async function getImagesFromPage(
data: ArrayBuffer,
Expand All @@ -7,7 +7,7 @@ export async function getImagesFromPage(
const pdf = await getDocumentProxy(data)
const page = await pdf.getPage(pageNumber)
const operatorList = await page.getOperatorList()
const { OPS } = getResolvedPDFJSInstance()
const { OPS } = getResolvedPDFJS()

const images: ArrayBuffer[] = []
for (const op of operatorList.fnArray) {
Expand Down
6 changes: 3 additions & 3 deletions src/index.node.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { decodePDFText as _decodePDFText } from './text'
import { getImagesFromPage as _getImagesFromPage } from './image'
import { resolvePDFJSNodeImports } from './utils'
import { resolvePDFJSImports } from './utils'

export const decodePDFText: typeof _decodePDFText = async (...args) => {
await resolvePDFJSNodeImports()
await resolvePDFJSImports()
return await _decodePDFText(...args)
}

export const getImagesFromPage: typeof _getImagesFromPage = async (...args) => {
await resolvePDFJSNodeImports()
await resolvePDFJSImports()
return await _getImagesFromPage(...args)
}
6 changes: 3 additions & 3 deletions src/index.web.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { decodePDFText as _decodePDFText } from './text'
import { getImagesFromPage as _getImagesFromPage } from './image'
import { resolvePDFJSWebImports } from './utils'
import { resolvePDFJSImports } from './utils'

export const decodePDFText: typeof _decodePDFText = async (...args) => {
await resolvePDFJSWebImports()
await resolvePDFJSImports()
return await _decodePDFText(...args)
}

export const getImagesFromPage: typeof _getImagesFromPage = async (...args) => {
await resolvePDFJSWebImports()
await resolvePDFJSImports()
return await _getImagesFromPage(...args)
}
29 changes: 7 additions & 22 deletions src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import type * as PDFJS from 'pdfjs-dist'

let instance: typeof PDFJS | undefined
let resolvedModule: typeof PDFJS | undefined

export async function getDocumentProxy(data: ArrayBuffer) {
const { getDocument } = getResolvedPDFJSInstance()
const { getDocument } = getResolvedPDFJS()
const pdf = await getDocument({
data,
useWorkerFetch: false,
Expand All @@ -14,32 +14,17 @@ export async function getDocumentProxy(data: ArrayBuffer) {
return pdf
}

export function getResolvedPDFJSInstance() {
return instance!
export function getResolvedPDFJS() {
return resolvedModule!
}

export async function resolvePDFJSWebImports() {
if (instance)
export async function resolvePDFJSImports() {
if (resolvedModule)
return

try {
const { default: mod } = await import('pdfjs-dist')
instance = mod
}
catch (error) {
throw new Error(
'PDF.js is not available. Please add the package as a dependency.',
)
}
}

export async function resolvePDFJSNodeImports() {
if (instance)
return

try {
const { default: mod } = await import('pdfjs-dist/legacy/build/pdf')
instance = mod
resolvedModule = mod
}
catch (error) {
throw new Error(
Expand Down

0 comments on commit d381d09

Please sign in to comment.