diff --git a/src/api/PDFDocument.ts b/src/api/PDFDocument.ts index a290cdeb3..3db37b396 100644 --- a/src/api/PDFDocument.ts +++ b/src/api/PDFDocument.ts @@ -71,6 +71,7 @@ export interface LoadOptions { parseSpeed?: ParseSpeeds | number; throwOnInvalidObject?: boolean; updateMetadata?: boolean; + capNumbers?: boolean; } export interface CreateOptions { @@ -146,6 +147,7 @@ export default class PDFDocument { parseSpeed = ParseSpeeds.Slow, throwOnInvalidObject = false, updateMetadata = true, + capNumbers = false, } = options; assertIs(pdf, 'pdf', ['string', Uint8Array, ArrayBuffer]); @@ -158,6 +160,7 @@ export default class PDFDocument { bytes, parseSpeed, throwOnInvalidObject, + capNumbers, ).parseDocument(); return new PDFDocument(context, ignoreEncryption, updateMetadata); } diff --git a/src/core/parser/BaseParser.ts b/src/core/parser/BaseParser.ts index 2b88c5d4c..44e0bce16 100644 --- a/src/core/parser/BaseParser.ts +++ b/src/core/parser/BaseParser.ts @@ -10,9 +10,11 @@ const { Newline, CarriageReturn } = CharCodes; // TODO: Throw error if eof is reached before finishing object parse... class BaseParser { protected readonly bytes: ByteStream; + protected readonly capNumbers: boolean; - constructor(bytes: ByteStream) { + constructor(bytes: ByteStream, capNumbers = false) { this.bytes = bytes; + this.capNumbers = capNumbers; } protected parseRawInt(): number { @@ -60,9 +62,14 @@ class BaseParser { } if (numberValue > Number.MAX_SAFE_INTEGER) { - const msg = `Parsed number that is too large for some PDF readers: ${value}, using Number.MAX_SAFE_INTEGER instead.`; - console.warn(msg); - return Number.MAX_SAFE_INTEGER; + if (this.capNumbers) { + const msg = `Parsed number that is too large for some PDF readers: ${value}, using Number.MAX_SAFE_INTEGER instead.`; + console.warn(msg); + return Number.MAX_SAFE_INTEGER; + } else { + const msg = `Parsed number that is too large for some PDF readers: ${value}, not capping.`; + console.warn(msg); + } } return numberValue; diff --git a/src/core/parser/PDFObjectParser.ts b/src/core/parser/PDFObjectParser.ts index ec3e8a772..c51b59fd4 100644 --- a/src/core/parser/PDFObjectParser.ts +++ b/src/core/parser/PDFObjectParser.ts @@ -31,16 +31,22 @@ import { charFromCode } from 'src/utils'; // TODO: Throw error if eof is reached before finishing object parse... class PDFObjectParser extends BaseParser { - static forBytes = (bytes: Uint8Array, context: PDFContext) => - new PDFObjectParser(ByteStream.of(bytes), context); - - static forByteStream = (byteStream: ByteStream, context: PDFContext) => - new PDFObjectParser(byteStream, context); + static forBytes = ( + bytes: Uint8Array, + context: PDFContext, + capNumbers?: boolean, + ) => new PDFObjectParser(ByteStream.of(bytes), context, capNumbers); + + static forByteStream = ( + byteStream: ByteStream, + context: PDFContext, + capNumbers = false, + ) => new PDFObjectParser(byteStream, context, capNumbers); protected readonly context: PDFContext; - constructor(byteStream: ByteStream, context: PDFContext) { - super(byteStream); + constructor(byteStream: ByteStream, context: PDFContext, capNumbers = false) { + super(byteStream, capNumbers); this.context = context; } diff --git a/src/core/parser/PDFParser.ts b/src/core/parser/PDFParser.ts index 408072b3a..6e2cc538e 100644 --- a/src/core/parser/PDFParser.ts +++ b/src/core/parser/PDFParser.ts @@ -29,7 +29,9 @@ class PDFParser extends PDFObjectParser { pdfBytes: Uint8Array, objectsPerTick?: number, throwOnInvalidObject?: boolean, - ) => new PDFParser(pdfBytes, objectsPerTick, throwOnInvalidObject); + capNumbers?: boolean, + ) => + new PDFParser(pdfBytes, objectsPerTick, throwOnInvalidObject, capNumbers); private readonly objectsPerTick: number; private readonly throwOnInvalidObject: boolean; @@ -38,10 +40,11 @@ class PDFParser extends PDFObjectParser { constructor( pdfBytes: Uint8Array, - objectsPerTick: number = Infinity, + objectsPerTick = Infinity, throwOnInvalidObject = false, + capNumbers = false, ) { - super(ByteStream.of(pdfBytes), PDFContext.create()); + super(ByteStream.of(pdfBytes), PDFContext.create(), capNumbers); this.objectsPerTick = objectsPerTick; this.throwOnInvalidObject = throwOnInvalidObject; } diff --git a/tests/core/parser/PDFObjectParser.spec.ts b/tests/core/parser/PDFObjectParser.spec.ts index f91aadcba..d5de654d7 100644 --- a/tests/core/parser/PDFObjectParser.spec.ts +++ b/tests/core/parser/PDFObjectParser.spec.ts @@ -19,16 +19,23 @@ import { numberToString, } from 'src/index'; -const parse = (value: string | Uint8Array) => { +type ParseOptions = { capNumbers?: boolean }; + +const parse = (value: string | Uint8Array, options: ParseOptions = {}) => { const context = PDFContext.create(); - const parser = PDFObjectParser.forBytes(typedArrayFor(value), context); + const parser = PDFObjectParser.forBytes( + typedArrayFor(value), + context, + options.capNumbers, + ); return parser.parseObject(); }; -const expectParse = (value: string | Uint8Array) => expect(parse(value)); +const expectParse = (value: string | Uint8Array, options?: ParseOptions) => + expect(parse(value, options)); -const expectParseStr = (value: string | Uint8Array) => - expect(String(parse(value))); +const expectParseStr = (value: string | Uint8Array, options?: ParseOptions) => + expect(String(parse(value, options))); describe(`PDFObjectParser`, () => { const origConsoleWarn = console.warn; @@ -154,7 +161,25 @@ describe(`PDFObjectParser`, () => { expect(parser.parseObject().toString()).toBe('-0.1'); }); - it(`caps numbers at Number.MAX_SAFE_INTEGER`, () => { + it(`caps numbers at Number.MAX_SAFE_INTEGER when capNumbers=true`, () => { + expectParseStr(numberToString(Number.MAX_SAFE_INTEGER - 1), { + capNumbers: true, + }).toBe('9007199254740990'); + expectParseStr(numberToString(Number.MAX_SAFE_INTEGER), { + capNumbers: true, + }).toBe('9007199254740991'); + expectParseStr(numberToString(Number.MAX_SAFE_INTEGER + 1), { + capNumbers: true, + }).toBe('9007199254740991'); + expectParseStr('340282346638528900000000000000000000000', { + capNumbers: true, + }).toBe('9007199254740991'); + expectParseStr('340282346638528859811704183484516925440', { + capNumbers: true, + }).toBe('9007199254740991'); + }); + + it(`does not cap numbers at Number.MAX_SAFE_INTEGER when capNumbers=false`, () => { expectParseStr(numberToString(Number.MAX_SAFE_INTEGER - 1)).toBe( '9007199254740990', ); @@ -162,13 +187,13 @@ describe(`PDFObjectParser`, () => { '9007199254740991', ); expectParseStr(numberToString(Number.MAX_SAFE_INTEGER + 1)).toBe( - '9007199254740991', + '9007199254740992', ); expectParseStr('340282346638528900000000000000000000000').toBe( - '9007199254740991', + '340282346638528900000000000000000000000', ); expectParseStr('340282346638528859811704183484516925440').toBe( - '9007199254740991', + '340282346638528900000000000000000000000', ); }); });