diff --git a/CHANGELOG.md b/CHANGELOG.md index edba1fff..a46adc4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,9 @@ - Fix garbled text copying in Chrome/Edge for PDFs with >256 unique characters (#1659) - Fix Link accessibility issues - Fix Table Accessibility Issue: Operator CS/cs not allowed in this current state -- Add pageLayout option to control how pages are displayed in PDF viewers - Fix Interlaced PNG with indexed transparency rendered incorrectly +- Fix SVG path parser incorrectly handling arc flags without separators +- Add pageLayout option to control how pages are displayed in PDF viewers - Preserve existing PageMode instead of overwriting when adding outlines - Support outlines that jump to specific page positions with custom zoom level diff --git a/lib/path.js b/lib/path.js index e213c6be..3598064e 100644 --- a/lib/path.js +++ b/lib/path.js @@ -25,89 +25,191 @@ const parameters = { z: 0, }; -const parse = function (path) { - let cmd; - const ret = []; - let args = []; - let curArg = ''; - let foundDecimal = false; - let params = 0; - - for (let c of path) { - if (parameters[c] != null) { - params = parameters[c]; - if (cmd) { - // save existing command - if (curArg.length > 0) { - args[args.length] = +curArg; - } - ret[ret.length] = { cmd, args }; +const isCommand = function (c) { + return c in parameters; +}; - args = []; - curArg = ''; - foundDecimal = false; - } +const isWsp = function (c) { + const codePoint = c.codePointAt(0); + return ( + codePoint === 0x20 || + codePoint === 0x9 || + codePoint === 0xd || + codePoint === 0xa + ); +}; + +const isDigit = function (c) { + const codePoint = c.codePointAt(0); + if (codePoint == null) { + return false; + } + return 48 <= codePoint && codePoint <= 57; +}; - cmd = c; - } else if ( - [' ', ','].includes(c) || - (c === '-' && curArg.length > 0 && curArg[curArg.length - 1] !== 'e') || - (c === '.' 
&& foundDecimal) - ) { - if (curArg.length === 0) { +const readNumber = function (string, cursor) { + let i = cursor; + let value = ''; + let state = 'none'; + for (; i < string.length; i += 1) { + const c = string[i]; + if (c === '+' || c === '-') { + if (state === 'none') { + state = 'sign'; + value += c; + continue; + } + if (state === 'e') { + state = 'exponent_sign'; + value += c; + continue; + } + } + if (isDigit(c)) { + if (state === 'none' || state === 'sign' || state === 'whole') { + state = 'whole'; + value += c; continue; } + if (state === 'decimal_point' || state === 'decimal') { + state = 'decimal'; + value += c; + continue; + } + if (state === 'e' || state === 'exponent_sign' || state === 'exponent') { + state = 'exponent'; + value += c; + continue; + } + } + if (c === '.') { + if (state === 'none' || state === 'sign' || state === 'whole') { + state = 'decimal_point'; + value += c; + continue; + } + } + if (c === 'E' || c === 'e') { + if ( + state === 'whole' || + state === 'decimal_point' || + state === 'decimal' + ) { + state = 'e'; + value += c; + continue; + } + } + break; + } + const number = Number.parseFloat(value); + if (Number.isNaN(number)) { + return [cursor, null]; + } + // step back to delegate iteration to parent loop + return [i - 1, number]; +}; - if (args.length === params) { - // handle reused commands - ret[ret.length] = { cmd, args }; - args = [+curArg]; +// parse is based on the path parser from SVGO +// https://github.com/svg/svgo/blob/main/lib/path.js +// License: MIT - // handle assumed commands - if (cmd === 'M') { - cmd = 'L'; - } - if (cmd === 'm') { - cmd = 'l'; +const parse = function (path) { + const pathData = []; + let command = null; + let args = []; + let argsCount = 0; + let canHaveComma = false; + let hadComma = false; + for (let i = 0; i < path.length; i += 1) { + const c = path.charAt(i); + if (isWsp(c)) { + continue; + } + // allow comma only between arguments + if (canHaveComma && c === ',') { + if (hadComma) { 
+ break; + } + hadComma = true; + continue; + } + if (isCommand(c)) { + if (hadComma) { + return pathData; + } + if (command == null) { + // moveto should be leading command + if (c !== 'M' && c !== 'm') { + return pathData; } } else { - args[args.length] = +curArg; + // stop if previous command arguments are not flushed + if (args.length !== 0) { + return pathData; + } } - - foundDecimal = c === '.'; - - // fix for negative numbers or repeated decimals with no delimeter between commands - curArg = ['-', '.'].includes(c) ? c : ''; - } else { - curArg += c; - if (c === '.') { - foundDecimal = true; + command = c; + args = []; + argsCount = parameters[command]; + canHaveComma = false; + // flush command without arguments + if (argsCount === 0) { + pathData.push({ command, args }); } + continue; } - } - - // add the last command - if (curArg.length > 0) { - if (args.length === params) { - // handle reused commands - ret[ret.length] = { cmd, args }; - args = [+curArg]; - - // handle assumed commands - if (cmd === 'M') { - cmd = 'L'; + // avoid parsing arguments if no command detected + if (command == null) { + return pathData; + } + // read next argument + let newCursor = i; + let number = null; + if (command === 'A' || command === 'a') { + const position = args.length; + if (position === 0 || position === 1) { + // allow only positive number without sign as first two arguments + if (c !== '+' && c !== '-') { + [newCursor, number] = readNumber(path, i); + } } - if (cmd === 'm') { - cmd = 'l'; + if (position === 2 || position === 5 || position === 6) { + [newCursor, number] = readNumber(path, i); + } + if (position === 3 || position === 4) { + // read flags + if (c === '0') { + number = 0; + } + if (c === '1') { + number = 1; + } } } else { - args[args.length] = +curArg; + [newCursor, number] = readNumber(path, i); + } + if (number == null) { + return pathData; + } + args.push(number); + canHaveComma = true; + hadComma = false; + i = newCursor; + // flush arguments when 
necessary count is reached + if (args.length === argsCount) { + pathData.push({ command, args }); + // subsequent moveto coordinates are treated as implicit lineto commands + if (command === 'M') { + command = 'L'; + } + if (command === 'm') { + command = 'l'; + } + args = []; } } - - ret[ret.length] = { cmd, args }; - - return ret; + return pathData; }; const apply = function (commands, doc) { @@ -117,8 +219,8 @@ const apply = function (commands, doc) { // run the commands for (let i = 0; i < commands.length; i++) { const c = commands[i]; - if (typeof runners[c.cmd] === 'function') { - runners[c.cmd](doc, c.args); + if (typeof runners[c.command] === 'function') { + runners[c.command](doc, c.args); } } }; diff --git a/tests/unit/path.spec.js b/tests/unit/path.spec.js new file mode 100644 index 00000000..2fdac343 --- /dev/null +++ b/tests/unit/path.spec.js @@ -0,0 +1,146 @@ +import PDFDocument from '../../lib/document'; +import { logData } from './helpers'; + +describe('SVG Path', () => { + let document; + + beforeEach(() => { + document = new PDFDocument({ + info: { CreationDate: new Date(Date.UTC(2018, 1, 1)) }, + compress: false, + }); + }); + + describe('arc command flag parsing', () => { + // Arc command flags (large-arc-flag and sweep-flag) can be chained without separator + // This is valid per SVG spec and commonly produced by SVGO (SVG optimizer) + // + // Arc command: a rx ry rotation large-arc-flag sweep-flag x y + // - large-arc-flag and sweep-flag are 0 or 1 + // - They can be written without separator: "010" = 0, 1, 0 (flag, flag, x) + test('parses arc with chained flags "a5 5 0 0110 10"', () => { + // a5 5 0 0110 10 should parse as: + // rx=5, ry=5, rotation=0, large-arc=0, sweep=1, x=10, y=10 + // The "01" in "0110" are the two flags (0 and 1) + // The "10" after is the x coordinate + // This requires special handling because "0110" looks like one number + const docData = logData(document); + + // This path should produce valid bezier curves for the 
arc + // If parsing fails, the arc won't render correctly + document.path('M0 0 a5 5 0 0110 10').stroke(); + // same as "a 5 5 0 0 1 10 10" + document.end(); + + const content = docData.join(''); + // Arc is converted to bezier curves, so we should see 'c' (curveto) commands + // PDF format: "x1 y1 x2 y2 x3 y3 c" + expect(content).toContain('2.761424 -2.761424 7.238576 -2.761424 10 0 c'); + expect(content).toContain( + '12.761424 2.761424 12.761424 7.238576 10 10 c', + ); + }); + + test('parses arc with chained flags where flags touch x coordinate "a5 5 0 011-5"', () => { + // a5 5 0 011-5 should parse as: + // rx=5, ry=5, rotation=0, large-arc=0, sweep=1, x=1, y=-5 + // The negative sign separates y from the previous number + + const docData = logData(document); + + document.path('M0 0 a5 5 0 011-5').stroke(); + // same as "a 5 5 0 0 1 1 -5" + document.end(); + + const content = docData.join(''); + expect(content).toContain('-0.6054 -1.72418 -0.221977 -3.641295 1 -5 c'); + }); + }); + + describe('basic path commands', () => { + test('parses moveto and lineto', () => { + const docData = logData(document); + + document.path('M10 20 L30 40').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses relative moveto and lineto', () => { + const docData = logData(document); + + document.path('m10 20 l30 40').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses cubic bezier', () => { + const docData = logData(document); + + document.path('M10 10 C20 20 40 20 50 10').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses quadratic bezier', () => { + const docData = logData(document); + + document.path('M10 10 Q30 30 50 10').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses closepath', () => { + const docData = logData(document); + + document.path('M10 10 L50 10 L30 50 Z').stroke(); + document.end(); 
+ + expect(docData.length).toBeGreaterThan(0); + }); + }); + + describe('number formats', () => { + test('parses negative numbers without separator', () => { + const docData = logData(document); + + // -10-20 should be parsed as -10 and -20 + document.path('M10 10 l-10-20').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses decimal numbers', () => { + const docData = logData(document); + + document.path('M10.5 20.5 L30.5 40.5').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses chained decimals ".5.5"', () => { + const docData = logData(document); + + // .5.5 should be parsed as 0.5 and 0.5 + document.path('M10 10 l.5.5').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + + test('parses scientific notation', () => { + const docData = logData(document); + + document.path('M1e1 2e1 L3e1 4e1').stroke(); + document.end(); + + expect(docData.length).toBeGreaterThan(0); + }); + }); +});