diff --git a/package-lock.json b/package-lock.json index 6a3347a..879cc17 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,11 +10,15 @@ "license": "CC-BY-NC-SA-4.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.12.3", + "@mozilla/readability": "^0.6.0", "commander": "^14.0.0", "glob": "^11.0.3", + "jsdom": "^26.1.0", + "node-html-parser": "^7.0.1", "remark": "^15.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", + "turndown": "^7.2.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0" }, @@ -31,8 +35,10 @@ "@semantic-release/changelog": "^6.0.3", "@semantic-release/exec": "^7.1.0", "@semantic-release/git": "^10.0.1", + "@types/jsdom": "^21.1.7", "@types/mdast": "^4.0.4", "@types/node": "^24.0.1", + "@types/turndown": "^5.0.5", "@types/unist": "^3.0.0", "@typescript-eslint/eslint-plugin": "^8.34.0", "@typescript-eslint/parser": "^8.34.0", @@ -3687,6 +3693,25 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@asamuzakjp/css-color": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz", + "integrity": "sha512-K1A6z8tS3XsmCMM86xoWdn7Fkdn9m6RSVtocUrJYIwZnFVkng/PvkEoWtOWmP+Scc6saYWHWZYbndEEXxl24jw==", + "license": "MIT", + "dependencies": { + "@csstools/css-calc": "^2.1.3", + "@csstools/css-color-parser": "^3.0.9", + "@csstools/css-parser-algorithms": "^3.0.4", + "@csstools/css-tokenizer": "^3.0.3", + "lru-cache": "^10.4.3" + } + }, + "node_modules/@asamuzakjp/css-color/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "license": "ISC" + }, "node_modules/@bcoe/v8-coverage": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-1.0.2.tgz", @@ -4939,6 +4964,116 @@ "node": ">=8" } }, + "node_modules/@csstools/color-helpers": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.0.2.tgz", + "integrity": "sha512-JqWH1vsgdGcw2RR6VliXXdA0/59LttzlU8UlRT/iUUsEeWfYq8I+K0yhihEUTTHLRm1EXvpsCx3083EU15ecsA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": ">=18" + } + }, + "node_modules/@csstools/css-calc": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-2.1.4.tgz", + "integrity": "sha512-3N8oaj+0juUw/1H3YwmDDJXCgTB1gKU6Hc/bB502u9zR0q2vd786XJH9QfrKIEgFlZmhZiq6epXl4rHqhzsIgQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^3.0.5", + "@csstools/css-tokenizer": "^3.0.4" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "3.0.10", + "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-3.0.10.tgz", + "integrity": "sha512-TiJ5Ajr6WRd1r8HSiwJvZBiJOqtH86aHpUjq5aEKWHiII2Qfjqd/HCWKPOW8EP4vcspXbHnXrwIDlu5savQipg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^5.0.2", + "@csstools/css-calc": "^2.1.4" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^3.0.5", + "@csstools/css-tokenizer": "^3.0.4" + } + }, + "node_modules/@csstools/css-parser-algorithms": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-3.0.5.tgz", + "integrity": "sha512-DaDeUkXZKjdGhgYaHNJTV9pV7Y9B3b644jCLs9Upc3VeNGg6LWARAT6O+Q+/COo+2gg/bM5rhpMAtf70WqfBdQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@csstools/css-tokenizer": "^3.0.4" + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-3.0.4.tgz", + "integrity": "sha512-Vd/9EVDiu6PPJt9yAh6roZP6El1xHrdvIVGjyBsHR0RYwNHgL7FJPyIIW4fANJNG6FtyZfvlRPpFI4ZM/lubvw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.25.5", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz", @@ -5453,6 +5588,12 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "license": "BSD-2-Clause" + }, "node_modules/@modelcontextprotocol/sdk": { "version": "1.12.3", "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.12.3.tgz", @@ -5518,15 +5659,6 @@ "punycode": "^2.1.0" } }, - "node_modules/@modelcontextprotocol/sdk/node_modules/ajv/node_modules/uri-js/node_modules/punycode": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", - "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, "node_modules/@modelcontextprotocol/sdk/node_modules/content-type": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", @@ -5696,24 +5828,6 @@ "node": ">= 0.8" } }, - "node_modules/@modelcontextprotocol/sdk/node_modules/express/node_modules/body-parser/node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@modelcontextprotocol/sdk/node_modules/express/node_modules/body-parser/node_modules/iconv-lite/node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "license": "MIT" - }, "node_modules/@modelcontextprotocol/sdk/node_modules/express/node_modules/content-disposition": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.0.tgz", @@ -6652,24 +6766,6 @@ "node": ">=0.6" } }, - "node_modules/@modelcontextprotocol/sdk/node_modules/raw-body/node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@modelcontextprotocol/sdk/node_modules/raw-body/node_modules/iconv-lite/node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "license": "MIT" - }, "node_modules/@modelcontextprotocol/sdk/node_modules/raw-body/node_modules/unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", @@ -6697,6 +6793,15 @@ "zod": "^3.24.1" } }, + "node_modules/@mozilla/readability": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.6.0.tgz", + "integrity": "sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -7808,6 +7913,18 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/jsdom": { + "version": "21.1.7", + "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.7.tgz", + "integrity": "sha512-yOriVnggzrnQ3a9OKOCxaVuSug3w3/SbOj5i7VwXWZEyUNl3bLF9V3MfxGbZKuwqJOQyRfqXyROBB1CoZLFWzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0" + } + }, "node_modules/@types/mdast": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", @@ -7827,6 +7944,20 @@ "undici-types": "~7.8.0" } }, + "node_modules/@types/tough-cookie": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", + "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/turndown": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.5.tgz", + "integrity": "sha512-TL2IgGgc7B5j78rIccBtlYAnkuv8nUQqhQc+DSYV5j9Be9XOcm/SKOVRuA47xAVI3680Tk9B1d8flK2GWT2+4w==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", @@ -9538,6 +9669,15 @@ "url": "https://opencollective.com/vitest" } }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/ajv": { "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", @@ -9649,6 +9789,12 @@ "dev": true, "license": "MIT" }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/brace-expansion": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", @@ -10452,13 +10598,6 @@ "node": ">=0.10.0" } }, - "node_modules/commitizen/node_modules/inquirer/node_modules/external-editor/node_modules/iconv-lite/node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, - "license": "MIT" - }, "node_modules/commitizen/node_modules/inquirer/node_modules/external-editor/node_modules/tmp": { "version": "0.0.33", "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz", @@ -10992,6 +11131,47 @@ "node": ">= 8" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/cssstyle": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.6.0.tgz", + "integrity": "sha512-2z+rWdzbbSZv6/rhtvzvqeZQHrBaqgogqt85sqFNbabZOuFbCVFb8kPeEtZjiKkbrm395irpNKiYeFeLiQnFPg==", + "license": "MIT", + "dependencies": { + "@asamuzakjp/css-color": "^3.2.0", + "rrweb-cssom": "^0.8.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/cz-conventional-changelog": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/cz-conventional-changelog/-/cz-conventional-changelog-3.3.0.tgz", @@ -11540,6 +11720,19 @@ "node": ">=0.10.0" } }, + "node_modules/data-urls": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-5.0.0.tgz", + "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==", + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/debug": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", @@ -11557,6 +11750,12 @@ } } }, + "node_modules/decimal.js": { + "version": "10.6.0", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", + "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", + "license": "MIT" + }, "node_modules/deep-eql": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", @@ -11567,6 +11766,61 @@ "node": ">=6" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/eastasianwidth": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", @@ -11579,6 +11833,18 @@ "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "license": "MIT" }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/es-module-lexer": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", @@ -12159,16 +12425,6 @@ "punycode": "^2.1.0" } }, - "node_modules/eslint/node_modules/ajv/node_modules/uri-js/node_modules/punycode": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", - "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, "node_modules/eslint/node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -12802,6 +13058,65 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/html-encoding-sniffer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", + "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", + "license": "MIT", + "dependencies": { + "whatwg-encoding": "^3.1.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", @@ -12811,6 +13126,12 @@ "node": ">=8" } }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", + "license": "MIT" + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -12849,6 +13170,45 @@ "dev": true, "license": "MIT" }, + "node_modules/jsdom": { + "version": "26.1.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-26.1.0.tgz", + "integrity": "sha512-Cvc9WUhxSMEo4McES3P7oK3QaXldCfNWp7pl2NNeiIFlCoLr3kfq9kb1fxftiwk1FLV7CvpvDfonxtzUDeSOPg==", + "license": "MIT", + "dependencies": { + "cssstyle": "^4.2.1", + "data-urls": "^5.0.0", + "decimal.js": "^10.5.0", + "html-encoding-sniffer": "^4.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "nwsapi": "^2.2.16", + "parse5": "^7.2.1", + "rrweb-cssom": "^0.8.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^5.1.1", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^7.0.0", + "whatwg-encoding": "^3.1.1", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.1.1", + "ws": "^8.18.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, "node_modules/loupe": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.1.3.tgz", @@ -12932,12 +13292,64 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/node-html-parser": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-7.0.1.tgz", + "integrity": "sha512-KGtmPY2kS0thCWGK0VuPyOS+pBKhhe8gXztzA2ilAOhbUbxa9homF1bOyKvhGzMLXUoRds9IOmr/v5lr/lqNmA==", + "license": "MIT", + "dependencies": { + "css-select": "^5.1.0", + "he": "1.2.0" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/nwsapi": { + "version": "2.2.21", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.21.tgz", + "integrity": "sha512-o6nIY3qwiSXl7/LuOU0Dmuctd34Yay0yeuZRLFmDPrrdHpXKFndPj3hM+YEPVHYC5fx2otBx4Ilc/gyYSAUaIA==", + "license": "MIT" + }, "node_modules/package-json-from-dist": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", "license": "BlueOak-1.0.0" }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/path-key": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", @@ -13683,6 +14095,15 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/remark": { "version": "15.0.1", "resolved": "https://registry.npmjs.org/remark/-/remark-15.0.1.tgz", @@ -14619,6 +15040,30 @@ "dev": true, "license": "MIT" }, + "node_modules/rrweb-cssom": { + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz", + "integrity": "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==", + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, "node_modules/semantic-release": { "version": "24.2.5", "resolved": "https://registry.npmjs.org/semantic-release/-/semantic-release-24.2.5.tgz", @@ -15460,54 +15905,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/semantic-release/node_modules/@semantic-release/github/node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/semantic-release/node_modules/@semantic-release/github/node_modules/http-proxy-agent/node_modules/agent-base": { - "version": "7.1.3", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", - "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/semantic-release/node_modules/@semantic-release/github/node_modules/https-proxy-agent": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", - "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/semantic-release/node_modules/@semantic-release/github/node_modules/https-proxy-agent/node_modules/agent-base": { - "version": "7.1.3", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", - "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, "node_modules/semantic-release/node_modules/@semantic-release/github/node_modules/issue-parser": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/issue-parser/-/issue-parser-7.0.1.tgz", @@ -21887,6 +22284,12 @@ "url": "https://github.com/sponsors/antfu" } }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "license": "MIT" + }, "node_modules/test-exclude": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-7.0.1.tgz", @@ -22017,6 +22420,48 @@ "node": ">=14.0.0" } }, + "node_modules/tldts": { + "version": "6.1.86", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.86.tgz", + "integrity": "sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ==", + "license": "MIT", + "dependencies": { + "tldts-core": "^6.1.86" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "6.1.86", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.86.tgz", + "integrity": "sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==", + "license": "MIT" + }, + "node_modules/tough-cookie": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.1.2.tgz", + "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==", + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^6.1.32" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.1.1.tgz", + "integrity": "sha512-hdF5ZgjTqgAntKkklYw0R03MG2x/bSzTtkxmIRw/sTNV8YXsCJ1tfLAX23lhxhHJlEf3CRCOCGGWw3vI3GaSPw==", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/ts-json-schema-generator": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/ts-json-schema-generator/-/ts-json-schema-generator-2.4.0.tgz", @@ -22140,6 +22585,15 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/turndown": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.0.tgz", + "integrity": "sha512-eCZGBN4nNNqM9Owkv9HAtWRYfLA4h909E/WGAWWBpmB275ehNhZyk87/Tpvjbp0jjNl9XwCsbe6bm6CqFsgD+A==", + "license": "MIT", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + } + }, "node_modules/typedoc": { "version": "0.28.5", "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.28.5.tgz", @@ -22332,19 +22786,6 @@ "dev": true, "license": "Python-2.0" }, - "node_modules/typedoc/node_modules/markdown-it/node_modules/entities": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", - "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/typedoc/node_modules/markdown-it/node_modules/linkify-it": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.0.tgz", @@ -23301,6 +23742,61 @@ "dev": true, "license": "MIT" }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-url": { + "version": "14.2.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.2.0.tgz", + "integrity": "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==", + "license": "MIT", + "dependencies": { + "tr46": "^5.1.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -23406,6 +23902,42 @@ "engines": { "node": ">=8" } + }, + "node_modules/ws": { + "version": "8.18.3", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", + "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "license": "MIT" } } } diff --git a/package.json b/package.json index fbf0cbe..b31943f 100644 --- a/package.json +++ b/package.json @@ -78,8 +78,10 @@ "@semantic-release/changelog": "^6.0.3", "@semantic-release/exec": "^7.1.0", "@semantic-release/git": "^10.0.1", + "@types/jsdom": "^21.1.7", "@types/mdast": "^4.0.4", "@types/node": "^24.0.1", + "@types/turndown": "^5.0.5", "@types/unist": "^3.0.0", "@typescript-eslint/eslint-plugin": "^8.34.0", "@typescript-eslint/parser": "^8.34.0", @@ -108,11 +110,15 @@ }, "dependencies": { "@modelcontextprotocol/sdk": "^1.12.3", + "@mozilla/readability": "^0.6.0", "commander": "^14.0.0", "glob": "^11.0.3", + "jsdom": "^26.1.0", + "node-html-parser": "^7.0.1", "remark": "^15.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", + "turndown": "^7.2.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0" }, diff --git a/src/cli.test.ts b/src/cli.test.ts index 1a25b9b..16e1be6 100644 --- a/src/cli.test.ts +++ b/src/cli.test.ts @@ -146,8 +146,8 @@ describe('CLI Entry Point', () => { it('should set action handlers for commands', async () => { await import('./cli.js'); - // Should call action 9 times (once for each command: convert, move, split, join, merge, index, barrel, toc, validate) - expect(mockAction).toHaveBeenCalledTimes(9); + // Should call action 10 times (once for each command: clip, convert, move, split, join, merge, index, barrel, toc, validate) + expect(mockAction).toHaveBeenCalledTimes(10); }); it('should add help text for convert command', async () => { diff --git a/src/cli.ts b/src/cli.ts index f8b9b08..cd582c0 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,6 +1,7 @@ #!/usr/bin/env node import { Command } from 'commander'; +import { clipCommand } from './commands/clip.js'; import { convertCommand } from './commands/convert.js'; import { indexCommand } from './commands/index.js'; import { joinCommand } from './commands/join.js'; @@ -17,6 +18,68 @@ program .description('CLI for markdown file operations with intelligent link refactoring') .version('0.1.0'); +program + .command('clip') + .description('Convert web pages to markdown (web clipper)') + .argument('', 'URLs to clip or paths to files containing URLs (use --batch)') + .option('-o, --output ', 'Output file name (single URL only)') + .option('--output-dir ', 'Output directory for clipped files') + .option('--batch', 'Process multiple URLs from input files') + .option( + '--strategy ', + 'Extraction strategy: auto|readability|manual|full|structured', + 'auto' + ) + .option( + '--image-strategy ', + 'Image handling: skip|link-only|download|base64', + 'link-only' + ) + .option('--image-dir ', 'Directory for downloaded images', './images') + .option('--selectors ', 'CSS selectors for manual extraction (comma-separated)') + .option('--no-frontmatter', 'Skip frontmatter generation') + .option('--timeout ', 'Request timeout in milliseconds', parseInt, 30000) + .option('--user-agent ', 'Custom User-Agent string') + .option('--headers ', 'Custom HTTP headers (JSON format)') + .option('--cookies ', 'Path to cookies file') + .option('--no-follow-redirects', 'Don\'t follow HTTP redirects') + .option('--max-redirects ', 'Maximum redirects to follow', parseInt, 5) + .option('-d, --dry-run', 'Show what would be clipped without creating files') + .option('-v, --verbose', 'Show detailed output with processing information') + .option('--json', 'Output results in JSON format') + .addHelpText( + 'after', + ` +Examples: + $ markmv clip https://example.com/article + $ markmv clip https://example.com/article -o article.md + $ markmv clip urls.txt --batch --output-dir ./clipped + $ markmv clip https://docs.site.com --strategy manual --selectors "article,.content" + $ markmv clip https://blog.com/post --strategy readability --image-strategy download + $ markmv clip https://example.com --dry-run --verbose + +Extraction Strategies: + auto Automatically choose best strategy based on content + readability Mozilla Readability algorithm (best for articles/blogs) + manual Extract using custom CSS selectors + full Extract entire page content + structured Use Schema.org and semantic markup + +Image Strategies: + skip Don't process images at all + link-only Keep images as external links (fastest) + download Download images locally and update paths + base64 Embed small images as base64 (increases file size) + +Advanced Features: + --headers '{"Authorization": "Bearer token"}' Custom headers for auth + --cookies cookies.txt Use cookies for protected content + --selectors "article,.post-content,main" Custom content selectors + --timeout 60000 Extended timeout for slow sites + --user-agent "Custom Bot 1.0" Custom user agent string` + ) + .action(clipCommand); + program .command('convert') .description('Convert markdown link formats and path resolution') diff --git a/src/commands/clip.test.ts b/src/commands/clip.test.ts new file mode 100644 index 0000000..0e449d8 --- /dev/null +++ b/src/commands/clip.test.ts @@ -0,0 +1,663 @@ +/** + * Tests for the web clipper command. + * + * @fileoverview Comprehensive tests for web page to markdown conversion functionality + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtemp, writeFile, readFile, rm } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { clipCommand } from './clip.js'; + +// Mock the WebClipper class +vi.mock('../core/web-clipper.js', () => { + const mockWebClipper = vi.fn(); + mockWebClipper.prototype.clip = vi.fn(); + + return { + WebClipper: mockWebClipper, + }; +}); + +describe('Clip Command', () => { + let testDir: string; + + beforeEach(async () => { + testDir = await mkdtemp(join(tmpdir(), 'markmv-clip-test-')); + }); + + afterEach(async () => { + await rm(testDir, { recursive: true, force: true }); + vi.clearAllMocks(); + }); + + describe('Basic functionality', () => { + it('should require at least one URL', async () => { + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalError = console.error; + const errors: string[] = []; + console.error = vi.fn((message: string) => { + errors.push(message); + }); + + try { + await clipCommand([], {}); + } finally { + process.exit = originalExit; + console.error = originalError; + } + + expect(exitCode).toBe(1); + expect(errors.some(error => error.includes('At least one URL must be specified'))).toBe(true); + }); + + it('should process a single URL successfully', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# Test Article\n\nThis is test content.', + title: 'Test Article', + author: 'Test Author', + publishedDate: '2024-01-01', + description: 'Test description', + sourceUrl: 'https://example.com/article', + strategy: 'readability', + images: [], + links: [], + structuredData: undefined, + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand(['https://example.com/article'], { + output: join(testDir, 'article.md'), + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(0); + expect(mockClip).toHaveBeenCalledWith('https://example.com/article'); + + // Check that file was created + const content = await readFile(join(testDir, 'article.md'), 'utf-8'); + expect(content).toBe('# Test Article\n\nThis is test content.'); + + expect(logs.some(log => log.includes('Successfully clipped: 1'))).toBe(true); + }); + + it('should handle dry run mode', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# Test Article\n\nContent', + title: 'Test Article', + sourceUrl: 'https://example.com/article', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand(['https://example.com/article'], { + output: join(testDir, 'article.md'), + dryRun: true, + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(0); + expect(logs.some(log => log.includes('Dry run - no files were actually created'))).toBe(true); + + // File should not exist in dry run + try { + await readFile(join(testDir, 'article.md'), 'utf-8'); + expect.fail('File should not exist in dry run mode'); + } catch (error) { + expect(error).toBeDefined(); + } + }); + + it('should output JSON when requested', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# Test', + title: 'Test', + sourceUrl: 'https://example.com/test', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand(['https://example.com/test'], { + output: join(testDir, 'test.md'), + json: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(0); + + // Should output valid JSON + const jsonOutput = logs.join('\n'); + expect(() => JSON.parse(jsonOutput)).not.toThrow(); + + const result = JSON.parse(jsonOutput); + expect(result.clippedUrls).toContain('https://example.com/test'); + expect(result.generatedFiles).toContain(join(testDir, 'test.md')); + }); + }); + + describe('Batch processing', () => { + it('should process multiple URLs from a file', async () => { + const urlsFile = join(testDir, 'urls.txt'); + await writeFile(urlsFile, 'https://example.com/page1\nhttps://example.com/page2\n# Comment line\n\nhttps://example.com/page3'); + + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip + .mockResolvedValueOnce({ + markdown: '# Page 1', + title: 'Page 1', + sourceUrl: 'https://example.com/page1', + strategy: 'readability', + images: [], + links: [], + }) + .mockResolvedValueOnce({ + markdown: '# Page 2', + title: 'Page 2', + sourceUrl: 'https://example.com/page2', + strategy: 'readability', + images: [], + links: [], + }) + .mockResolvedValueOnce({ + markdown: '# Page 3', + title: 'Page 3', + sourceUrl: 'https://example.com/page3', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand([urlsFile], { + batch: true, + outputDir: testDir, + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(0); + expect(mockClip).toHaveBeenCalledTimes(3); + expect(mockClip).toHaveBeenCalledWith('https://example.com/page1'); + expect(mockClip).toHaveBeenCalledWith('https://example.com/page2'); + expect(mockClip).toHaveBeenCalledWith('https://example.com/page3'); + + expect(logs.some(log => log.includes('Successfully clipped: 3'))).toBe(true); + }); + + it('should handle mixed valid and invalid URLs in batch mode', async () => { + const urlsFile = join(testDir, 'mixed-urls.txt'); + await writeFile(urlsFile, 'https://example.com/valid\ninvalid-url\nhttps://example.com/another-valid'); + + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip + .mockResolvedValueOnce({ + markdown: '# Valid Page', + title: 'Valid Page', + sourceUrl: 'https://example.com/valid', + strategy: 'readability', + images: [], + links: [], + }) + .mockResolvedValueOnce({ + markdown: '# Another Valid Page', + title: 'Another Valid Page', + sourceUrl: 'https://example.com/another-valid', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand([urlsFile], { + batch: true, + outputDir: testDir, + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(0); + expect(mockClip).toHaveBeenCalledTimes(2); + expect(mockClip).toHaveBeenCalledWith('https://example.com/valid'); + expect(mockClip).toHaveBeenCalledWith('https://example.com/another-valid'); + + expect(logs.some(log => log.includes('Successfully clipped: 2'))).toBe(true); + }); + }); + + describe('Option parsing', () => { + it('should parse and pass WebClipper options correctly', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockConstructor = vi.mocked(WebClipper); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# Test', + sourceUrl: 'https://example.com/test', + strategy: 'manual', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + try { + await clipCommand(['https://example.com/test'], { + output: join(testDir, 'test.md'), + strategy: 'manual', + imageStrategy: 'download', + imageDir: './custom-images', + selectors: 'article,.content,main', + headers: '{"Authorization": "Bearer token"}', + timeout: 60000, + userAgent: 'Custom Bot 1.0', + maxRedirects: 10, + verbose: true, + dryRun: false, + }); + } finally { + process.exit = originalExit; + } + + expect(exitCode).toBe(0); + expect(mockConstructor).toHaveBeenCalledWith( + expect.objectContaining({ + strategy: 'manual', + imageStrategy: 'download', + imageDir: './custom-images', + selectors: ['article', '.content', 'main'], + headers: { Authorization: 'Bearer token' }, + timeout: 60000, + userAgent: 'Custom Bot 1.0', + maxRedirects: 10, + verbose: true, + }) + ); + }); + + it('should handle invalid JSON headers gracefully', async () => { + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalError = console.error; + const errors: string[] = []; + console.error = vi.fn((message: string) => { + errors.push(message); + }); + + try { + await clipCommand(['https://example.com/test'], { + headers: 'invalid json', + }); + } finally { + process.exit = originalExit; + console.error = originalError; + } + + expect(exitCode).toBe(1); + expect(errors.some(error => error.includes('Invalid JSON format for headers'))).toBe(true); + }); + }); + + describe('Error handling', () => { + it('should handle clipping failures gracefully', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockRejectedValue(new Error('Failed to fetch URL')); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand(['https://invalid-url.example'], { + output: join(testDir, 'failed.md'), + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(1); + expect(logs.some(log => log.includes('Failed: 1'))).toBe(true); + expect(logs.some(log => log.includes('Failed to fetch URL'))).toBe(true); + }); + + it('should continue processing other URLs when one fails', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip + .mockRejectedValueOnce(new Error('First URL failed')) + .mockResolvedValueOnce({ + markdown: '# Success', + sourceUrl: 'https://example.com/success', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand(['https://fail.example', 'https://example.com/success'], { + outputDir: testDir, + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(1); // Should exit with error because one failed + expect(mockClip).toHaveBeenCalledTimes(2); + expect(logs.some(log => log.includes('Successfully clipped: 1'))).toBe(true); + expect(logs.some(log => log.includes('Failed: 1'))).toBe(true); + }); + }); + + describe('File output', () => { + it('should generate appropriate filenames from URLs', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# Article', + sourceUrl: 'https://example.com/path/to/article', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + try { + await clipCommand(['https://example.com/path/to/article'], { + outputDir: testDir, + }); + } finally { + process.exit = originalExit; + } + + expect(exitCode).toBe(0); + + // Should create a file based on the URL path + const content = await readFile(join(testDir, 'article.md'), 'utf-8'); + expect(content).toBe('# Article'); + }); + + it('should use title for filename when available', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# My Great Article', + title: 'My Great Article with Special Characters!', + sourceUrl: 'https://example.com/article', + strategy: 'readability', + images: [], + links: [], + }); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + try { + await clipCommand(['https://example.com/article'], { + outputDir: testDir, + }); + } finally { + process.exit = originalExit; + } + + expect(exitCode).toBe(0); + + // Check what file was actually created + const { readdir } = await import('node:fs/promises'); + const files = await readdir(testDir); + expect(files.length).toBe(1); + + const content = await readFile(join(testDir, files[0]), 'utf-8'); + expect(content).toBe('# My Great Article'); + }); + + it('should create output directories as needed', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip.mockResolvedValue({ + markdown: '# Test', + sourceUrl: 'https://example.com/test', + strategy: 'readability', + images: [], + links: [], + }); + + const nestedDir = join(testDir, 'nested', 'directory'); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + try { + await clipCommand(['https://example.com/test'], { + outputDir: nestedDir, + }); + } finally { + process.exit = originalExit; + } + + expect(exitCode).toBe(0); + + // Should create nested directories and file + const content = await readFile(join(nestedDir, 'test.md'), 'utf-8'); + expect(content).toBe('# Test'); + }); + }); + + describe('Output formatting', () => { + it('should format results with comprehensive summary', async () => { + const { WebClipper } = await import('../core/web-clipper.js'); + const mockClip = vi.mocked(WebClipper.prototype.clip); + + mockClip + .mockResolvedValueOnce({ + markdown: '# Success 1', + title: 'Success Article 1', + author: 'Author 1', + publishedDate: '2024-01-01', + sourceUrl: 'https://example.com/success1', + strategy: 'readability', + images: [], + links: [], + }) + .mockRejectedValueOnce(new Error('Network error')); + + let exitCode = 0; + const originalExit = process.exit; + process.exit = vi.fn(((code: number | undefined): never => { + exitCode = code || 0; + return null as never; + }) as typeof process.exit); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + try { + await clipCommand(['https://example.com/success1', 'https://example.com/fail'], { + outputDir: testDir, + verbose: true, + }); + } finally { + process.exit = originalExit; + console.log = originalLog; + } + + expect(exitCode).toBe(1); + + const output = logs.join('\n'); + expect(output).toContain('🕷️ Web Clipper Results'); + expect(output).toContain('Successfully clipped: 1'); + expect(output).toContain('Failed: 1'); + expect(output).toContain('Files generated: 1'); + expect(output).toContain('✅ Successfully Clipped:'); + expect(output).toContain('❌ Failed to Clip:'); + expect(output).toContain('📄 Title: Success Article 1'); + expect(output).toContain('✍️ Author: Author 1'); + expect(output).toContain('📅 Published: 2024-01-01'); + expect(output).toContain('🔧 Strategy: readability'); + expect(output).toContain('Network error'); + }); + }); +}); \ No newline at end of file diff --git a/src/commands/clip.ts b/src/commands/clip.ts new file mode 100644 index 0000000..39b910b --- /dev/null +++ b/src/commands/clip.ts @@ -0,0 +1,427 @@ +/** + * Web clipper command for converting web pages to markdown. + * + * @fileoverview Implements comprehensive web page to markdown conversion with multiple extraction strategies + * @category Commands + */ + +import { writeFile, mkdir } from 'node:fs/promises'; +import { dirname, join, basename } from 'node:path'; +import { WebClipper, type WebClipperOptions } from '../core/web-clipper.js'; +import type { OperationResult } from '../types/operations.js'; + +/** + * CLI-specific options for the clip command. + * + * @category Commands + */ +export interface ClipCliOptions { + /** Output file path */ + output?: string; + /** Output directory for clipped files */ + outputDir?: string; + /** Output results in JSON format */ + json?: boolean; + /** Process multiple URLs from a file */ + batch?: boolean; + /** Custom selectors as comma-separated string */ + selectors?: string; + /** Custom headers as JSON string */ + headers?: string; + /** Cookies file path */ + cookies?: string; + /** Don't follow redirects */ + followRedirects?: boolean; + /** Include frontmatter */ + frontmatter?: boolean; + // Inherit from WebClipperOptions but as individual properties + /** Extraction strategy to use */ + strategy?: WebClipperOptions['strategy']; + /** How to handle images */ + imageStrategy?: WebClipperOptions['imageStrategy']; + /** Directory to save downloaded images */ + imageDir?: string; + /** Request timeout in milliseconds */ + timeout?: number; + /** Custom User-Agent string */ + userAgent?: string; + /** Maximum redirects to follow */ + maxRedirects?: number; + /** Show detailed output */ + verbose?: boolean; + /** Show what would be done without doing it */ + dryRun?: boolean; +} + +/** + * Result of a web clipping operation. + * + * @category Commands + */ +export interface ClipResult extends OperationResult { + /** URLs that were successfully clipped */ + clippedUrls: string[]; + /** Generated markdown files */ + generatedFiles: string[]; + /** URLs that failed to clip */ + failedUrls: Array<{ + url: string; + error: string; + }>; + /** Metadata extracted from pages */ + metadata: Array<{ + url: string; + title?: string; + author?: string; + publishedDate?: string; + extractionStrategy: string; + }>; +} + +/** + * Clip web pages to markdown files. + * + * @param urls - URLs to clip or paths to files containing URLs + * @param options - Clipping options + * + * @returns Promise resolving to clipping results + * + * @category Commands + * + * @example + * Basic usage + * ```typescript + * await clipCommand(['https://example.com/article'], { + * output: 'article.md', + * strategy: 'readability' + * }); + * ``` + * + * @example + * Batch processing + * ```typescript + * await clipCommand(['urls.txt'], { + * batch: true, + * outputDir: './clipped', + * downloadImages: true + * }); + * ``` + */ +export async function clipCommand(urls: string[], options: ClipCliOptions = {}): Promise { + try { + // Validate input + if (urls.length === 0) { + console.error('💥 Error: At least one URL must be specified'); + process.exit(1); + } + + // Parse CLI options into WebClipperOptions + const webClipperOptions: WebClipperOptions = {}; + + if (options.strategy) webClipperOptions.strategy = options.strategy; + if (options.imageStrategy) webClipperOptions.imageStrategy = options.imageStrategy; + if (options.imageDir) webClipperOptions.imageDir = options.imageDir; + if (options.frontmatter !== undefined) webClipperOptions.includeFrontmatter = options.frontmatter; + if (options.timeout) webClipperOptions.timeout = options.timeout; + if (options.userAgent) webClipperOptions.userAgent = options.userAgent; + if (options.followRedirects !== undefined) webClipperOptions.followRedirects = options.followRedirects; + if (options.maxRedirects) webClipperOptions.maxRedirects = options.maxRedirects; + if (options.verbose) webClipperOptions.verbose = options.verbose; + if (options.dryRun) webClipperOptions.dryRun = options.dryRun; + + // Parse selectors if provided + if (options.selectors) { + webClipperOptions.selectors = options.selectors.split(',').map(s => s.trim()); + } + + // Parse headers if provided + if (options.headers) { + try { + webClipperOptions.headers = JSON.parse(options.headers); + } catch { + console.error('💥 Error: Invalid JSON format for headers'); + process.exit(1); + } + } + + // Set cookies file if provided + if (options.cookies) { + webClipperOptions.cookiesFile = options.cookies; + } + + // Initialize web clipper + const clipper = new WebClipper(webClipperOptions); + + // Process URLs + const result = await processUrls(urls, options, clipper); + + // Output results + if (options.json) { + console.log(JSON.stringify(result, null, 2)); + } else { + console.log(formatClipResults(result, options)); + } + + // Exit with error code if there were failures + if (result.failedUrls.length > 0) { + process.exit(1); + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + console.error(`💥 Clip command failed: ${errorMessage}`); + process.exit(1); + } +} + +/** + * Process URLs for clipping. + * + * @private + */ +async function processUrls( + urls: string[], + options: ClipCliOptions, + clipper: WebClipper +): Promise { + const result: ClipResult = { + success: true, + modifiedFiles: [], + createdFiles: [], + deletedFiles: [], + errors: [], + warnings: [], + changes: [], + clippedUrls: [], + generatedFiles: [], + failedUrls: [], + metadata: [], + }; + + // Determine if batch processing + const urlsToProcess = options.batch + ? await loadUrlsFromFiles(urls) + : urls.filter(url => isValidUrl(url)); + + if (urlsToProcess.length === 0) { + throw new Error('No valid URLs found to process'); + } + + // Create output directory if specified + if (options.outputDir) { + await mkdir(options.outputDir, { recursive: true }); + } + + // Process each URL + for (const url of urlsToProcess) { + try { + if (options.verbose) { + console.log(`🌐 Clipping: ${url}`); + } + + // Clip the URL + const clipResult = await clipper.clip(url); + + // Determine output file path + const outputPath = determineOutputPath(url, options, clipResult.title); + + // Ensure output directory exists + await mkdir(dirname(outputPath), { recursive: true }); + + // Write markdown file + if (!options.dryRun) { + await writeFile(outputPath, clipResult.markdown, 'utf-8'); + result.createdFiles.push(outputPath); + } + + // Track success + result.clippedUrls.push(url); + result.generatedFiles.push(outputPath); + + const metadata: { + url: string; + title?: string; + author?: string; + publishedDate?: string; + extractionStrategy: string; + } = { url, extractionStrategy: clipResult.strategy }; + + if (clipResult.title) metadata.title = clipResult.title; + if (clipResult.author) metadata.author = clipResult.author; + if (clipResult.publishedDate) metadata.publishedDate = clipResult.publishedDate; + + result.metadata.push(metadata); + + if (options.verbose) { + console.log(`✅ Clipped to: ${outputPath} (strategy: ${clipResult.strategy})`); + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + result.failedUrls.push({ + url, + error: errorMessage, + }); + result.errors.push(`Failed to clip ${url}: ${errorMessage}`); + result.success = false; + + if (options.verbose) { + console.error(`❌ Failed to clip ${url}: ${errorMessage}`); + } + } + } + + return result; +} + +/** + * Load URLs from files for batch processing. + * + * @private + */ +async function loadUrlsFromFiles(filePaths: string[]): Promise { + const urls: string[] = []; + + for (const filePath of filePaths) { + try { + const { readFile } = await import('node:fs/promises'); + const content = await readFile(filePath, 'utf-8'); + const fileUrls = content + .split('\n') + .map(line => line.trim()) + .filter(line => line && !line.startsWith('#') && isValidUrl(line)); + + urls.push(...fileUrls); + } catch (error) { + console.warn(`⚠️ Could not read URL file ${filePath}: ${error}`); + } + } + + return urls; +} + +/** + * Check if a string is a valid URL. + * + * @private + */ +function isValidUrl(string: string): boolean { + try { + const url = new URL(string); + return url.protocol === 'http:' || url.protocol === 'https:'; + } catch { + return false; + } +} + +/** + * Determine output file path for a clipped URL. + * + * @private + */ +function determineOutputPath(url: string, options: ClipCliOptions, title?: string): string { + // If specific output file specified + if (options.output && !options.batch) { + return options.output; + } + + // Generate filename from title or URL + let filename: string; + if (title) { + filename = sanitizeFilename(title) + '.md'; + } else { + const urlObj = new URL(url); + const pathname = urlObj.pathname === '/' ? 'index' : basename(urlObj.pathname); + filename = sanitizeFilename(pathname) + '.md'; + } + + // Use output directory if specified + if (options.outputDir) { + return join(options.outputDir, filename); + } + + return filename; +} + +/** + * Sanitize a string for use as a filename. + * + * @private + */ +function sanitizeFilename(name: string): string { + return name + .replace(/[<>:"/\\|?*]/g, '-') // Replace invalid characters + .replace(/\s+/g, '-') // Replace spaces with hyphens + .replace(/-+/g, '-') // Collapse multiple hyphens + .replace(/^-|-$/g, '') // Remove leading/trailing hyphens + .toLowerCase() + .substring(0, 100); // Limit length +} + +/** + * Format clipping results for display. + * + * @private + */ +function formatClipResults(result: ClipResult, options: ClipCliOptions): string { + const lines: string[] = []; + + // Header + lines.push('🕷️ Web Clipper Results'); + lines.push(''.padEnd(40, '=')); + + // Summary + lines.push(`\n📊 Summary:`); + lines.push(` Successfully clipped: ${result.clippedUrls.length}`); + lines.push(` Failed: ${result.failedUrls.length}`); + lines.push(` Files generated: ${result.generatedFiles.length}`); + + if (options.dryRun) { + lines.push('\n🔍 Dry run - no files were actually created'); + } + + // Successful clips + if (result.clippedUrls.length > 0) { + lines.push('\n✅ Successfully Clipped:'); + lines.push(''.padEnd(30, '-')); + + result.metadata.forEach(meta => { + lines.push(`\n🌐 ${meta.url}`); + if (meta.title) { + lines.push(` 📄 Title: ${meta.title}`); + } + if (meta.author) { + lines.push(` ✍️ Author: ${meta.author}`); + } + if (meta.publishedDate) { + lines.push(` 📅 Published: ${meta.publishedDate}`); + } + lines.push(` 🔧 Strategy: ${meta.extractionStrategy}`); + + const outputFile = result.generatedFiles[result.metadata.indexOf(meta)]; + if (outputFile) { + lines.push(` 💾 Saved to: ${outputFile}`); + } + }); + } + + // Failed clips + if (result.failedUrls.length > 0) { + lines.push('\n❌ Failed to Clip:'); + lines.push(''.padEnd(30, '-')); + + result.failedUrls.forEach(failed => { + lines.push(`\n🌐 ${failed.url}`); + lines.push(` 💥 Error: ${failed.error}`); + }); + } + + // Warnings + if (result.warnings.length > 0) { + lines.push('\n⚠️ Warnings:'); + lines.push(''.padEnd(30, '-')); + result.warnings.forEach(warning => { + lines.push(` ⚠️ ${warning}`); + }); + } + + return lines.join('\n'); +} \ No newline at end of file diff --git a/src/core/web-clipper.test.ts b/src/core/web-clipper.test.ts new file mode 100644 index 0000000..8b9bd55 --- /dev/null +++ b/src/core/web-clipper.test.ts @@ -0,0 +1,804 @@ +/** + * Tests for the WebClipper core class. + * + * @fileoverview Tests for web page content extraction and processing + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { WebClipper } from './web-clipper.js'; + +// Mock external dependencies +vi.mock('jsdom', () => ({ + JSDOM: vi.fn().mockImplementation((_html: string, _options: { url: string }) => ({ + window: { + document: { + title: 'Test Article', + body: { + textContent: 'Test content', + }, + }, + }, + })), +})); + +vi.mock('@mozilla/readability', () => ({ + Readability: vi.fn().mockImplementation((_document: Document) => ({ + parse: vi.fn().mockReturnValue({ + title: 'Test Article', + byline: 'Test Author', + excerpt: 'Test excerpt', + content: '

Test Article

This is test content.

', + }), + })), +})); + +vi.mock('turndown', () => { + const TurndownService = vi.fn().mockImplementation(() => ({ + turndown: vi.fn().mockReturnValue('# Test Article\n\nThis is test content.'), + addRule: vi.fn(), + })); + return { default: TurndownService }; +}); + +vi.mock('node-html-parser', () => ({ + parse: vi.fn().mockImplementation((html: string) => { + const baseElement = { + innerHTML: '

Test Article

This is test content.

', + querySelectorAll: vi.fn().mockImplementation((selector: string) => { + if (selector === 'img') { + return [ + { getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'src') return '/image1.jpg'; + if (attr === 'alt') return 'Image 1'; + return null; + })}, + { getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'src') return 'https://external.com/image2.png'; + if (attr === 'alt') return 'Image 2'; + return null; + })}, + { getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'src') return 'relative-image.gif'; + if (attr === 'alt') return null; + return null; + })}, + ]; + } + if (selector === 'a[href]') { + return [ + { + getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'href') return '/internal-page'; + return null; + }), + text: 'this internal link', + }, + { + getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'href') return 'https://external.com'; + return null; + }), + text: 'this external link', + }, + ]; + } + return []; + }), + }; + + return { + querySelector: vi.fn().mockImplementation((selector: string) => { + if (selector === 'body') { + return { + ...baseElement, + innerHTML: '

Test Article

This is test content.

', + }; + } + if (selector === '.article-content') { + return { + ...baseElement, + innerHTML: '

Manual Extraction

This content was extracted manually.

', + }; + } + if (selector === 'h1') { + return { + text: 'Test Title', + getAttribute: vi.fn().mockReturnValue(null), + }; + } + if (selector === 'article') { + return baseElement; + } + if (selector === '[role="main"]') { + return baseElement; + } + if (selector === '[itemscope]') { + if (html.includes('itemscope')) return baseElement; + return null; + } + if (selector === 'script[type="application/ld+json"]') { + if (html.includes('application/ld+json')) { + return { + innerHTML: '{"@context": "http://schema.org", "@type": "Article", "headline": "Test"}', + }; + } + return null; + } + return null; + }), + querySelectorAll: vi.fn().mockImplementation((selector: string) => { + if (selector === 'img') { + return [ + { getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'src') return '/image1.jpg'; + if (attr === 'alt') return 'Image 1'; + return null; + })}, + { getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'src') return 'https://external.com/image2.png'; + if (attr === 'alt') return 'Image 2'; + return null; + })}, + { getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'src') return 'relative-image.gif'; + if (attr === 'alt') return null; + return null; + })}, + ]; + } + if (selector === 'a[href]') { + return [ + { + getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'href') return '/internal-page'; + return null; + }), + text: 'this internal link', + }, + { + getAttribute: vi.fn().mockImplementation((attr: string) => { + if (attr === 'href') return 'https://external.com'; + return null; + }), + text: 'this external link', + }, + ]; + } + return []; + }), + }; + }), +})); + +// Mock fetch +global.fetch = vi.fn(); + +describe('WebClipper', () => { + let clipper: WebClipper; + + beforeEach(() => { + vi.clearAllMocks(); + clipper = new WebClipper({ + strategy: 'readability', + verbose: false, + }); + }); + + describe('Initialization', () => { + it('should initialize with default options', () => { + const defaultClipper = new WebClipper(); + expect(defaultClipper).toBeDefined(); + }); + + it('should initialize with custom options', () => { + const customClipper = new WebClipper({ + strategy: 'manual', + imageStrategy: 'download', + selectors: ['article', '.content'], + timeout: 60000, + verbose: true, + }); + expect(customClipper).toBeDefined(); + }); + }); + + describe('URL fetching', () => { + it('should fetch HTML content successfully', async () => { + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await clipper.clip('https://example.com/article'); + + expect(fetch).toHaveBeenCalledWith('https://example.com/article', expect.objectContaining({ + headers: expect.objectContaining({ + 'User-Agent': expect.stringContaining('markmv-clipper'), + 'Accept': expect.stringContaining('text/html'), + }), + })); + expect(result.sourceUrl).toBe('https://example.com/article'); + }); + + it('should handle HTTP errors', async () => { + const mockResponse = { + ok: false, + status: 404, + statusText: 'Not Found', + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + await expect(clipper.clip('https://example.com/nonexistent')).rejects.toThrow('HTTP 404: Not Found'); + }); + + it('should handle network errors', async () => { + vi.mocked(fetch).mockRejectedValue(new Error('Network error')); + + await expect(clipper.clip('https://example.com/unreachable')).rejects.toThrow('Network error'); + }); + + it('should respect timeout', async () => { + const timeoutClipper = new WebClipper({ timeout: 100 }); + + // Mock a slow response + vi.mocked(fetch).mockImplementation(() => + new Promise(resolve => setTimeout(resolve, 200)) + ); + + await expect(timeoutClipper.clip('https://slow.example.com')).rejects.toThrow(); + }); + + it('should use custom headers when provided', async () => { + const headerClipper = new WebClipper({ + headers: { + 'Authorization': 'Bearer token123', + 'Custom-Header': 'custom-value', + }, + userAgent: 'Custom Bot 1.0', + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + await headerClipper.clip('https://example.com/protected'); + + expect(fetch).toHaveBeenCalledWith('https://example.com/protected', expect.objectContaining({ + headers: expect.objectContaining({ + 'Authorization': 'Bearer token123', + 'Custom-Header': 'custom-value', + 'User-Agent': 'Custom Bot 1.0', + }), + })); + }); + }); + + describe('Strategy determination', () => { + it('should auto-detect readability strategy for articles', async () => { + const autoClipper = new WebClipper({ strategy: 'auto' }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
+

Blog Post Title

+

This is a blog post.

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await autoClipper.clip('https://blog.example.com/post'); + expect(result.strategy).toBe('readability'); + }); + + it('should auto-detect manual strategy for documentation', async () => { + const autoClipper = new WebClipper({ strategy: 'auto' }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
+

API Documentation

+

API docs content.

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await autoClipper.clip('https://example.com/docs/api'); + expect(result.strategy).toBe('manual'); + }); + + it('should auto-detect structured strategy for schema.org content', async () => { + const autoClipper = new WebClipper({ strategy: 'auto' }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
+

Structured Article

+

Article content.

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await autoClipper.clip('https://example.com/structured'); + expect(result.strategy).toBe('structured'); + }); + }); + + describe('Content extraction strategies', () => { + it('should extract content using readability strategy', async () => { + const readabilityClipper = new WebClipper({ + strategy: 'readability', + includeFrontmatter: false, + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

Content

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await readabilityClipper.clip('https://example.com/article'); + + expect(result.strategy).toBe('readability'); + expect(result.title).toBe('Test Article'); + expect(result.author).toBe('Test Author'); + expect(result.description).toBe('Test excerpt'); + expect(result.markdown).toBe('# Test Article\n\nThis is test content.'); + }); + + it('should extract content using manual strategy with custom selectors', async () => { + const manualClipper = new WebClipper({ + strategy: 'manual', + selectors: ['.article-content', 'main'], + includeFrontmatter: false, + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
+

Manual Extraction

+

This content was extracted manually.

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await manualClipper.clip('https://example.com/manual'); + + expect(result.strategy).toBe('manual'); + expect(result.markdown).toBe('# Test Article\n\nThis is test content.'); + }); + + it('should extract content using full page strategy', async () => { + const fullClipper = new WebClipper({ + strategy: 'full', + includeFrontmatter: false, + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
Header content
+
Main content
+
Footer content
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await fullClipper.clip('https://example.com/full'); + + expect(result.strategy).toBe('full'); + expect(result.markdown).toBe('# Test Article\n\nThis is test content.'); + }); + + it('should extract content using structured strategy', async () => { + const structuredClipper = new WebClipper({ + strategy: 'structured', + includeFrontmatter: false, + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + + +
+

Structured Content

+

Content with structured data.

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await structuredClipper.clip('https://example.com/structured'); + + expect(result.strategy).toBe('structured'); + expect(result.structuredData).toBeDefined(); + expect(result.structuredData?.['@type']).toBe('Article'); + expect(result.markdown).toBe('# Test Article\n\nThis is test content.'); + }); + }); + + describe('Metadata extraction', () => { + it('should extract title from various sources', async () => { + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + + Page Title + + + +

Main Heading

+

Content

+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await clipper.clip('https://example.com/metadata'); + expect(result.title).toBe('Test Article'); + }); + + it('should extract published date from various sources', async () => { + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + + + + +
+

Article with Date

+

Content

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await clipper.clip('https://example.com/dated-article'); + expect(result.publishedDate).toBe('2024-01-01T12:00:00Z'); + }); + + it('should extract author information', async () => { + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
+

Article Title

+
Jane Doe
+

Article content

+
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await clipper.clip('https://example.com/authored-article'); + expect(result.author).toBe('Test Author'); + }); + }); + + describe('Link and image processing', () => { + it('should extract and classify links', async () => { + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + + + + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await clipper.clip('https://example.com/article-with-links'); + + expect(result.links).toBeDefined(); + expect(result.links.length).toBeGreaterThan(0); + expect(result.links.some(link => link.type === 'internal')).toBe(true); + expect(result.links.some(link => link.type === 'external')).toBe(true); + }); + + it('should extract images with metadata', async () => { + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + +
+

Article with Images

+ Image 1 + Image 2 + +
+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await clipper.clip('https://example.com/article-with-images'); + + expect(result.images).toBeDefined(); + expect(result.images.length).toBe(3); + expect(result.images[0].originalUrl).toBe('https://example.com/image1.jpg'); + expect(result.images[0].alt).toBe('Image 1'); + expect(result.images[1].originalUrl).toBe('https://external.com/image2.png'); + expect(result.images[2].originalUrl).toBe('https://example.com/relative-image.gif'); + }); + + it('should handle different image strategies', async () => { + const skipImagesClipper = new WebClipper({ imageStrategy: 'skip' }); + const linkOnlyClipper = new WebClipper({ imageStrategy: 'link-only' }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('
'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const skipResult = await skipImagesClipper.clip('https://example.com/images'); + const linkResult = await linkOnlyClipper.clip('https://example.com/images'); + + expect(skipResult.images).toBeDefined(); + expect(linkResult.images).toBeDefined(); + }); + }); + + describe('Frontmatter generation', () => { + it('should generate frontmatter when enabled', async () => { + const frontmatterClipper = new WebClipper({ + includeFrontmatter: true, + strategy: 'readability', + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await frontmatterClipper.clip('https://example.com/frontmatter-test'); + + expect(result.markdown).toContain('---'); + expect(result.markdown).toContain('title:'); + expect(result.markdown).toContain('source:'); + expect(result.markdown).toContain('clipped:'); + }); + + it('should skip frontmatter when disabled', async () => { + const noFrontmatterClipper = new WebClipper({ + includeFrontmatter: false, + strategy: 'readability', + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + const result = await noFrontmatterClipper.clip('https://example.com/no-frontmatter'); + + expect(result.markdown).not.toContain('---'); + expect(result.markdown).not.toContain('title:'); + }); + }); + + describe('Error handling', () => { + it('should handle Readability parsing failures', async () => { + const failingClipper = new WebClipper({ strategy: 'readability' }); + + // Mock the existing Readability mock to return null for parse() + const { Readability } = await import('@mozilla/readability'); + const mockInstance = { + parse: vi.fn().mockReturnValue(null), + }; + vi.mocked(Readability).mockImplementationOnce(() => mockInstance as any); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Unparseable content

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + await expect(failingClipper.clip('https://example.com/unparseable')).rejects.toThrow('Could not extract article content using Readability'); + }); + + it('should handle selector not found in manual strategy', async () => { + const manualClipper = new WebClipper({ + strategy: 'manual', + selectors: ['.nonexistent-selector'], + }); + + // Create a custom mock that returns null for the nonexistent selector + const customParse = vi.fn().mockReturnValue({ + querySelector: vi.fn().mockImplementation((selector: string) => { + if (selector === '.nonexistent-selector') return null; + if (selector === 'body') return null; // Also make body return null + return null; + }), + querySelectorAll: vi.fn().mockReturnValue([]), + }); + + // Override the parse function temporarily + const { parse } = await import('node-html-parser'); + vi.mocked(parse).mockImplementationOnce(customParse); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

No matching selectors

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + await expect(manualClipper.clip('https://example.com/no-selectors')).rejects.toThrow('Could not find content with specified selectors'); + }); + + it('should handle missing body element in full strategy', async () => { + const fullClipper = new WebClipper({ strategy: 'full' }); + + // Create a custom mock that returns null for body + const customParse = vi.fn().mockReturnValue({ + querySelector: vi.fn().mockImplementation((selector: string) => { + if (selector === 'body') return null; + return null; + }), + querySelectorAll: vi.fn().mockReturnValue([]), + }); + + // Override the parse function temporarily + const { parse } = await import('node-html-parser'); + vi.mocked(parse).mockImplementationOnce(customParse); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('No Body'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + await expect(fullClipper.clip('https://example.com/no-body')).rejects.toThrow('Could not find body element'); + }); + + it('should handle invalid JSON-LD structured data gracefully', async () => { + const structuredClipper = new WebClipper({ strategy: 'structured' }); + + // Create a custom mock that returns invalid JSON + const customParse = vi.fn().mockReturnValue({ + querySelector: vi.fn().mockImplementation((selector: string) => { + if (selector === 'script[type="application/ld+json"]') { + return { + innerHTML: '{ invalid json }', // This will cause JSON.parse to fail + }; + } + if (selector === 'article') { + return { + innerHTML: '

Test

', + querySelectorAll: vi.fn().mockReturnValue([]), + }; + } + return null; + }), + querySelectorAll: vi.fn().mockReturnValue([]), + }); + + // Override the parse function temporarily + const { parse } = await import('node-html-parser'); + vi.mocked(parse).mockImplementationOnce(customParse); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue(` + + + +

Test

+ + + `), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + // Should not throw, just ignore invalid JSON + const result = await structuredClipper.clip('https://example.com/invalid-json'); + expect(result.strategy).toBe('structured'); + expect(result.structuredData).toBeUndefined(); + }); + }); + + describe('Configuration options', () => { + it('should respect redirect settings', async () => { + const noRedirectClipper = new WebClipper({ followRedirects: false }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + await noRedirectClipper.clip('https://example.com/test'); + + expect(fetch).toHaveBeenCalledWith('https://example.com/test', expect.objectContaining({ + redirect: 'manual', + })); + }); + + it('should handle verbose logging', async () => { + const verboseClipper = new WebClipper({ verbose: true }); + + const originalLog = console.log; + const logs: string[] = []; + console.log = vi.fn((message: string) => { + logs.push(message); + }); + + const mockResponse = { + ok: true, + text: vi.fn().mockResolvedValue('

Test

'), + }; + vi.mocked(fetch).mockResolvedValue(mockResponse as any); + + try { + await verboseClipper.clip('https://example.com/verbose'); + expect(logs.some(log => log.includes('Fetching:'))).toBe(true); + expect(logs.some(log => log.includes('Using strategy:'))).toBe(true); + } finally { + console.log = originalLog; + } + }); + }); +}); \ No newline at end of file diff --git a/src/core/web-clipper.ts b/src/core/web-clipper.ts new file mode 100644 index 0000000..e17ea2b --- /dev/null +++ b/src/core/web-clipper.ts @@ -0,0 +1,745 @@ +/** + * Core web clipper for converting web pages to markdown. + * + * @fileoverview Implements multiple extraction strategies for different types of web content + * @category Core + */ + +import { parse, HTMLElement } from 'node-html-parser'; +import TurndownService from 'turndown'; +import { Readability } from '@mozilla/readability'; +import { JSDOM } from 'jsdom'; + +/** + * Extraction strategies for different types of content. + * + * @category Core + */ +export type ExtractionStrategy = + | 'auto' // Automatically choose best strategy + | 'readability' // Mozilla Readability algorithm + | 'manual' // Custom selectors + | 'full' // Full page content + | 'structured' // Schema.org and semantic extraction + | 'headless'; // Browser automation (future) + +/** + * Image handling strategies. + * + * @category Core + */ +export type ImageStrategy = + | 'skip' // Don't process images + | 'link-only' // Keep as external links + | 'download' // Download and save locally + | 'base64'; // Embed as base64 (small images only) + +/** + * Options for web clipping operations. + * + * @category Core + */ +export interface WebClipperOptions { + /** Extraction strategy to use */ + strategy?: ExtractionStrategy; + /** How to handle images */ + imageStrategy?: ImageStrategy; + /** Directory to save downloaded images */ + imageDir?: string; + /** Custom CSS selectors for manual extraction */ + selectors?: string[]; + /** Include metadata in frontmatter */ + includeFrontmatter?: boolean; + /** Request timeout in milliseconds */ + timeout?: number; + /** Custom User-Agent string */ + userAgent?: string; + /** Custom HTTP headers */ + headers?: Record; + /** Path to cookies file */ + cookiesFile?: string; + /** Follow redirects */ + followRedirects?: boolean; + /** Maximum redirects to follow */ + maxRedirects?: number; + /** Show detailed output */ + verbose?: boolean; + /** Show what would be done without doing it */ + dryRun?: boolean; +} + +/** + * Content extracted from a web page. + * + * @category Core + */ +interface ExtractedContent { + /** Extracted title */ + title?: string; + /** Extracted author */ + author?: string; + /** Published date */ + publishedDate?: string; + /** Description/excerpt */ + description?: string; + /** HTML content */ + content: string; + /** Images found in content */ + images: Array<{ + originalUrl: string; + alt: string | undefined; + processed: boolean; + }>; + /** Links found in content */ + links: Array<{ + url: string; + text: string; + type: 'internal' | 'external'; + }>; + /** Structured data found */ + structuredData?: Record; +} + +/** + * Result of a web clipping operation. + * + * @category Core + */ +export interface ClipResult { + /** Generated markdown content */ + markdown: string; + /** Extracted title */ + title?: string; + /** Extracted author */ + author?: string; + /** Published date */ + publishedDate?: string; + /** Description/excerpt */ + description?: string; + /** Source URL */ + sourceUrl: string; + /** Extraction strategy used */ + strategy: ExtractionStrategy; + /** Images found and processed */ + images: Array<{ + originalUrl: string; + localPath?: string; + alt: string | undefined; + processed: boolean; + }>; + /** Links found in content */ + links: Array<{ + url: string; + text: string; + type: 'internal' | 'external'; + }>; + /** Structured data found */ + structuredData?: Record; +} + +/** + * Default options for web clipping. + */ +const DEFAULT_CLIPPER_OPTIONS: Required> = { + strategy: 'auto', + imageStrategy: 'link-only', + imageDir: './images', + includeFrontmatter: true, + timeout: 30000, + userAgent: 'Mozilla/5.0 (compatible; markmv-clipper/1.0)', + followRedirects: true, + maxRedirects: 5, + verbose: false, + dryRun: false, +}; + +/** + * Core web clipper class with multiple extraction strategies. + * + * Provides comprehensive web page to markdown conversion with support for different + * content types, extraction strategies, and output formats. + * + * @category Core + * + * @example + * Basic usage + * ```typescript + * const clipper = new WebClipper({ + * strategy: 'readability', + * imageStrategy: 'download' + * }); + * + * const result = await clipper.clip('https://example.com/article'); + * console.log(result.markdown); + * ``` + * + * @example + * Custom extraction + * ```typescript + * const clipper = new WebClipper({ + * strategy: 'manual', + * selectors: ['article', '.content', 'main'] + * }); + * + * const result = await clipper.clip('https://docs.example.com'); + * ``` + */ +export class WebClipper { + private options: Required> & + Pick; + private turndown: TurndownService; + + constructor(options: WebClipperOptions = {}) { + this.options = { ...DEFAULT_CLIPPER_OPTIONS, ...options }; + this.turndown = this.configureTurndown(); + } + + /** + * Clip a web page to markdown. + * + * @param url - URL to clip + * + * @returns Promise resolving to clip result + */ + async clip(url: string): Promise { + if (this.options.verbose) { + console.log(`🌐 Fetching: ${url}`); + } + + // Fetch the web page + const html = await this.fetchHtml(url); + + // Determine extraction strategy + const strategy = this.options.strategy === 'auto' + ? this.determineStrategy(html, url) + : this.options.strategy; + + if (this.options.verbose) { + console.log(`🔧 Using strategy: ${strategy}`); + } + + // Extract content using chosen strategy + const extracted = await this.extractContent(html, url, strategy); + + // Process images if needed + const processedImages = await this.processImages(extracted.images, url); + + // Generate markdown + const markdown = await this.generateMarkdown(extracted, processedImages, url); + + const result: ClipResult = { + markdown, + sourceUrl: url, + strategy, + images: processedImages, + links: extracted.links, + }; + + if (extracted.title) result.title = extracted.title; + if (extracted.author) result.author = extracted.author; + if (extracted.publishedDate) result.publishedDate = extracted.publishedDate; + if (extracted.description) result.description = extracted.description; + if (extracted.structuredData) result.structuredData = extracted.structuredData; + + return result; + } + + /** + * Fetch HTML content from a URL. + * + * @private + */ + private async fetchHtml(url: string): Promise { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), this.options.timeout); + + try { + const headers: Record = { + 'User-Agent': this.options.userAgent, + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'Accept-Encoding': 'gzip, deflate', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + ...this.options.headers, + }; + + const response = await fetch(url, { + headers, + signal: controller.signal, + redirect: this.options.followRedirects ? 'follow' : 'manual', + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + return await response.text(); + } finally { + clearTimeout(timeoutId); + } + } + + /** + * Determine the best extraction strategy for content. + * + * @private + */ + private determineStrategy(html: string, url: string): ExtractionStrategy { + const root = parse(html); + + // Check for common article patterns + const articleSelectors = ['article', '[role="main"]', '.post-content', '.entry-content']; + const hasArticle = articleSelectors.some(selector => root.querySelector(selector)); + + // Check for documentation patterns + const docsPatterns = ['/docs/', '/documentation/', '/api/', '/guide/']; + const isDocs = docsPatterns.some(pattern => url.includes(pattern)); + + // Check for blog patterns + const blogPatterns = ['/blog/', '/post/', '/article/']; + const isBlog = blogPatterns.some(pattern => url.includes(pattern)); + + // Check for structured data + const hasStructuredData = root.querySelector('[itemscope]') || + root.querySelector('script[type="application/ld+json"]'); + + if (hasStructuredData) { + return 'structured'; + } else if (hasArticle && (isBlog || url.includes('medium.com') || url.includes('dev.to'))) { + return 'readability'; + } else if (isDocs) { + return 'manual'; + } else { + return 'readability'; + } + } + + /** + * Extract content using the specified strategy. + * + * @private + */ + private async extractContent(html: string, url: string, strategy: ExtractionStrategy): Promise { + switch (strategy) { + case 'readability': + return this.extractWithReadability(html, url); + case 'manual': + return this.extractWithSelectors(html, url); + case 'full': + return this.extractFullPage(html, url); + case 'structured': + return this.extractStructured(html, url); + default: + return this.extractWithReadability(html, url); + } + } + + /** + * Extract content using Mozilla Readability. + * + * @private + */ + private extractWithReadability(html: string, url: string): ExtractedContent { + const dom = new JSDOM(html, { url }); + const reader = new Readability(dom.window.document); + const article = reader.parse(); + + if (!article || !article.content) { + throw new Error('Could not extract article content using Readability'); + } + + const root = parse(article.content); + + const result: ExtractedContent = { + content: article.content, + images: this.extractImages(root, url), + links: this.extractLinks(root, url), + }; + + if (article.title) result.title = article.title; + if (article.byline) result.author = article.byline; + if (article.excerpt) result.description = article.excerpt; + + const publishedDate = this.extractPublishedDate(html); + if (publishedDate) result.publishedDate = publishedDate; + + return result; + } + + /** + * Extract content using custom selectors. + * + * @private + */ + private extractWithSelectors(html: string, url: string): ExtractedContent { + const root = parse(html); + + const selectors = this.options.selectors || [ + 'article', + '[role="main"]', + '.content', + '#content', + 'main', + '.post-content', + '.entry-content', + ]; + + let contentElement: HTMLElement | null = null; + + for (const selector of selectors) { + contentElement = root.querySelector(selector); + if (contentElement) break; + } + + if (!contentElement) { + // Fallback to body + contentElement = root.querySelector('body'); + } + + if (!contentElement) { + throw new Error('Could not find content with specified selectors'); + } + + const result: ExtractedContent = { + content: contentElement.innerHTML, + images: this.extractImages(contentElement, url), + links: this.extractLinks(contentElement, url), + }; + + const title = this.extractTitle(root); + if (title) result.title = title; + + const author = this.extractAuthor(root); + if (author) result.author = author; + + const publishedDate = this.extractPublishedDate(html); + if (publishedDate) result.publishedDate = publishedDate; + + const description = this.extractDescription(root); + if (description) result.description = description; + + return result; + } + + /** + * Extract full page content. + * + * @private + */ + private extractFullPage(html: string, url: string): ExtractedContent { + const root = parse(html); + const body = root.querySelector('body'); + + if (!body) { + throw new Error('Could not find body element'); + } + + const result: ExtractedContent = { + content: body.innerHTML, + images: this.extractImages(body, url), + links: this.extractLinks(body, url), + }; + + const title = this.extractTitle(root); + if (title) result.title = title; + + const author = this.extractAuthor(root); + if (author) result.author = author; + + const publishedDate = this.extractPublishedDate(html); + if (publishedDate) result.publishedDate = publishedDate; + + const description = this.extractDescription(root); + if (description) result.description = description; + + return result; + } + + /** + * Extract content using structured data. + * + * @private + */ + private extractStructured(html: string, url: string): ExtractedContent { + const root = parse(html); + + // Try JSON-LD structured data first + const jsonLdScript = root.querySelector('script[type="application/ld+json"]'); + let structuredData: Record | undefined; + + if (jsonLdScript) { + try { + structuredData = JSON.parse(jsonLdScript.innerHTML); + } catch { + // Ignore JSON parsing errors + } + } + + // For now, fall back to readability with structured data + const content = this.extractWithReadability(html, url); + + const result: ExtractedContent = { + ...content, + }; + + if (structuredData) { + result.structuredData = structuredData; + } + + return result; + } + + /** + * Extract title from HTML. + * + * @private + */ + private extractTitle(root: HTMLElement): string | undefined { + // Try various title sources in order of preference + const titleSelectors = [ + 'h1', + 'title', + '[property="og:title"]', + '[name="twitter:title"]', + '.title', + '#title', + ]; + + for (const selector of titleSelectors) { + const element = root.querySelector(selector); + if (element) { + const title = element.getAttribute('content') || element.text?.trim() || ''; + if (title) return title; + } + } + + return undefined; + } + + /** + * Extract author from HTML. + * + * @private + */ + private extractAuthor(root: HTMLElement): string | undefined { + const authorSelectors = [ + '[rel="author"]', + '.author', + '.byline', + '[property="article:author"]', + '[name="author"]', + ]; + + for (const selector of authorSelectors) { + const element = root.querySelector(selector); + if (element) { + const author = element.getAttribute('content') || element.text?.trim() || ''; + if (author) return author; + } + } + + return undefined; + } + + /** + * Extract published date from HTML. + * + * @private + */ + private extractPublishedDate(html: string): string | undefined { + // Look for various date patterns in the HTML + const datePatterns = [ + /"datePublished":\s*"([^"]+)"/, + /"published_time":\s*"([^"]+)"/, + /property="article:published_time"\s+content="([^"]+)"/, + /name="date"\s+content="([^"]+)"/, + ]; + + for (const pattern of datePatterns) { + const match = html.match(pattern); + if (match) return match[1]; + } + + return undefined; + } + + /** + * Extract description from HTML. + * + * @private + */ + private extractDescription(root: HTMLElement): string | undefined { + const descriptionSelectors = [ + '[property="og:description"]', + '[name="description"]', + '[name="twitter:description"]', + ]; + + for (const selector of descriptionSelectors) { + const element = root.querySelector(selector); + if (element) { + const description = element.getAttribute('content'); + if (description) return description; + } + } + + return undefined; + } + + /** + * Extract images from content. + * + * @private + */ + private extractImages(root: HTMLElement, baseUrl: string) { + const images = root.querySelectorAll('img'); + + return images.map(img => { + const alt = img.getAttribute('alt'); + return { + originalUrl: this.resolveUrl(img.getAttribute('src') || '', baseUrl), + alt: alt || undefined, + processed: false, + }; + }); + } + + /** + * Extract links from content. + * + * @private + */ + private extractLinks(root: HTMLElement, baseUrl: string) { + const links = root.querySelectorAll('a[href]'); + + return links.map(link => { + const url = this.resolveUrl(link.getAttribute('href') || '', baseUrl); + const text = link.text?.trim() || ''; + const type = this.isInternalLink(url, baseUrl) ? 'internal' : 'external'; + + return { url, text, type } as const; + }); + } + + /** + * Process images according to the image strategy. + * + * @private + */ + private async processImages(images: Array<{ originalUrl: string; alt: string | undefined; processed: boolean }>, _baseUrl: string) { + // For now, just mark as processed without downloading + // TODO: Implement actual image downloading and processing + return images.map(img => ({ + originalUrl: img.originalUrl, + alt: img.alt, + processed: true, + })); + } + + /** + * Generate final markdown content. + * + * @private + */ + private async generateMarkdown(extracted: ExtractedContent, _images: ClipResult['images'], sourceUrl: string): Promise { + const parts: string[] = []; + + // Add frontmatter if requested + if (this.options.includeFrontmatter) { + const frontmatter = this.generateFrontmatter(extracted, sourceUrl); + if (frontmatter) { + parts.push('---'); + parts.push(frontmatter); + parts.push('---\n'); + } + } + + // Convert HTML to markdown + const markdown = this.turndown.turndown(extracted.content); + parts.push(markdown); + + return parts.join('\n'); + } + + /** + * Generate frontmatter for the markdown file. + * + * @private + */ + private generateFrontmatter(extracted: ExtractedContent, sourceUrl: string): string { + const frontmatter: Record = {}; + + if (extracted.title) frontmatter.title = extracted.title; + if (extracted.author) frontmatter.author = extracted.author; + if (extracted.publishedDate) frontmatter.published = extracted.publishedDate; + if (extracted.description) frontmatter.description = extracted.description; + + frontmatter.source = sourceUrl; + frontmatter.clipped = new Date().toISOString(); + + if (Object.keys(frontmatter).length === 0) return ''; + + return Object.entries(frontmatter) + .map(([key, value]) => `${key}: ${JSON.stringify(value)}`) + .join('\n'); + } + + /** + * Configure Turndown service for HTML to Markdown conversion. + * + * @private + */ + private configureTurndown(): TurndownService { + const turndown = new TurndownService({ + headingStyle: 'atx', + codeBlockStyle: 'fenced', + fence: '```', + emDelimiter: '*', + strongDelimiter: '**', + linkStyle: 'inlined', + linkReferenceStyle: 'full', + }); + + // Custom rules for better conversion + turndown.addRule('strikethrough', { + filter: ['del', 's'], + replacement: (content) => `~~${content}~~`, + }); + + turndown.addRule('highlight', { + filter: ['mark'], + replacement: (content) => `==${content}==`, + }); + + return turndown; + } + + /** + * Resolve a URL relative to a base URL. + * + * @private + */ + private resolveUrl(url: string, baseUrl: string): string { + try { + return new URL(url, baseUrl).toString(); + } catch { + return url; + } + } + + /** + * Check if a URL is internal to the base domain. + * + * @private + */ + private isInternalLink(url: string, baseUrl: string): boolean { + try { + const urlObj = new URL(url); + const baseObj = new URL(baseUrl); + return urlObj.hostname === baseObj.hostname; + } catch { + return false; + } + } +} \ No newline at end of file diff --git a/src/generated/ajv-validators.ts b/src/generated/ajv-validators.ts index 60eb2df..59cb9ae 100644 --- a/src/generated/ajv-validators.ts +++ b/src/generated/ajv-validators.ts @@ -1,431 +1,447 @@ /** * Auto-generated AJV validators for markmv API methods - * + * * DO NOT EDIT MANUALLY - This file is auto-generated */ import Ajv from 'ajv'; -const ajv = new Ajv({ - allErrors: true, +const ajv = new Ajv({ + allErrors: true, verbose: true, - strict: false, + strict: false }); // Schema definitions export const schemas = { - $schema: 'http://json-schema.org/draft-07/schema#', - title: 'markmv API Schemas', - description: 'Auto-generated schemas for markmv methods with @group annotations', - definitions: { - moveFile: { - title: 'moveFile', - description: 'Move a single markdown file and update all references', - type: 'object', - properties: { - input: { - type: 'object', - properties: { - sourcePath: { - type: 'string', - description: 'Source file path', + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "markmv API Schemas", + "description": "Auto-generated schemas for markmv methods with @group annotations", + "definitions": { + "moveFile": { + "title": "moveFile", + "description": "Move a single markdown file and update all references", + "type": "object", + "properties": { + "input": { + "type": "object", + "properties": { + "sourcePath": { + "type": "string", + "description": "Source file path" }, - destinationPath: { - type: 'string', - description: 'Destination file path', + "destinationPath": { + "type": "string", + "description": "Destination file path" }, - options: { - type: 'object', - properties: { - dryRun: { - type: 'boolean', - description: 'Show changes without executing', + "options": { + "type": "object", + "properties": { + "dryRun": { + "type": "boolean", + "description": "Show changes without executing" }, - verbose: { - type: 'boolean', - description: 'Show detailed output', + "verbose": { + "type": "boolean", + "description": "Show detailed output" }, - force: { - type: 'boolean', - description: 'Force operation even if conflicts exist', - }, - createDirectories: { - type: 'boolean', - description: 'Create missing directories', + "force": { + "type": "boolean", + "description": "Force operation even if conflicts exist" }, + "createDirectories": { + "type": "boolean", + "description": "Create missing directories" + } }, - additionalProperties: false, - }, + "additionalProperties": false + } }, - required: ['sourcePath', 'destinationPath'], - additionalProperties: false, + "required": [ + "sourcePath", + "destinationPath" + ], + "additionalProperties": false }, - output: { - type: 'object', - properties: { - success: { - type: 'boolean', + "output": { + "type": "object", + "properties": { + "success": { + "type": "boolean" }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } }, - errors: { - type: 'array', - items: { - type: 'string', - }, + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - warnings: { - type: 'array', - items: { - type: 'string', - }, + "errors": { + "type": "array", + "items": { + "type": "string" + } }, - changes: { - type: 'array', - items: { - type: 'object', - }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" ], - additionalProperties: false, - }, + "additionalProperties": false + } }, - additionalProperties: false, - 'x-group': 'Core API', - 'x-examples': [ - 'markmv move old.md new.md', - 'markmv move docs/old.md archive/renamed.md --dry-run', - ], + "additionalProperties": false, + "x-group": "Core API", + "x-examples": [ + "markmv move old.md new.md", + "markmv move docs/old.md archive/renamed.md --dry-run" + ] }, - moveFiles: { - title: 'moveFiles', - description: 'Move multiple markdown files and update all references', - type: 'object', - properties: { - input: { - type: 'object', - properties: { - moves: { - type: 'array', - description: 'Array of source/destination pairs', - items: { - type: 'object', - properties: { - source: { - type: 'string', - }, - destination: { - type: 'string', + "moveFiles": { + "title": "moveFiles", + "description": "Move multiple markdown files and update all references", + "type": "object", + "properties": { + "input": { + "type": "object", + "properties": { + "moves": { + "type": "array", + "description": "Array of source/destination pairs", + "items": { + "type": "object", + "properties": { + "source": { + "type": "string" }, + "destination": { + "type": "string" + } }, - required: ['source', 'destination'], - additionalProperties: false, - }, + "required": [ + "source", + "destination" + ], + "additionalProperties": false + } }, - options: { - type: 'object', - properties: { - dryRun: { - type: 'boolean', - description: 'Show changes without executing', - }, - verbose: { - type: 'boolean', - description: 'Show detailed output', + "options": { + "type": "object", + "properties": { + "dryRun": { + "type": "boolean", + "description": "Show changes without executing" }, - force: { - type: 'boolean', - description: 'Force operation even if conflicts exist', + "verbose": { + "type": "boolean", + "description": "Show detailed output" }, - createDirectories: { - type: 'boolean', - description: 'Create missing directories', + "force": { + "type": "boolean", + "description": "Force operation even if conflicts exist" }, + "createDirectories": { + "type": "boolean", + "description": "Create missing directories" + } }, - additionalProperties: false, - }, + "additionalProperties": false + } }, - required: ['moves'], - additionalProperties: false, + "required": [ + "moves" + ], + "additionalProperties": false }, - output: { - type: 'object', - properties: { - success: { - type: 'boolean', + "output": { + "type": "object", + "properties": { + "success": { + "type": "boolean" }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - errors: { - type: 'array', - items: { - type: 'string', - }, + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - warnings: { - type: 'array', - items: { - type: 'string', - }, + "errors": { + "type": "array", + "items": { + "type": "string" + } }, - changes: { - type: 'array', - items: { - type: 'object', - }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" ], - additionalProperties: false, - }, + "additionalProperties": false + } }, - additionalProperties: false, - 'x-group': 'Core API', - 'x-examples': ['markmv move-files --batch file1.md:new1.md file2.md:new2.md'], + "additionalProperties": false, + "x-group": "Core API", + "x-examples": [ + "markmv move-files --batch file1.md:new1.md file2.md:new2.md" + ] }, - validateOperation: { - title: 'validateOperation', - description: 'Validate the result of a previous operation for broken links', - type: 'object', - properties: { - input: { - type: 'object', - properties: { - result: { - type: 'object', - description: 'Operation result to validate', - properties: { - success: { - type: 'boolean', + "validateOperation": { + "title": "validateOperation", + "description": "Validate the result of a previous operation for broken links", + "type": "object", + "properties": { + "input": { + "type": "object", + "properties": { + "result": { + "type": "object", + "description": "Operation result to validate", + "properties": { + "success": { + "type": "boolean" }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - errors: { - type: 'array', - items: { - type: 'string', - }, + "errors": { + "type": "array", + "items": { + "type": "string" + } }, - warnings: { - type: 'array', - items: { - type: 'string', - }, - }, - changes: { - type: 'array', - items: { - type: 'object', - }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" ], - additionalProperties: false, - }, + "additionalProperties": false + } }, - required: ['result'], - additionalProperties: false, + "required": [ + "result" + ], + "additionalProperties": false }, - output: { - type: 'object', - properties: { - valid: { - type: 'boolean', - }, - brokenLinks: { - type: 'number', + "output": { + "type": "object", + "properties": { + "valid": { + "type": "boolean" }, - errors: { - type: 'array', - items: { - type: 'string', - }, + "brokenLinks": { + "type": "number" }, + "errors": { + "type": "array", + "items": { + "type": "string" + } + } }, - required: ['valid', 'brokenLinks', 'errors'], - additionalProperties: false, - }, + "required": [ + "valid", + "brokenLinks", + "errors" + ], + "additionalProperties": false + } }, - additionalProperties: false, - 'x-group': 'Core API', - 'x-examples': [], + "additionalProperties": false, + "x-group": "Core API", + "x-examples": [] }, - testAutoExposure: { - title: 'testAutoExposure', - description: 'Test function to demonstrate auto-exposure pattern', - type: 'object', - properties: { - input: { - type: 'object', - properties: { - input: { - type: 'string', - description: 'The input message to echo', - }, + "testAutoExposure": { + "title": "testAutoExposure", + "description": "Test function to demonstrate auto-exposure pattern", + "type": "object", + "properties": { + "input": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "The input message to echo" + } }, - required: ['input'], - additionalProperties: false, + "required": [ + "input" + ], + "additionalProperties": false }, - output: { - type: 'object', - properties: { - message: { - type: 'string', + "output": { + "type": "object", + "properties": { + "message": { + "type": "string" }, - timestamp: { - type: 'string', - }, - success: { - type: 'boolean', + "timestamp": { + "type": "string" }, + "success": { + "type": "boolean" + } }, - required: ['message', 'timestamp', 'success'], - additionalProperties: false, - }, + "required": [ + "message", + "timestamp", + "success" + ], + "additionalProperties": false + } }, - additionalProperties: false, - 'x-group': 'Testing', - 'x-examples': ['markmv test "Hello World"'], - }, - }, + "additionalProperties": false, + "x-group": "Testing", + "x-examples": [ + "markmv test \"Hello World\"" + ] + } + } }; // Compiled validators export const validators = { moveFile: { input: ajv.compile(schemas.definitions.moveFile.properties.input), - output: ajv.compile(schemas.definitions.moveFile.properties.output), + output: ajv.compile(schemas.definitions.moveFile.properties.output) }, moveFiles: { input: ajv.compile(schemas.definitions.moveFiles.properties.input), - output: ajv.compile(schemas.definitions.moveFiles.properties.output), + output: ajv.compile(schemas.definitions.moveFiles.properties.output) }, validateOperation: { input: ajv.compile(schemas.definitions.validateOperation.properties.input), - output: ajv.compile(schemas.definitions.validateOperation.properties.output), + output: ajv.compile(schemas.definitions.validateOperation.properties.output) }, testAutoExposure: { input: ajv.compile(schemas.definitions.testAutoExposure.properties.input), - output: ajv.compile(schemas.definitions.testAutoExposure.properties.output), - }, + output: ajv.compile(schemas.definitions.testAutoExposure.properties.output) + } }; -/** Validate input for a specific method */ -export function validateInput( - methodName: string, - data: unknown -): { valid: boolean; errors: string[] } { +/** + * Validate input for a specific method + */ +export function validateInput(methodName: string, data: unknown): { valid: boolean; errors: string[] } { const validator = validators[methodName as keyof typeof validators]?.input; if (!validator) { return { valid: false, errors: [`Unknown method: ${methodName}`] }; } - + const valid = validator(data); - return valid - ? { valid, errors: [] } - : { - valid, - errors: validator.errors?.map((err) => `${err.instancePath} ${err.message}`) ?? [ - 'Validation failed', - ], - }; + return valid ? { valid, errors: [] } : { + valid, + errors: validator.errors?.map(err => `${err.instancePath} ${err.message}`) ?? ['Validation failed'] + }; } -/** Validate output for a specific method */ -export function validateOutput( - methodName: string, - data: unknown -): { valid: boolean; errors: string[] } { +/** + * Validate output for a specific method + */ +export function validateOutput(methodName: string, data: unknown): { valid: boolean; errors: string[] } { const validator = validators[methodName as keyof typeof validators]?.output; if (!validator) { return { valid: false, errors: [`Unknown method: ${methodName}`] }; } - + const valid = validator(data); - return valid - ? { valid, errors: [] } - : { - valid, - errors: validator.errors?.map((err) => `${err.instancePath} ${err.message}`) ?? [ - 'Validation failed', - ], - }; + return valid ? { valid, errors: [] } : { + valid, + errors: validator.errors?.map(err => `${err.instancePath} ${err.message}`) ?? ['Validation failed'] + }; } -/** Get list of available methods */ +/** + * Get list of available methods + */ export function getAvailableMethods(): string[] { return Object.keys(validators); } diff --git a/src/generated/api-routes.ts b/src/generated/api-routes.ts index af31fbf..4bb1c6f 100644 --- a/src/generated/api-routes.ts +++ b/src/generated/api-routes.ts @@ -1,6 +1,6 @@ /** * Auto-generated REST API route definitions for markmv API methods - * + * * DO NOT EDIT MANUALLY - This file is auto-generated */ @@ -11,11 +11,7 @@ import type { FileOperations } from '../core/file-operations.js'; export interface ApiRoute { path: string; method: 'GET' | 'POST' | 'PUT' | 'DELETE'; - handler: ( - req: IncomingMessage, - res: ServerResponse, - markmvInstance: FileOperations - ) => Promise; + handler: (req: IncomingMessage, res: ServerResponse, markmvInstance: FileOperations) => Promise; description: string; inputSchema: object; outputSchema: object; @@ -27,568 +23,557 @@ export const autoGeneratedApiRoutes: ApiRoute[] = [ path: '/api/move-file', method: 'POST', handler: createmoveFileHandler, - description: 'Move a single markdown file and update all references', + description: "Move a single markdown file and update all references", inputSchema: { - type: 'object', - properties: { - sourcePath: { - type: 'string', - description: 'Source file path', - }, - destinationPath: { - type: 'string', - description: 'Destination file path', - }, - options: { - type: 'object', - properties: { - dryRun: { - type: 'boolean', - description: 'Show changes without executing', + "type": "object", + "properties": { + "sourcePath": { + "type": "string", + "description": "Source file path" }, - verbose: { - type: 'boolean', - description: 'Show detailed output', + "destinationPath": { + "type": "string", + "description": "Destination file path" }, - force: { - type: 'boolean', - description: 'Force operation even if conflicts exist', - }, - createDirectories: { - type: 'boolean', - description: 'Create missing directories', - }, - }, - additionalProperties: false, - }, + "options": { + "type": "object", + "properties": { + "dryRun": { + "type": "boolean", + "description": "Show changes without executing" + }, + "verbose": { + "type": "boolean", + "description": "Show detailed output" + }, + "force": { + "type": "boolean", + "description": "Force operation even if conflicts exist" + }, + "createDirectories": { + "type": "boolean", + "description": "Create missing directories" + } + }, + "additionalProperties": false + } }, - required: ['sourcePath', 'destinationPath'], - additionalProperties: false, - }, + "required": [ + "sourcePath", + "destinationPath" + ], + "additionalProperties": false +}, outputSchema: { - type: 'object', - properties: { - success: { - type: 'boolean', - }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - errors: { - type: 'array', - items: { - type: 'string', - }, - }, - warnings: { - type: 'array', - items: { - type: 'string', - }, - }, - changes: { - type: 'array', - items: { - type: 'object', - }, - }, + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "errors": { + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } + }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" ], - additionalProperties: false, - }, + "additionalProperties": false +} }, { path: '/api/move-files', method: 'POST', handler: createmoveFilesHandler, - description: 'Move multiple markdown files and update all references', + description: "Move multiple markdown files and update all references", inputSchema: { - type: 'object', - properties: { - moves: { - type: 'array', - description: 'Array of source/destination pairs', - items: { - type: 'object', - properties: { - source: { - type: 'string', - }, - destination: { - type: 'string', - }, + "type": "object", + "properties": { + "moves": { + "type": "array", + "description": "Array of source/destination pairs", + "items": { + "type": "object", + "properties": { + "source": { + "type": "string" + }, + "destination": { + "type": "string" + } + }, + "required": [ + "source", + "destination" + ], + "additionalProperties": false + } + }, + "options": { + "type": "object", + "properties": { + "dryRun": { + "type": "boolean", + "description": "Show changes without executing" + }, + "verbose": { + "type": "boolean", + "description": "Show detailed output" + }, + "force": { + "type": "boolean", + "description": "Force operation even if conflicts exist" + }, + "createDirectories": { + "type": "boolean", + "description": "Create missing directories" + } + }, + "additionalProperties": false + } + }, + "required": [ + "moves" + ], + "additionalProperties": false +}, + outputSchema: { + "type": "object", + "properties": { + "success": { + "type": "boolean" }, - required: ['source', 'destination'], - additionalProperties: false, - }, - }, - options: { - type: 'object', - properties: { - dryRun: { - type: 'boolean', - description: 'Show changes without executing', + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - verbose: { - type: 'boolean', - description: 'Show detailed output', + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } }, - force: { - type: 'boolean', - description: 'Force operation even if conflicts exist', + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } }, - createDirectories: { - type: 'boolean', - description: 'Create missing directories', + "errors": { + "type": "array", + "items": { + "type": "string" + } }, - }, - additionalProperties: false, - }, - }, - required: ['moves'], - additionalProperties: false, - }, - outputSchema: { - type: 'object', - properties: { - success: { - type: 'boolean', - }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - errors: { - type: 'array', - items: { - type: 'string', - }, - }, - warnings: { - type: 'array', - items: { - type: 'string', - }, - }, - changes: { - type: 'array', - items: { - type: 'object', - }, - }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } + }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" ], - additionalProperties: false, - }, + "additionalProperties": false +} }, { path: '/api/validate-operation', method: 'POST', handler: createvalidateOperationHandler, - description: 'Validate the result of a previous operation for broken links', + description: "Validate the result of a previous operation for broken links", inputSchema: { - type: 'object', - properties: { - result: { - type: 'object', - description: 'Operation result to validate', - properties: { - success: { - type: 'boolean', - }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - errors: { - type: 'array', - items: { - type: 'string', - }, - }, - warnings: { - type: 'array', - items: { - type: 'string', - }, - }, - changes: { - type: 'array', - items: { - type: 'object', - }, - }, - }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', - ], - additionalProperties: false, - }, + "type": "object", + "properties": { + "result": { + "type": "object", + "description": "Operation result to validate", + "properties": { + "success": { + "type": "boolean" + }, + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "errors": { + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } + }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } + }, + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" + ], + "additionalProperties": false + } }, - required: ['result'], - additionalProperties: false, - }, + "required": [ + "result" + ], + "additionalProperties": false +}, outputSchema: { - type: 'object', - properties: { - valid: { - type: 'boolean', - }, - brokenLinks: { - type: 'number', - }, - errors: { - type: 'array', - items: { - type: 'string', - }, - }, + "type": "object", + "properties": { + "valid": { + "type": "boolean" + }, + "brokenLinks": { + "type": "number" + }, + "errors": { + "type": "array", + "items": { + "type": "string" + } + } }, - required: ['valid', 'brokenLinks', 'errors'], - additionalProperties: false, - }, + "required": [ + "valid", + "brokenLinks", + "errors" + ], + "additionalProperties": false +} }, { path: '/api/test-auto-exposure', method: 'POST', handler: createtestAutoExposureHandler, - description: 'Test function to demonstrate auto-exposure pattern', + description: "Test function to demonstrate auto-exposure pattern", inputSchema: { - type: 'object', - properties: { - input: { - type: 'string', - description: 'The input message to echo', - }, + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "The input message to echo" + } }, - required: ['input'], - additionalProperties: false, - }, + "required": [ + "input" + ], + "additionalProperties": false +}, outputSchema: { - type: 'object', - properties: { - message: { - type: 'string', - }, - timestamp: { - type: 'string', - }, - success: { - type: 'boolean', - }, + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "timestamp": { + "type": "string" + }, + "success": { + "type": "boolean" + } }, - required: ['message', 'timestamp', 'success'], - additionalProperties: false, - }, - }, + "required": [ + "message", + "timestamp", + "success" + ], + "additionalProperties": false +} + } ]; // These handler functions will be created dynamically by the API server // They are placeholders for the auto-generated route definitions export async function createmoveFileHandler( - req: IncomingMessage, + req: IncomingMessage, res: ServerResponse, markmvInstance: FileOperations ): Promise { try { // Parse request body const body = await parseRequestBody(req); - + // Validate input const inputValidation = validateInput('moveFile', body); if (!inputValidation.valid) { res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Validation failed', - details: inputValidation.errors, - }) - ); + res.end(JSON.stringify({ + error: 'Validation failed', + details: inputValidation.errors + })); return; } - + // Route to appropriate method based on methodName let result: unknown; if (typeof body !== 'object' || body === null || Array.isArray(body)) { throw new Error('Invalid request body'); } - + const bodyObj = body as Record; + const sourcePath = bodyObj.sourcePath; - const destinationPath = bodyObj.destinationPath; + const destinationPath = bodyObj.destinationPath; const options = bodyObj.options || {}; - - if ( - typeof sourcePath === 'string' && - typeof destinationPath === 'string' && - typeof options === 'object' && - options !== null && - !Array.isArray(options) - ) { - result = await markmvInstance.moveFile( - sourcePath, - destinationPath, - options as Record - ); + + if (typeof sourcePath === 'string' && typeof destinationPath === 'string' && + (typeof options === 'object' && options !== null && !Array.isArray(options))) { + result = await markmvInstance.moveFile(sourcePath, destinationPath, options as Record); } else { throw new Error('Invalid parameters for moveFile'); } - + // Validate output const outputValidation = validateOutput('moveFile', result); if (!outputValidation.valid) { console.warn('Output validation failed for moveFile:', outputValidation.errors); } - + res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify(result)); } catch (error) { res.writeHead(500, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Internal server error', - message: error instanceof Error ? error.message : String(error), - }) - ); + res.end(JSON.stringify({ + error: 'Internal server error', + message: error instanceof Error ? error.message : String(error) + })); } } export async function createmoveFilesHandler( - req: IncomingMessage, + req: IncomingMessage, res: ServerResponse, markmvInstance: FileOperations ): Promise { try { // Parse request body const body = await parseRequestBody(req); - + // Validate input const inputValidation = validateInput('moveFiles', body); if (!inputValidation.valid) { res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Validation failed', - details: inputValidation.errors, - }) - ); + res.end(JSON.stringify({ + error: 'Validation failed', + details: inputValidation.errors + })); return; } - + // Route to appropriate method based on methodName let result: unknown; if (typeof body !== 'object' || body === null || Array.isArray(body)) { throw new Error('Invalid request body'); } - + const bodyObj = body as Record; + const moves = bodyObj.moves; const options = bodyObj.options || {}; - - if ( - Array.isArray(moves) && - typeof options === 'object' && - options !== null && - !Array.isArray(options) - ) { + + if (Array.isArray(moves) && + (typeof options === 'object' && options !== null && !Array.isArray(options))) { result = await markmvInstance.moveFiles(moves, options as Record); } else { throw new Error('Invalid parameters for moveFiles'); } - + // Validate output const outputValidation = validateOutput('moveFiles', result); if (!outputValidation.valid) { console.warn('Output validation failed for moveFiles:', outputValidation.errors); } - + res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify(result)); } catch (error) { res.writeHead(500, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Internal server error', - message: error instanceof Error ? error.message : String(error), - }) - ); + res.end(JSON.stringify({ + error: 'Internal server error', + message: error instanceof Error ? error.message : String(error) + })); } } export async function createvalidateOperationHandler( - req: IncomingMessage, + req: IncomingMessage, res: ServerResponse, markmvInstance: FileOperations ): Promise { try { // Parse request body const body = await parseRequestBody(req); - + // Validate input const inputValidation = validateInput('validateOperation', body); if (!inputValidation.valid) { res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Validation failed', - details: inputValidation.errors, - }) - ); + res.end(JSON.stringify({ + error: 'Validation failed', + details: inputValidation.errors + })); return; } - + // Route to appropriate method based on methodName let result: unknown; if (typeof body !== 'object' || body === null || Array.isArray(body)) { throw new Error('Invalid request body'); } - + const bodyObj = body as Record; + const operationResult = bodyObj.result; - - if ( - typeof operationResult === 'object' && - operationResult !== null && - !Array.isArray(operationResult) - ) { + + if (typeof operationResult === 'object' && operationResult !== null && !Array.isArray(operationResult)) { // Type guard to ensure operationResult has required OperationResult properties const opResult = operationResult as Record; - if ( - typeof opResult.success === 'boolean' && - Array.isArray(opResult.modifiedFiles) && - Array.isArray(opResult.createdFiles) && - Array.isArray(opResult.deletedFiles) && - Array.isArray(opResult.errors) && - Array.isArray(opResult.warnings) && - Array.isArray(opResult.changes) - ) { - result = await markmvInstance.validateOperation( - opResult as unknown as import('../types/operations.js').OperationResult - ); + if (typeof opResult.success === 'boolean' && + Array.isArray(opResult.modifiedFiles) && + Array.isArray(opResult.createdFiles) && + Array.isArray(opResult.deletedFiles) && + Array.isArray(opResult.errors) && + Array.isArray(opResult.warnings) && + Array.isArray(opResult.changes)) { + result = await markmvInstance.validateOperation(opResult as unknown as import('../types/operations.js').OperationResult); } else { throw new Error('Invalid OperationResult structure'); } } else { throw new Error('Invalid parameters for validateOperation'); } - + // Validate output const outputValidation = validateOutput('validateOperation', result); if (!outputValidation.valid) { console.warn('Output validation failed for validateOperation:', outputValidation.errors); } - + res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify(result)); } catch (error) { res.writeHead(500, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Internal server error', - message: error instanceof Error ? error.message : String(error), - }) - ); + res.end(JSON.stringify({ + error: 'Internal server error', + message: error instanceof Error ? error.message : String(error) + })); } } export async function createtestAutoExposureHandler( - req: IncomingMessage, + req: IncomingMessage, res: ServerResponse, _markmvInstance: FileOperations ): Promise { try { // Parse request body const body = await parseRequestBody(req); - + // Validate input const inputValidation = validateInput('testAutoExposure', body); if (!inputValidation.valid) { res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Validation failed', - details: inputValidation.errors, - }) - ); + res.end(JSON.stringify({ + error: 'Validation failed', + details: inputValidation.errors + })); return; } - + // Route to appropriate method based on methodName let result: unknown; if (typeof body !== 'object' || body === null || Array.isArray(body)) { throw new Error('Invalid request body'); } - + const bodyObj = body as Record; + const input = bodyObj.input; - + if (typeof input === 'string') { // Import and call the standalone function const { testAutoExposure } = await import('../index.js'); @@ -596,32 +581,32 @@ export async function createtestAutoExposureHandler( } else { throw new Error('Invalid parameters for testAutoExposure'); } - + // Validate output const outputValidation = validateOutput('testAutoExposure', result); if (!outputValidation.valid) { console.warn('Output validation failed for testAutoExposure:', outputValidation.errors); } - + res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify(result)); } catch (error) { res.writeHead(500, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - error: 'Internal server error', - message: error instanceof Error ? error.message : String(error), - }) - ); + res.end(JSON.stringify({ + error: 'Internal server error', + message: error instanceof Error ? error.message : String(error) + })); } } -/** Helper functions */ +/** + * Helper functions + */ async function parseRequestBody(req: IncomingMessage): Promise { return new Promise((resolve, reject) => { let body = ''; - req.on('data', (chunk) => { + req.on('data', chunk => { body += chunk.toString(); }); req.on('end', () => { @@ -635,12 +620,16 @@ async function parseRequestBody(req: IncomingMessage): Promise { }); } -/** Get API route by path */ +/** + * Get API route by path + */ export function getApiRouteByPath(path: string): ApiRoute | undefined { - return autoGeneratedApiRoutes.find((route) => route.path === path); + return autoGeneratedApiRoutes.find(route => route.path === path); } -/** Get all API route paths */ +/** + * Get all API route paths + */ export function getApiRoutePaths(): string[] { - return autoGeneratedApiRoutes.map((route) => route.path); + return autoGeneratedApiRoutes.map(route => route.path); } diff --git a/src/generated/mcp-tools.ts b/src/generated/mcp-tools.ts index fb176e5..764ff58 100644 --- a/src/generated/mcp-tools.ts +++ b/src/generated/mcp-tools.ts @@ -1,6 +1,6 @@ /** * Auto-generated MCP tool definitions for markmv API methods - * + * * DO NOT EDIT MANUALLY - This file is auto-generated */ @@ -10,184 +10,201 @@ import type { Tool } from '@modelcontextprotocol/sdk/types.js'; export const autoGeneratedMcpTools: Tool[] = [ { name: 'move_file', - description: 'Move a single markdown file and update all references', + description: "Move a single markdown file and update all references", inputSchema: { - type: 'object', - properties: { - sourcePath: { - type: 'string', - description: 'Source file path', - }, - destinationPath: { - type: 'string', - description: 'Destination file path', - }, - options: { - type: 'object', - properties: { - dryRun: { - type: 'boolean', - description: 'Show changes without executing', - }, - verbose: { - type: 'boolean', - description: 'Show detailed output', - }, - force: { - type: 'boolean', - description: 'Force operation even if conflicts exist', - }, - createDirectories: { - type: 'boolean', - description: 'Create missing directories', - }, - }, - additionalProperties: false, - }, + "type": "object", + "properties": { + "sourcePath": { + "type": "string", + "description": "Source file path" + }, + "destinationPath": { + "type": "string", + "description": "Destination file path" + }, + "options": { + "type": "object", + "properties": { + "dryRun": { + "type": "boolean", + "description": "Show changes without executing" + }, + "verbose": { + "type": "boolean", + "description": "Show detailed output" + }, + "force": { + "type": "boolean", + "description": "Force operation even if conflicts exist" + }, + "createDirectories": { + "type": "boolean", + "description": "Create missing directories" + } + }, + "additionalProperties": false + } }, - required: ['sourcePath', 'destinationPath'], - additionalProperties: false, - }, + "required": [ + "sourcePath", + "destinationPath" + ], + "additionalProperties": false +} }, { name: 'move_files', - description: 'Move multiple markdown files and update all references', + description: "Move multiple markdown files and update all references", inputSchema: { - type: 'object', - properties: { - moves: { - type: 'array', - description: 'Array of source/destination pairs', - items: { - type: 'object', - properties: { - source: { - type: 'string', - }, - destination: { - type: 'string', - }, - }, - required: ['source', 'destination'], - additionalProperties: false, - }, - }, - options: { - type: 'object', - properties: { - dryRun: { - type: 'boolean', - description: 'Show changes without executing', - }, - verbose: { - type: 'boolean', - description: 'Show detailed output', - }, - force: { - type: 'boolean', - description: 'Force operation even if conflicts exist', - }, - createDirectories: { - type: 'boolean', - description: 'Create missing directories', - }, - }, - additionalProperties: false, - }, + "type": "object", + "properties": { + "moves": { + "type": "array", + "description": "Array of source/destination pairs", + "items": { + "type": "object", + "properties": { + "source": { + "type": "string" + }, + "destination": { + "type": "string" + } + }, + "required": [ + "source", + "destination" + ], + "additionalProperties": false + } + }, + "options": { + "type": "object", + "properties": { + "dryRun": { + "type": "boolean", + "description": "Show changes without executing" + }, + "verbose": { + "type": "boolean", + "description": "Show detailed output" + }, + "force": { + "type": "boolean", + "description": "Force operation even if conflicts exist" + }, + "createDirectories": { + "type": "boolean", + "description": "Create missing directories" + } + }, + "additionalProperties": false + } }, - required: ['moves'], - additionalProperties: false, - }, + "required": [ + "moves" + ], + "additionalProperties": false +} }, { name: 'validate_operation', - description: 'Validate the result of a previous operation for broken links', + description: "Validate the result of a previous operation for broken links", inputSchema: { - type: 'object', - properties: { - result: { - type: 'object', - description: 'Operation result to validate', - properties: { - success: { - type: 'boolean', - }, - modifiedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - createdFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - deletedFiles: { - type: 'array', - items: { - type: 'string', - }, - }, - errors: { - type: 'array', - items: { - type: 'string', - }, - }, - warnings: { - type: 'array', - items: { - type: 'string', - }, - }, - changes: { - type: 'array', - items: { - type: 'object', - }, - }, - }, - required: [ - 'success', - 'modifiedFiles', - 'createdFiles', - 'deletedFiles', - 'errors', - 'warnings', - 'changes', - ], - additionalProperties: false, - }, + "type": "object", + "properties": { + "result": { + "type": "object", + "description": "Operation result to validate", + "properties": { + "success": { + "type": "boolean" + }, + "modifiedFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "createdFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "deletedFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "errors": { + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } + }, + "changes": { + "type": "array", + "items": { + "type": "object" + } + } + }, + "required": [ + "success", + "modifiedFiles", + "createdFiles", + "deletedFiles", + "errors", + "warnings", + "changes" + ], + "additionalProperties": false + } }, - required: ['result'], - additionalProperties: false, - }, + "required": [ + "result" + ], + "additionalProperties": false +} }, { name: 'test_auto_exposure', - description: 'Test function to demonstrate auto-exposure pattern', + description: "Test function to demonstrate auto-exposure pattern", inputSchema: { - type: 'object', - properties: { - input: { - type: 'string', - description: 'The input message to echo', - }, + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "The input message to echo" + } }, - required: ['input'], - additionalProperties: false, - }, - }, + "required": [ + "input" + ], + "additionalProperties": false +} + } ]; -/** Get MCP tool by name */ +/** + * Get MCP tool by name + */ export function getMcpToolByName(name: string): Tool | undefined { - return autoGeneratedMcpTools.find((tool) => tool.name === name); + return autoGeneratedMcpTools.find(tool => tool.name === name); } -/** Get all MCP tool names */ +/** + * Get all MCP tool names + */ export function getMcpToolNames(): string[] { - return autoGeneratedMcpTools.map((tool) => tool.name); + return autoGeneratedMcpTools.map(tool => tool.name); } +