From 57d6c4bd94bcb3564894edbe05a7fd637cbcd98c Mon Sep 17 00:00:00 2001 From: RYGRIT Date: Sat, 28 Feb 2026 17:31:59 +0800 Subject: [PATCH 1/7] fix: single-pass markdown rendering for correct TOC and heading IDs (#1323) --- server/utils/readme.ts | 115 +++++++++++----- test/unit/server/utils/readme.spec.ts | 181 ++++++++++++++++++++++++++ 2 files changed, 266 insertions(+), 30 deletions(-) diff --git a/server/utils/readme.ts b/server/utils/readme.ts index e3d337ece..53b7126e0 100644 --- a/server/utils/readme.ts +++ b/server/utils/readme.ts @@ -251,8 +251,12 @@ function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo) if (!url) return url if (url.startsWith('#')) { // Prefix anchor links to match heading IDs (avoids collision with page IDs) + // Idempotent: don't double-prefix if already prefixed + if (url.startsWith('#user-content-')) return url return `#user-content-${url.slice(1)}` } + // Absolute paths (e.g. /package/foo from a previous npmjs redirect) are already resolved + if (url.startsWith('/')) return url if (hasProtocol(url, { acceptRelative: true })) { try { const parsed = new URL(url, 'https://example.com') @@ -381,37 +385,52 @@ export async function renderReadmeHtml( // So README starts at h3, and we ensure no levels are skipped // Visual styling preserved via data-level attribute (original depth) let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading) - renderer.heading = function ({ tokens, depth }: Tokens.Heading) { - // Calculate the target semantic level based on document structure - // Start at h3 (since page h1 + section h2 already exist) - // But ensure we never skip levels - can only go down by 1 or stay same/go up + + // Shared heading processing for both markdown and HTML headings + function processHeading(depth: number, plainText: string) { const semanticLevel = calculateSemanticDepth(depth, lastSemanticLevel) lastSemanticLevel = semanticLevel - const text = this.parser.parseInline(tokens) - // Generate GitHub-style slug for anchor links - let slug = slugify(text) - if (!slug) slug = 'heading' // Fallback for empty headings + let slug = slugify(plainText) + if (!slug) slug = 'heading' - // Handle duplicate slugs (GitHub-style: foo, foo-1, foo-2) const count = usedSlugs.get(slug) ?? 0 usedSlugs.set(slug, count + 1) const uniqueSlug = count === 0 ? slug : `${slug}-${count}` - - // Prefix with 'user-content-' to avoid collisions with page IDs - // (e.g., #install, #dependencies, #versions are used by the package page) const id = `user-content-${uniqueSlug}` - // Collect TOC item with plain text (HTML stripped, entities decoded) - const plainText = decodeHtmlEntities(stripHtmlTags(text).trim()) if (plainText) { toc.push({ text: plainText, id, depth }) } - /** The link href uses the unique slug WITHOUT the 'user-content-' prefix, because that will later be added for all links. */ return `${plainText}\n` } + renderer.heading = function ({ tokens, depth }: Tokens.Heading) { + const text = this.parser.parseInline(tokens) + const plainText = decodeHtmlEntities(stripHtmlTags(text).trim()) + return processHeading(depth, plainText) + } + + // Intercept HTML headings so they get id, TOC entry, and correct semantic level. + // Also intercept raw HTML tags so playground links are collected in the same pass. + const htmlHeadingRe = /]*)?>([\s\S]*?)<\/h\1>/gi + const htmlAnchorRe = /]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi + renderer.html = function ({ text }: Tokens.HTML) { + let result = text.replace(htmlHeadingRe, (_, level, _attrs, inner) => { + const depth = parseInt(level) + const plainText = decodeHtmlEntities(stripHtmlTags(inner).trim()) + return processHeading(depth, plainText).trimEnd() + }) + // Process raw HTML tags for playground link collection and URL resolution + result = result.replace(htmlAnchorRe, (_full, href, inner) => { + const label = decodeHtmlEntities(stripHtmlTags(inner).trim()) + const { resolvedHref, extraAttrs } = processLink(href, label) + return `${inner}` + }) + return result + } + // Syntax highlighting for code blocks (uses shared highlighter) renderer.code = ({ text, lang }: Tokens.Code) => { const html = highlightCodeSync(shiki, text, lang || 'text') @@ -433,7 +452,35 @@ ${html} return `` } + // Helper: resolve a link href, collect playground links, and build attributes. + // Used by both the markdown renderer.link and the HTML interceptor so that + // all link processing happens in a single pass during marked rendering. + function processLink(href: string, label: string): { resolvedHref: string; extraAttrs: string } { + const resolvedHref = resolveUrl(href, packageName, repoInfo) + + // Collect playground links + const provider = matchPlaygroundProvider(resolvedHref) + if (provider && !seenUrls.has(resolvedHref)) { + seenUrls.add(resolvedHref) + collectedLinks.push({ + url: resolvedHref, + provider: provider.id, + providerName: provider.name, + label: decodeHtmlEntities(label || provider.name), + }) + } + + // Security attributes for external links + let extraAttrs = '' + if (resolvedHref && hasProtocol(resolvedHref, { acceptRelative: true })) { + extraAttrs = ' rel="nofollow noreferrer noopener" target="_blank"' + } + + return { resolvedHref, extraAttrs } + } + // Resolve link URLs, add security attributes, and collect playground links + // — all in a single pass during marked rendering (no deferred processing) renderer.link = function ({ href, title, tokens }: Tokens.Link) { const text = this.parser.parseInline(tokens) const titleAttr = title ? ` title="${title}"` : '' @@ -444,10 +491,9 @@ ${html} plainText = tokens[0].text } - const intermediateTitleAttr = - plainText || title ? ` data-title-intermediate="${plainText || title}"` : '' + const { resolvedHref, extraAttrs } = processLink(href, plainText || title || '') - return `${text}` + return `${text}` } // GitHub-style callouts: > [!NOTE], > [!TIP], etc. @@ -475,26 +521,32 @@ ${html} allowedSchemes: ['http', 'https', 'mailto'], // Transform img src URLs (GitHub blob → raw, relative → GitHub raw) transformTags: { + // Headings are already processed to correct semantic levels by processHeading() + // during the marked rendering pass. The sanitizer just needs to preserve them. + // For any stray headings that didn't go through processHeading (shouldn't happen), + // we still apply a safe fallback shift. h1: (_, attribs) => { + if (attribs['data-level']) return { tagName: 'h1', attribs } return { tagName: 'h3', attribs: { ...attribs, 'data-level': '1' } } }, h2: (_, attribs) => { + if (attribs['data-level']) return { tagName: 'h2', attribs } return { tagName: 'h4', attribs: { ...attribs, 'data-level': '2' } } }, h3: (_, attribs) => { - if (attribs['data-level']) return { tagName: 'h3', attribs: attribs } + if (attribs['data-level']) return { tagName: 'h3', attribs } return { tagName: 'h5', attribs: { ...attribs, 'data-level': '3' } } }, h4: (_, attribs) => { - if (attribs['data-level']) return { tagName: 'h4', attribs: attribs } + if (attribs['data-level']) return { tagName: 'h4', attribs } return { tagName: 'h6', attribs: { ...attribs, 'data-level': '4' } } }, h5: (_, attribs) => { - if (attribs['data-level']) return { tagName: 'h5', attribs: attribs } + if (attribs['data-level']) return { tagName: 'h5', attribs } return { tagName: 'h6', attribs: { ...attribs, 'data-level': '5' } } }, h6: (_, attribs) => { - if (attribs['data-level']) return { tagName: 'h6', attribs: attribs } + if (attribs['data-level']) return { tagName: 'h6', attribs } return { tagName: 'h6', attribs: { ...attribs, 'data-level': '6' } } }, img: (tagName, attribs) => { @@ -522,6 +574,11 @@ ${html} } return { tagName, attribs } }, + // Markdown links are fully processed in renderer.link (single-pass). + // However, inline HTML tags inside paragraphs are NOT seen by + // renderer.html (marked parses them as paragraph tokens, not html tokens). + // So we still need to collect playground links here for those cases. + // The seenUrls set ensures no duplicates across both paths. a: (tagName, attribs) => { if (!attribs.href) { return { tagName, attribs } @@ -529,24 +586,22 @@ ${html} const resolvedHref = resolveUrl(attribs.href, packageName, repoInfo) + // Collect playground links from inline HTML tags that weren't + // caught by renderer.link or renderer.html const provider = matchPlaygroundProvider(resolvedHref) if (provider && !seenUrls.has(resolvedHref)) { seenUrls.add(resolvedHref) - collectedLinks.push({ url: resolvedHref, provider: provider.id, providerName: provider.name, - /** - * We need to set some data attribute before hand because `transformTags` doesn't - * provide the text of the element. This will automatically be removed, because there - * is an allow list for link attributes. - * */ - label: decodeHtmlEntities(attribs['data-title-intermediate'] || provider.name), + // sanitize-html transformTags doesn't provide element text content, + // so we fall back to the provider name for the label + label: provider.name, }) } - // Add security attributes for external links + // Add security attributes for external links (idempotent) if (resolvedHref && hasProtocol(resolvedHref, { acceptRelative: true })) { attribs.rel = 'nofollow noreferrer noopener' attribs.target = '_blank' diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts index d06be1806..50a5cc3c9 100644 --- a/test/unit/server/utils/readme.spec.ts +++ b/test/unit/server/utils/readme.spec.ts @@ -475,4 +475,185 @@ describe('HTML output', () => {

Some bold text and a link.

`) }) + + it('adds id to raw HTML headings', async () => { + const result = await renderReadmeHtml('

Title

', 'test-pkg') + expect(result.html).toContain('id="user-content-title"') + expect(result.toc).toHaveLength(1) + expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 }) + }) + + it('adds id to HTML heading in multi-element token', async () => { + const md = '

My Package

\n

A description

' + const result = await renderReadmeHtml(md, 'test-pkg') + expect(result.html).toContain('id="user-content-my-package"') + expect(result.toc[0]).toMatchObject({ text: 'My Package', depth: 1 }) + }) + + it('handles duplicate raw HTML heading slugs', async () => { + const md = '

API

\n\n

API

' + const result = await renderReadmeHtml(md, 'test-pkg') + expect(result.html).toContain('id="user-content-api"') + expect(result.html).toContain('id="user-content-api-1"') + }) +}) + +/** + * Tests for issue #1323: single-pass markdown rendering behavior. + * + * The core concern is that mixing markdown headings and raw HTML headings + * must produce TOC entries, heading IDs, and duplicate-slug suffixes in + * exact document order — the same as GitHub does. + * + * If the implementation processes markdown headings in one pass and HTML + * headings in a separate (later) pass, the ordering will be wrong. + */ +describe('Issue #1323 — single-pass rendering correctness', () => { + describe('mixed markdown + HTML headings: TOC order and IDs', () => { + it('produces TOC entries in document order when markdown and HTML headings are interleaved', async () => { + // This is the core scenario from the issue: HTML headings appear + // between markdown headings. A two-pass approach would collect all + // markdown headings first, then HTML headings — scrambling the order. + const md = [ + '# First (markdown)', + '', + '

Second (html)

', + '', + '## Third (markdown)', + '', + '

Fourth (html)

', + '', + '## Fifth (markdown)', + ].join('\n') + + const result = await renderReadmeHtml(md, 'test-pkg') + + // TOC must reflect exact document order + expect(result.toc).toHaveLength(5) + expect(result.toc[0]!.text).toBe('First (markdown)') + expect(result.toc[1]!.text).toBe('Second (html)') + expect(result.toc[2]!.text).toBe('Third (markdown)') + expect(result.toc[3]!.text).toBe('Fourth (html)') + expect(result.toc[4]!.text).toBe('Fifth (markdown)') + }) + + it('assigns duplicate-slug suffixes in document order across mixed heading types', async () => { + // Two markdown "API" headings with an HTML "API" heading in between. + // Correct: api, api-1, api-2 in that order. + // If HTML headings are processed in a separate pass, the HTML one + // could get suffix -2 while the last markdown one gets -1. + const md = ['## API', '', '

API

', '', '## API'].join('\n') + + const result = await renderReadmeHtml(md, 'test-pkg') + + expect(result.toc).toHaveLength(3) + expect(result.toc[0]!.id).toBe('user-content-api') + expect(result.toc[1]!.id).toBe('user-content-api-1') + expect(result.toc[2]!.id).toBe('user-content-api-2') + + // The HTML output must also have these IDs in order + const ids = Array.from(result.html.matchAll(/id="(user-content-api(?:-\d+)?)"/g), m => m[1]) + expect(ids).toEqual(['user-content-api', 'user-content-api-1', 'user-content-api-2']) + }) + + it('heading semantic levels are sequential even when mixing heading types', async () => { + // h1 (md) → h3, h3 (html) → should be h4 (max = lastSemantic + 1), + // not jump to h5 or h6 because it was processed in a later pass. + const md = ['# Title', '', '

Subsection

', '', '#### Deep'].join('\n') + + const result = await renderReadmeHtml(md, 'test-pkg') + + // Extract semantic tags in order from the HTML + const tags = Array.from(result.html.matchAll(/ Number(m[1])) + // h1→h3, h3→h4 (sequential after h3), h4→h5 (sequential after h4) + expect(tags).toEqual([3, 4, 5]) + }) + }) + + describe('HTML heading between markdown headings — ID and href consistency', () => { + it('heading id and its anchor href point to the same slug', async () => { + const md = ['# Introduction', '', '

Getting Started

', '', '## Installation'].join( + '\n', + ) + + const result = await renderReadmeHtml(md, 'test-pkg') + + // For every heading, the slug used in id="user-content-{slug}" must + // match the slug in the child anchor href="#user-content-{slug}" + // (resolveUrl prefixes # anchors with user-content-). + const headingPairs = [ + ...result.html.matchAll(/id="user-content-([^"]+)"[^>]*>/g), + ] + expect(headingPairs.length).toBeGreaterThan(0) + for (const match of headingPairs) { + // slug portion must be identical + expect(match[1]).toBe(match[2]) + } + }) + }) + + describe('playground links collected from HTML tags in single pass', () => { + it('collects playground links from raw HTML anchor tags', async () => { + // Some READMEs (like eslint's sponsor section) use raw HTML tags + // instead of markdown link syntax. These must also be picked up. + const md = [ + '# My Package', + '', + 'Open in StackBlitz', + '', + 'Some text with a [CodeSandbox link](https://codesandbox.io/s/example)', + ].join('\n') + + const result = await renderReadmeHtml(md, 'test-pkg') + + // Both playground links should be collected regardless of syntax + const providers = result.playgroundLinks.map(l => l.provider) + expect(providers).toContain('stackblitz') + expect(providers).toContain('codesandbox') + }) + }) + + describe('complex real-world interleaving (atproxy-like)', () => { + it('handles a README with HTML h1 followed by markdown h2 and mixed content', async () => { + // Simulates a pattern like atproxy's README where h1 is HTML + // and subsequent headings are markdown + const md = [ + '

atproxy

', + '

A cool proxy library

', + '', + '## Features', + '', + '- Fast', + '- Simple', + '', + '## Installation', + '', + '```bash', + 'npm install atproxy', + '```', + '', + '

Advanced Usage

', + '', + '## API', + '', + '### Methods', + ].join('\n') + + const result = await renderReadmeHtml(md, 'test-pkg') + + // TOC order must be: atproxy, Features, Installation, Advanced Usage, API, Methods + expect(result.toc.map(t => t.text)).toEqual([ + 'atproxy', + 'Features', + 'Installation', + 'Advanced Usage', + 'API', + 'Methods', + ]) + + // All IDs should be unique + const ids = result.toc.map(t => t.id) + expect(new Set(ids).size).toBe(ids.length) + }) + }) }) From a4d07f7c5e22dbea298e5d3b041a88cb9ec24b2a Mon Sep 17 00:00:00 2001 From: rygrit Date: Sun, 1 Mar 2026 14:37:16 +0800 Subject: [PATCH 2/7] refactor: add user content prefixing for IDs --- server/utils/readme.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/server/utils/readme.ts b/server/utils/readme.ts index 9920522b4..747115fd9 100644 --- a/server/utils/readme.ts +++ b/server/utils/readme.ts @@ -266,6 +266,16 @@ const reservedPathsNpmJs = [ const npmJsHosts = new Set(['www.npmjs.com', 'npmjs.com', 'www.npmjs.org', 'npmjs.org']) +const USER_CONTENT_PREFIX = 'user-content-' + +function withUserContentPrefix(value: string): string { + return value.startsWith(USER_CONTENT_PREFIX) ? value : `${USER_CONTENT_PREFIX}${value}` +} + +function toUserContentHash(value: string): string { + return `#${withUserContentPrefix(value)}` +} + const isNpmJsUrlThatCanBeRedirected = (url: URL) => { if (!npmJsHosts.has(url.host)) { return false @@ -292,8 +302,7 @@ function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo) if (url.startsWith('#')) { // Prefix anchor links to match heading IDs (avoids collision with page IDs) // Idempotent: don't double-prefix if already prefixed - if (url.startsWith('#user-content-')) return url - return `#user-content-${url.slice(1)}` + return toUserContentHash(url.slice(1)) } // Absolute paths (e.g. /package/foo from a previous npmjs redirect) are already resolved if (url.startsWith('/')) return url @@ -385,8 +394,8 @@ function resolveImageUrl(url: string, packageName: string, repoInfo?: Repository // Helper to prefix id attributes with 'user-content-' function prefixId(tagName: string, attribs: sanitizeHtml.Attributes) { - if (attribs.id && !attribs.id.startsWith('user-content-')) { - attribs.id = `user-content-${attribs.id}` + if (attribs.id) { + attribs.id = withUserContentPrefix(attribs.id) } return { tagName, attribs } } @@ -437,7 +446,7 @@ export async function renderReadmeHtml( const count = usedSlugs.get(slug) ?? 0 usedSlugs.set(slug, count + 1) const uniqueSlug = count === 0 ? slug : `${slug}-${count}` - const id = `user-content-${uniqueSlug}` + const id = withUserContentPrefix(uniqueSlug) if (plainText) { toc.push({ text: plainText, id, depth }) From 7fb441490822266b26dfa162bfa41d2f6045abef Mon Sep 17 00:00:00 2001 From: rygrit Date: Sun, 1 Mar 2026 21:25:41 +0800 Subject: [PATCH 3/7] fix: IDs may conflict --- server/utils/readme.ts | 8 ++++++-- test/unit/server/utils/readme.spec.ts | 11 +++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/server/utils/readme.ts b/server/utils/readme.ts index 747115fd9..611ad2725 100644 --- a/server/utils/readme.ts +++ b/server/utils/readme.ts @@ -272,6 +272,10 @@ function withUserContentPrefix(value: string): string { return value.startsWith(USER_CONTENT_PREFIX) ? value : `${USER_CONTENT_PREFIX}${value}` } +function toUserContentId(value: string): string { + return `${USER_CONTENT_PREFIX}${value}` +} + function toUserContentHash(value: string): string { return `#${withUserContentPrefix(value)}` } @@ -446,13 +450,13 @@ export async function renderReadmeHtml( const count = usedSlugs.get(slug) ?? 0 usedSlugs.set(slug, count + 1) const uniqueSlug = count === 0 ? slug : `${slug}-${count}` - const id = withUserContentPrefix(uniqueSlug) + const id = toUserContentId(uniqueSlug) if (plainText) { toc.push({ text: plainText, id, depth }) } - return `${plainText}\n` + return `${plainText}\n` } renderer.heading = function ({ tokens, depth }: Tokens.Heading) { diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts index 6393e9d8f..501a9833d 100644 --- a/test/unit/server/utils/readme.spec.ts +++ b/test/unit/server/utils/readme.spec.ts @@ -642,6 +642,17 @@ describe('Issue #1323 — single-pass rendering correctness', () => { expect(ids).toEqual(['user-content-api', 'user-content-api-1', 'user-content-api-2']) }) + it('does not collide when heading text already starts with user-content-', async () => { + const md = ['# Title', '', '# user-content-title'].join('\n') + + const result = await renderReadmeHtml(md, 'test-pkg') + + const ids = Array.from(result.html.matchAll(/id="(user-content-[^"]+)"/g), m => m[1]) + expect(ids).toEqual(['user-content-title', 'user-content-user-content-title']) + expect(new Set(ids).size).toBe(ids.length) + expect(result.toc.map(t => t.id)).toEqual(ids) + }) + it('heading semantic levels are sequential even when mixing heading types', async () => { // h1 (md) → h3, h3 (html) → should be h4 (max = lastSemantic + 1), // not jump to h5 or h6 because it was processed in a later pass. From 027cfb8587549340d70672259eae53e24b4044f7 Mon Sep 17 00:00:00 2001 From: rygrit Date: Sun, 1 Mar 2026 21:28:26 +0800 Subject: [PATCH 4/7] test: TOC access --- test/unit/server/utils/readme.spec.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts index 501a9833d..1d234610b 100644 --- a/test/unit/server/utils/readme.spec.ts +++ b/test/unit/server/utils/readme.spec.ts @@ -572,6 +572,7 @@ describe('HTML output', () => { it('adds id to HTML heading in multi-element token', async () => { const md = '

My Package

\n

A description

' const result = await renderReadmeHtml(md, 'test-pkg') + expect(result.toc).toHaveLength(1) expect(result.html).toContain('id="user-content-my-package"') expect(result.toc[0]).toMatchObject({ text: 'My Package', depth: 1 }) }) From 84272f99ea5b66abfb9ae3b5a96eecdac5c592cf Mon Sep 17 00:00:00 2001 From: rygrit Date: Sun, 1 Mar 2026 22:29:48 +0800 Subject: [PATCH 5/7] test: add unit test headings & anchors raw HTML --- test/unit/server/utils/readme.spec.ts | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts index 1d234610b..099459754 100644 --- a/test/unit/server/utils/readme.spec.ts +++ b/test/unit/server/utils/readme.spec.ts @@ -583,6 +583,56 @@ describe('HTML output', () => { expect(result.html).toContain('id="user-content-api"') expect(result.html).toContain('id="user-content-api-1"') }) + + it('preserves supported attributes on raw HTML headings', async () => { + const md = '

My Package

' + const result = await renderReadmeHtml(md, 'test-pkg') + + expect(result.html).toContain('id="user-content-my-package"') + expect(result.html).toContain('align="center"') + }) + + it('preserves supported attributes on rewritten raw HTML anchors (renderer.html path)', async () => { + const md = [ + '
', + ' Open in StackBlitz', + '
', + ].join('\n') + const result = await renderReadmeHtml(md, 'test-pkg') + + expect(result.html).toContain('href="https://stackblitz.com/edit/my-demo"') + expect(result.html).toContain('title="Open demo"') + expect(result.html).toContain('rel="nofollow noreferrer noopener"') + expect(result.html).toContain('target="_blank"') + }) + + it('preserves title when it appears before href (renderer.html path)', async () => { + const md = [ + '
', + ' Open in StackBlitz', + '
', + ].join('\n') + const result = await renderReadmeHtml(md, 'test-pkg') + + expect(result.html).toContain('title="Open demo"') + expect(result.html).toContain('href="https://stackblitz.com/edit/my-demo"') + expect(result.html).toContain('rel="nofollow noreferrer noopener"') + expect(result.html).toContain('target="_blank"') + }) + + it('overrides existing rel and target instead of duplicating them (renderer.html path)', async () => { + const md = [ + '
', + ' Open in StackBlitz', + '
', + ].join('\n') + const result = await renderReadmeHtml(md, 'test-pkg') + + expect(result.html).toContain('rel="nofollow noreferrer noopener"') + expect(result.html).toContain('target="_blank"') + expect(result.html).not.toContain('rel="bookmark"') + expect(result.html).not.toContain('target="_self"') + }) }) /** From dcda67fdd5fab62a1f29a538cbf7af7166aac00a Mon Sep 17 00:00:00 2001 From: rygrit Date: Sun, 1 Mar 2026 22:30:43 +0800 Subject: [PATCH 6/7] fix: heading & anchor for ALLOW_ATTR --- server/utils/readme.ts | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/server/utils/readme.ts b/server/utils/readme.ts index 611ad2725..f2441a9ea 100644 --- a/server/utils/readme.ts +++ b/server/utils/readme.ts @@ -280,6 +280,16 @@ function toUserContentHash(value: string): string { return `#${withUserContentPrefix(value)}` } +function normalizePreservedAnchorAttrs(attrs: string): string { + const cleanedAttrs = attrs + .replace(/\s+href\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi, '') + .replace(/\s+rel\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi, '') + .replace(/\s+target\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi, '') + .trim() + + return cleanedAttrs ? ` ${cleanedAttrs}` : '' +} + const isNpmJsUrlThatCanBeRedirected = (url: URL) => { if (!npmJsHosts.has(url.host)) { return false @@ -440,7 +450,7 @@ export async function renderReadmeHtml( let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading) // Shared heading processing for both markdown and HTML headings - function processHeading(depth: number, plainText: string) { + function processHeading(depth: number, plainText: string, preservedAttrs = '') { const semanticLevel = calculateSemanticDepth(depth, lastSemanticLevel) lastSemanticLevel = semanticLevel @@ -456,7 +466,7 @@ export async function renderReadmeHtml( toc.push({ text: plainText, id, depth }) } - return `${plainText}\n` + return `${plainText}\n` } renderer.heading = function ({ tokens, depth }: Tokens.Heading) { @@ -468,18 +478,21 @@ export async function renderReadmeHtml( // Intercept HTML headings so they get id, TOC entry, and correct semantic level. // Also intercept raw HTML tags so playground links are collected in the same pass. const htmlHeadingRe = /]*)?>([\s\S]*?)<\/h\1>/gi - const htmlAnchorRe = /]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi + const htmlAnchorRe = /]*?)href=(["'])([^"']*)\2([^>]*)>([\s\S]*?)<\/a>/gi renderer.html = function ({ text }: Tokens.HTML) { - let result = text.replace(htmlHeadingRe, (_, level, _attrs, inner) => { + let result = text.replace(htmlHeadingRe, (_, level, attrs, inner) => { const depth = parseInt(level) const plainText = decodeHtmlEntities(stripHtmlTags(inner).trim()) - return processHeading(depth, plainText).trimEnd() + const align = /\balign=(["'])(.*?)\1/i.exec(attrs)?.[2] + const preservedAttrs = align ? ` align="${align}"` : '' + return processHeading(depth, plainText, preservedAttrs).trimEnd() }) // Process raw HTML tags for playground link collection and URL resolution - result = result.replace(htmlAnchorRe, (_full, href, inner) => { + result = result.replace(htmlAnchorRe, (_full, beforeHref, _quote, href, afterHref, inner) => { const label = decodeHtmlEntities(stripHtmlTags(inner).trim()) const { resolvedHref, extraAttrs } = processLink(href, label) - return `${inner}` + const preservedAttrs = normalizePreservedAnchorAttrs(`${beforeHref ?? ''}${afterHref ?? ''}`) + return `${inner}` }) return result } From 8f8e3c0b1e5869a2c5b1f03122bb408c8fa7f2a8 Mon Sep 17 00:00:00 2001 From: RYGRIT Date: Mon, 2 Mar 2026 09:08:33 +0800 Subject: [PATCH 7/7] fix: --- server/utils/readme.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/utils/readme.ts b/server/utils/readme.ts index f2441a9ea..297dc5bcd 100644 --- a/server/utils/readme.ts +++ b/server/utils/readme.ts @@ -480,7 +480,7 @@ export async function renderReadmeHtml( const htmlHeadingRe = /]*)?>([\s\S]*?)<\/h\1>/gi const htmlAnchorRe = /]*?)href=(["'])([^"']*)\2([^>]*)>([\s\S]*?)<\/a>/gi renderer.html = function ({ text }: Tokens.HTML) { - let result = text.replace(htmlHeadingRe, (_, level, attrs, inner) => { + let result = text.replace(htmlHeadingRe, (_, level, attrs = '', inner) => { const depth = parseInt(level) const plainText = decodeHtmlEntities(stripHtmlTags(inner).trim()) const align = /\balign=(["'])(.*?)\1/i.exec(attrs)?.[2]