From 57d6c4bd94bcb3564894edbe05a7fd637cbcd98c Mon Sep 17 00:00:00 2001
From: RYGRIT <ry_grit@sina.com>
Date: Sat, 28 Feb 2026 17:31:59 +0800
Subject: [PATCH 1/7] fix: single-pass markdown rendering for correct TOC and
 heading IDs (#1323)

---
 server/utils/readme.ts                | 115 +++++++++++-----
 test/unit/server/utils/readme.spec.ts | 181 ++++++++++++++++++++++++++
 2 files changed, 266 insertions(+), 30 deletions(-)
diff --git a/server/utils/readme.ts b/server/utils/readme.ts
index e3d337ece..53b7126e0 100644
--- a/server/utils/readme.ts
+++ b/server/utils/readme.ts
@@ -251,8 +251,12 @@ function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo)
   if (!url) return url
   if (url.startsWith('#')) {
     // Prefix anchor links to match heading IDs (avoids collision with page IDs)
+    // Idempotent: don't double-prefix if already prefixed
+    if (url.startsWith('#user-content-')) return url
     return `#user-content-${url.slice(1)}`
   }
+  // Absolute paths (e.g. /package/foo from a previous npmjs redirect) are already resolved
+  if (url.startsWith('/')) return url
   if (hasProtocol(url, { acceptRelative: true })) {
     try {
       const parsed = new URL(url, 'https://example.com')
@@ -381,37 +385,52 @@ export async function renderReadmeHtml(
   // So README starts at h3, and we ensure no levels are skipped
   // Visual styling preserved via data-level attribute (original depth)
   let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading)
-  renderer.heading = function ({ tokens, depth }: Tokens.Heading) {
-    // Calculate the target semantic level based on document structure
-    // Start at h3 (since page h1 + section h2 already exist)
-    // But ensure we never skip levels - can only go down by 1 or stay same/go up
+
+  // Shared heading processing for both markdown and HTML headings
+  function processHeading(depth: number, plainText: string) {
     const semanticLevel = calculateSemanticDepth(depth, lastSemanticLevel)
     lastSemanticLevel = semanticLevel
-    const text = this.parser.parseInline(tokens)
 
-    // Generate GitHub-style slug for anchor links
-    let slug = slugify(text)
-    if (!slug) slug = 'heading' // Fallback for empty headings
+    let slug = slugify(plainText)
+    if (!slug) slug = 'heading'
 
-    // Handle duplicate slugs (GitHub-style: foo, foo-1, foo-2)
     const count = usedSlugs.get(slug) ?? 0
     usedSlugs.set(slug, count + 1)
     const uniqueSlug = count === 0 ? slug : `${slug}-${count}`
-
-    // Prefix with 'user-content-' to avoid collisions with page IDs
-    // (e.g., #install, #dependencies, #versions are used by the package page)
     const id = `user-content-${uniqueSlug}`
 
-    // Collect TOC item with plain text (HTML stripped, entities decoded)
-    const plainText = decodeHtmlEntities(stripHtmlTags(text).trim())
     if (plainText) {
       toc.push({ text: plainText, id, depth })
     }
 
-    /** The link href uses the unique slug WITHOUT the 'user-content-' prefix, because that will later be added for all links. */
     return `<h${semanticLevel} id="${id}" data-level="${depth}"><a href="#${uniqueSlug}">${plainText}</a></h${semanticLevel}>\n`
   }
 
+  renderer.heading = function ({ tokens, depth }: Tokens.Heading) {
+    const text = this.parser.parseInline(tokens)
+    const plainText = decodeHtmlEntities(stripHtmlTags(text).trim())
+    return processHeading(depth, plainText)
+  }
+
+  // Intercept HTML headings so they get id, TOC entry, and correct semantic level.
+  // Also intercept raw HTML <a> tags so playground links are collected in the same pass.
+  const htmlHeadingRe = /<h([1-6])(\s[^>]*)?>([\s\S]*?)<\/h\1>/gi
+  const htmlAnchorRe = /<a\s[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi
+  renderer.html = function ({ text }: Tokens.HTML) {
+    let result = text.replace(htmlHeadingRe, (_, level, _attrs, inner) => {
+      const depth = parseInt(level)
+      const plainText = decodeHtmlEntities(stripHtmlTags(inner).trim())
+      return processHeading(depth, plainText).trimEnd()
+    })
+    // Process raw HTML <a> tags for playground link collection and URL resolution
+    result = result.replace(htmlAnchorRe, (_full, href, inner) => {
+      const label = decodeHtmlEntities(stripHtmlTags(inner).trim())
+      const { resolvedHref, extraAttrs } = processLink(href, label)
+      return `<a href="${resolvedHref}"${extraAttrs}>${inner}</a>`
+    })
+    return result
+  }
+
   // Syntax highlighting for code blocks (uses shared highlighter)
   renderer.code = ({ text, lang }: Tokens.Code) => {
     const html = highlightCodeSync(shiki, text, lang || 'text')
@@ -433,7 +452,35 @@ ${html}
     return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
   }
 
+  // Helper: resolve a link href, collect playground links, and build <a> attributes.
+  // Used by both the markdown renderer.link and the HTML <a> interceptor so that
+  // all link processing happens in a single pass during marked rendering.
+  function processLink(href: string, label: string): { resolvedHref: string; extraAttrs: string } {
+    const resolvedHref = resolveUrl(href, packageName, repoInfo)
+
+    // Collect playground links
+    const provider = matchPlaygroundProvider(resolvedHref)
+    if (provider && !seenUrls.has(resolvedHref)) {
+      seenUrls.add(resolvedHref)
+      collectedLinks.push({
+        url: resolvedHref,
+        provider: provider.id,
+        providerName: provider.name,
+        label: decodeHtmlEntities(label || provider.name),
+      })
+    }
+
+    // Security attributes for external links
+    let extraAttrs = ''
+    if (resolvedHref && hasProtocol(resolvedHref, { acceptRelative: true })) {
+      extraAttrs = ' rel="nofollow noreferrer noopener" target="_blank"'
+    }
+
+    return { resolvedHref, extraAttrs }
+  }
+
   // Resolve link URLs, add security attributes, and collect playground links
+  // — all in a single pass during marked rendering (no deferred processing)
   renderer.link = function ({ href, title, tokens }: Tokens.Link) {
     const text = this.parser.parseInline(tokens)
     const titleAttr = title ? ` title="${title}"` : ''
@@ -444,10 +491,9 @@ ${html}
       plainText = tokens[0].text
     }
 
-    const intermediateTitleAttr =
-      plainText || title ? ` data-title-intermediate="${plainText || title}"` : ''
+    const { resolvedHref, extraAttrs } = processLink(href, plainText || title || '')
 
-    return `<a href="${href}"${titleAttr}${intermediateTitleAttr}>${text}</a>`
+    return `<a href="${resolvedHref}"${titleAttr}${extraAttrs}>${text}</a>`
   }
 
   // GitHub-style callouts: > [!NOTE], > [!TIP], etc.
@@ -475,26 +521,32 @@ ${html}
     allowedSchemes: ['http', 'https', 'mailto'],
     // Transform img src URLs (GitHub blob → raw, relative → GitHub raw)
     transformTags: {
+      // Headings are already processed to correct semantic levels by processHeading()
+      // during the marked rendering pass. The sanitizer just needs to preserve them.
+      // For any stray headings that didn't go through processHeading (shouldn't happen),
+      // we still apply a safe fallback shift.
       h1: (_, attribs) => {
+        if (attribs['data-level']) return { tagName: 'h1', attribs }
         return { tagName: 'h3', attribs: { ...attribs, 'data-level': '1' } }
       },
       h2: (_, attribs) => {
+        if (attribs['data-level']) return { tagName: 'h2', attribs }
         return { tagName: 'h4', attribs: { ...attribs, 'data-level': '2' } }
       },
       h3: (_, attribs) => {
-        if (attribs['data-level']) return { tagName: 'h3', attribs: attribs }
+        if (attribs['data-level']) return { tagName: 'h3', attribs }
         return { tagName: 'h5', attribs: { ...attribs, 'data-level': '3' } }
       },
       h4: (_, attribs) => {
-        if (attribs['data-level']) return { tagName: 'h4', attribs: attribs }
+        if (attribs['data-level']) return { tagName: 'h4', attribs }
         return { tagName: 'h6', attribs: { ...attribs, 'data-level': '4' } }
       },
       h5: (_, attribs) => {
-        if (attribs['data-level']) return { tagName: 'h5', attribs: attribs }
+        if (attribs['data-level']) return { tagName: 'h5', attribs }
         return { tagName: 'h6', attribs: { ...attribs, 'data-level': '5' } }
       },
       h6: (_, attribs) => {
-        if (attribs['data-level']) return { tagName: 'h6', attribs: attribs }
+        if (attribs['data-level']) return { tagName: 'h6', attribs }
         return { tagName: 'h6', attribs: { ...attribs, 'data-level': '6' } }
       },
       img: (tagName, attribs) => {
@@ -522,6 +574,11 @@ ${html}
         }
         return { tagName, attribs }
       },
+      // Markdown links are fully processed in renderer.link (single-pass).
+      // However, inline HTML <a> tags inside paragraphs are NOT seen by
+      // renderer.html (marked parses them as paragraph tokens, not html tokens).
+      // So we still need to collect playground links here for those cases.
+      // The seenUrls set ensures no duplicates across both paths.
       a: (tagName, attribs) => {
         if (!attribs.href) {
           return { tagName, attribs }
@@ -529,24 +586,22 @@ ${html}
 
         const resolvedHref = resolveUrl(attribs.href, packageName, repoInfo)
 
+        // Collect playground links from inline HTML <a> tags that weren't
+        // caught by renderer.link or renderer.html
         const provider = matchPlaygroundProvider(resolvedHref)
         if (provider && !seenUrls.has(resolvedHref)) {
           seenUrls.add(resolvedHref)
-
           collectedLinks.push({
             url: resolvedHref,
             provider: provider.id,
             providerName: provider.name,
-            /**
-             * We need to set some data attribute before hand because `transformTags` doesn't
-             * provide the text of the element. This will automatically be removed, because there
-             * is an allow list for link attributes.
-             * */
-            label: decodeHtmlEntities(attribs['data-title-intermediate'] || provider.name),
+            // sanitize-html transformTags doesn't provide element text content,
+            // so we fall back to the provider name for the label
+            label: provider.name,
           })
         }
 
-        // Add security attributes for external links
+        // Add security attributes for external links (idempotent)
         if (resolvedHref && hasProtocol(resolvedHref, { acceptRelative: true })) {
           attribs.rel = 'nofollow noreferrer noopener'
           attribs.target = '_blank'
diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts
index d06be1806..50a5cc3c9 100644
--- a/test/unit/server/utils/readme.spec.ts
+++ b/test/unit/server/utils/readme.spec.ts
@@ -475,4 +475,185 @@ describe('HTML output', () => {
 <p>Some <strong>bold</strong> text and a <a href="https://example.com" rel="nofollow noreferrer noopener" target="_blank">link</a>.</p>
 `)
   })
+
+  it('adds id to raw HTML headings', async () => {
+    const result = await renderReadmeHtml('<h1>Title</h1>', 'test-pkg')
+    expect(result.html).toContain('id="user-content-title"')
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
+  })
+
+  it('adds id to HTML heading in multi-element token', async () => {
+    const md = '<h1 align="center">My Package</h1>\n<p align="center">A description</p>'
+    const result = await renderReadmeHtml(md, 'test-pkg')
+    expect(result.html).toContain('id="user-content-my-package"')
+    expect(result.toc[0]).toMatchObject({ text: 'My Package', depth: 1 })
+  })
+
+  it('handles duplicate raw HTML heading slugs', async () => {
+    const md = '<h2>API</h2>\n\n<h2>API</h2>'
+    const result = await renderReadmeHtml(md, 'test-pkg')
+    expect(result.html).toContain('id="user-content-api"')
+    expect(result.html).toContain('id="user-content-api-1"')
+  })
+})
+
+/**
+ * Tests for issue #1323: single-pass markdown rendering behavior.
+ *
+ * The core concern is that mixing markdown headings and raw HTML headings
+ * must produce TOC entries, heading IDs, and duplicate-slug suffixes in
+ * exact document order — the same as GitHub does.
+ *
+ * If the implementation processes markdown headings in one pass and HTML
+ * headings in a separate (later) pass, the ordering will be wrong.
+ */
+describe('Issue #1323 — single-pass rendering correctness', () => {
+  describe('mixed markdown + HTML headings: TOC order and IDs', () => {
+    it('produces TOC entries in document order when markdown and HTML headings are interleaved', async () => {
+      // This is the core scenario from the issue: HTML headings appear
+      // between markdown headings. A two-pass approach would collect all
+      // markdown headings first, then HTML headings — scrambling the order.
+      const md = [
+        '# First (markdown)',
+        '',
+        '<h2>Second (html)</h2>',
+        '',
+        '## Third (markdown)',
+        '',
+        '<h2>Fourth (html)</h2>',
+        '',
+        '## Fifth (markdown)',
+      ].join('\n')
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      // TOC must reflect exact document order
+      expect(result.toc).toHaveLength(5)
+      expect(result.toc[0]!.text).toBe('First (markdown)')
+      expect(result.toc[1]!.text).toBe('Second (html)')
+      expect(result.toc[2]!.text).toBe('Third (markdown)')
+      expect(result.toc[3]!.text).toBe('Fourth (html)')
+      expect(result.toc[4]!.text).toBe('Fifth (markdown)')
+    })
+
+    it('assigns duplicate-slug suffixes in document order across mixed heading types', async () => {
+      // Two markdown "API" headings with an HTML "API" heading in between.
+      // Correct: api, api-1, api-2 in that order.
+      // If HTML headings are processed in a separate pass, the HTML one
+      // could get suffix -2 while the last markdown one gets -1.
+      const md = ['## API', '', '<h2>API</h2>', '', '## API'].join('\n')
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      expect(result.toc).toHaveLength(3)
+      expect(result.toc[0]!.id).toBe('user-content-api')
+      expect(result.toc[1]!.id).toBe('user-content-api-1')
+      expect(result.toc[2]!.id).toBe('user-content-api-2')
+
+      // The HTML output must also have these IDs in order
+      const ids = Array.from(result.html.matchAll(/id="(user-content-api(?:-\d+)?)"/g), m => m[1])
+      expect(ids).toEqual(['user-content-api', 'user-content-api-1', 'user-content-api-2'])
+    })
+
+    it('heading semantic levels are sequential even when mixing heading types', async () => {
+      // h1 (md) → h3, h3 (html) → should be h4 (max = lastSemantic + 1),
+      // not jump to h5 or h6 because it was processed in a later pass.
+      const md = ['# Title', '', '<h3>Subsection</h3>', '', '#### Deep'].join('\n')
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      // Extract semantic tags in order from the HTML
+      const tags = Array.from(result.html.matchAll(/<h(\d)/g), m => Number(m[1]))
+      // h1→h3, h3→h4 (sequential after h3), h4→h5 (sequential after h4)
+      expect(tags).toEqual([3, 4, 5])
+    })
+  })
+
+  describe('HTML heading between markdown headings — ID and href consistency', () => {
+    it('heading id and its anchor href point to the same slug', async () => {
+      const md = ['# Introduction', '', '<h2>Getting Started</h2>', '', '## Installation'].join(
+        '\n',
+      )
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      // For every heading, the slug used in id="user-content-{slug}" must
+      // match the slug in the child anchor href="#user-content-{slug}"
+      // (resolveUrl prefixes # anchors with user-content-).
+      const headingPairs = [
+        ...result.html.matchAll(/id="user-content-([^"]+)"[^>]*><a href="#user-content-([^"]+)">/g),
+      ]
+      expect(headingPairs.length).toBeGreaterThan(0)
+      for (const match of headingPairs) {
+        // slug portion must be identical
+        expect(match[1]).toBe(match[2])
+      }
+    })
+  })
+
+  describe('playground links collected from HTML <a> tags in single pass', () => {
+    it('collects playground links from raw HTML anchor tags', async () => {
+      // Some READMEs (like eslint's sponsor section) use raw HTML <a> tags
+      // instead of markdown link syntax. These must also be picked up.
+      const md = [
+        '# My Package',
+        '',
+        '<a href="https://stackblitz.com/edit/my-demo">Open in StackBlitz</a>',
+        '',
+        'Some text with a [CodeSandbox link](https://codesandbox.io/s/example)',
+      ].join('\n')
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      // Both playground links should be collected regardless of syntax
+      const providers = result.playgroundLinks.map(l => l.provider)
+      expect(providers).toContain('stackblitz')
+      expect(providers).toContain('codesandbox')
+    })
+  })
+
+  describe('complex real-world interleaving (atproxy-like)', () => {
+    it('handles a README with HTML h1 followed by markdown h2 and mixed content', async () => {
+      // Simulates a pattern like atproxy's README where h1 is HTML
+      // and subsequent headings are markdown
+      const md = [
+        '<h1 align="center">atproxy</h1>',
+        '<p align="center">A cool proxy library</p>',
+        '',
+        '## Features',
+        '',
+        '- Fast',
+        '- Simple',
+        '',
+        '## Installation',
+        '',
+        '```bash',
+        'npm install atproxy',
+        '```',
+        '',
+        '<h2>Advanced Usage</h2>',
+        '',
+        '## API',
+        '',
+        '### Methods',
+      ].join('\n')
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      // TOC order must be: atproxy, Features, Installation, Advanced Usage, API, Methods
+      expect(result.toc.map(t => t.text)).toEqual([
+        'atproxy',
+        'Features',
+        'Installation',
+        'Advanced Usage',
+        'API',
+        'Methods',
+      ])
+
+      // All IDs should be unique
+      const ids = result.toc.map(t => t.id)
+      expect(new Set(ids).size).toBe(ids.length)
+    })
+  })
 })

From a4d07f7c5e22dbea298e5d3b041a88cb9ec24b2a Mon Sep 17 00:00:00 2001
From: rygrit <ry_grit@sina.com>
Date: Sun, 1 Mar 2026 14:37:16 +0800
Subject: [PATCH 2/7] refactor: add user content prefixing for IDs

---
 server/utils/readme.ts | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/server/utils/readme.ts b/server/utils/readme.ts
index 9920522b4..747115fd9 100644
--- a/server/utils/readme.ts
+++ b/server/utils/readme.ts
@@ -266,6 +266,16 @@ const reservedPathsNpmJs = [
 
 const npmJsHosts = new Set(['www.npmjs.com', 'npmjs.com', 'www.npmjs.org', 'npmjs.org'])
 
+const USER_CONTENT_PREFIX = 'user-content-'
+
+function withUserContentPrefix(value: string): string {
+  return value.startsWith(USER_CONTENT_PREFIX) ? value : `${USER_CONTENT_PREFIX}${value}`
+}
+
+function toUserContentHash(value: string): string {
+  return `#${withUserContentPrefix(value)}`
+}
+
 const isNpmJsUrlThatCanBeRedirected = (url: URL) => {
   if (!npmJsHosts.has(url.host)) {
     return false
@@ -292,8 +302,7 @@ function resolveUrl(url: string, packageName: string, repoInfo?: RepositoryInfo)
   if (url.startsWith('#')) {
     // Prefix anchor links to match heading IDs (avoids collision with page IDs)
     // Idempotent: don't double-prefix if already prefixed
-    if (url.startsWith('#user-content-')) return url
-    return `#user-content-${url.slice(1)}`
+    return toUserContentHash(url.slice(1))
   }
   // Absolute paths (e.g. /package/foo from a previous npmjs redirect) are already resolved
   if (url.startsWith('/')) return url
@@ -385,8 +394,8 @@ function resolveImageUrl(url: string, packageName: string, repoInfo?: Repository
 
 // Helper to prefix id attributes with 'user-content-'
 function prefixId(tagName: string, attribs: sanitizeHtml.Attributes) {
-  if (attribs.id && !attribs.id.startsWith('user-content-')) {
-    attribs.id = `user-content-${attribs.id}`
+  if (attribs.id) {
+    attribs.id = withUserContentPrefix(attribs.id)
   }
   return { tagName, attribs }
 }
@@ -437,7 +446,7 @@ export async function renderReadmeHtml(
     const count = usedSlugs.get(slug) ?? 0
     usedSlugs.set(slug, count + 1)
     const uniqueSlug = count === 0 ? slug : `${slug}-${count}`
-    const id = `user-content-${uniqueSlug}`
+    const id = withUserContentPrefix(uniqueSlug)
 
     if (plainText) {
       toc.push({ text: plainText, id, depth })

From 7fb441490822266b26dfa162bfa41d2f6045abef Mon Sep 17 00:00:00 2001
From: rygrit <ry_grit@sina.com>
Date: Sun, 1 Mar 2026 21:25:41 +0800
Subject: [PATCH 3/7] fix: heading IDs may conflict when text starts with user-content-

---
 server/utils/readme.ts                |  8 ++++++--
 test/unit/server/utils/readme.spec.ts | 11 +++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/server/utils/readme.ts b/server/utils/readme.ts
index 747115fd9..611ad2725 100644
--- a/server/utils/readme.ts
+++ b/server/utils/readme.ts
@@ -272,6 +272,10 @@ function withUserContentPrefix(value: string): string {
   return value.startsWith(USER_CONTENT_PREFIX) ? value : `${USER_CONTENT_PREFIX}${value}`
 }
 
+function toUserContentId(value: string): string {
+  return `${USER_CONTENT_PREFIX}${value}`
+}
+
 function toUserContentHash(value: string): string {
   return `#${withUserContentPrefix(value)}`
 }
@@ -446,13 +450,13 @@ export async function renderReadmeHtml(
     const count = usedSlugs.get(slug) ?? 0
     usedSlugs.set(slug, count + 1)
     const uniqueSlug = count === 0 ? slug : `${slug}-${count}`
-    const id = withUserContentPrefix(uniqueSlug)
+    const id = toUserContentId(uniqueSlug)
 
     if (plainText) {
       toc.push({ text: plainText, id, depth })
     }
 
-    return `<h${semanticLevel} id="${id}" data-level="${depth}"><a href="#${uniqueSlug}">${plainText}</a></h${semanticLevel}>\n`
+    return `<h${semanticLevel} id="${id}" data-level="${depth}"><a href="#${id}">${plainText}</a></h${semanticLevel}>\n`
   }
 
   renderer.heading = function ({ tokens, depth }: Tokens.Heading) {
diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts
index 6393e9d8f..501a9833d 100644
--- a/test/unit/server/utils/readme.spec.ts
+++ b/test/unit/server/utils/readme.spec.ts
@@ -642,6 +642,17 @@ describe('Issue #1323 — single-pass rendering correctness', () => {
       expect(ids).toEqual(['user-content-api', 'user-content-api-1', 'user-content-api-2'])
     })
 
+    it('does not collide when heading text already starts with user-content-', async () => {
+      const md = ['# Title', '', '# user-content-title'].join('\n')
+
+      const result = await renderReadmeHtml(md, 'test-pkg')
+
+      const ids = Array.from(result.html.matchAll(/id="(user-content-[^"]+)"/g), m => m[1])
+      expect(ids).toEqual(['user-content-title', 'user-content-user-content-title'])
+      expect(new Set(ids).size).toBe(ids.length)
+      expect(result.toc.map(t => t.id)).toEqual(ids)
+    })
+
     it('heading semantic levels are sequential even when mixing heading types', async () => {
       // h1 (md) → h3, h3 (html) → should be h4 (max = lastSemantic + 1),
       // not jump to h5 or h6 because it was processed in a later pass.

From 027cfb8587549340d70672259eae53e24b4044f7 Mon Sep 17 00:00:00 2001
From: rygrit <ry_grit@sina.com>
Date: Sun, 1 Mar 2026 21:28:26 +0800
Subject: [PATCH 4/7] test: assert TOC length before accessing its first entry

---
 test/unit/server/utils/readme.spec.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts
index 501a9833d..1d234610b 100644
--- a/test/unit/server/utils/readme.spec.ts
+++ b/test/unit/server/utils/readme.spec.ts
@@ -572,6 +572,7 @@ describe('HTML output', () => {
   it('adds id to HTML heading in multi-element token', async () => {
     const md = '<h1 align="center">My Package</h1>\n<p align="center">A description</p>'
     const result = await renderReadmeHtml(md, 'test-pkg')
+    expect(result.toc).toHaveLength(1)
     expect(result.html).toContain('id="user-content-my-package"')
     expect(result.toc[0]).toMatchObject({ text: 'My Package', depth: 1 })
   })

From 84272f99ea5b66abfb9ae3b5a96eecdac5c592cf Mon Sep 17 00:00:00 2001
From: rygrit <ry_grit@sina.com>
Date: Sun, 1 Mar 2026 22:29:48 +0800
Subject: [PATCH 5/7] test: add unit tests for raw HTML headings and anchors

---
 test/unit/server/utils/readme.spec.ts | 50 +++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts
index 1d234610b..099459754 100644
--- a/test/unit/server/utils/readme.spec.ts
+++ b/test/unit/server/utils/readme.spec.ts
@@ -583,6 +583,56 @@ describe('HTML output', () => {
     expect(result.html).toContain('id="user-content-api"')
     expect(result.html).toContain('id="user-content-api-1"')
   })
+
+  it('preserves supported attributes on raw HTML headings', async () => {
+    const md = '<h1 align="center">My Package</h1>'
+    const result = await renderReadmeHtml(md, 'test-pkg')
+
+    expect(result.html).toContain('id="user-content-my-package"')
+    expect(result.html).toContain('align="center"')
+  })
+
+  it('preserves supported attributes on rewritten raw HTML anchors (renderer.html path)', async () => {
+    const md = [
+      '<div>',
+      '  <a href="https://stackblitz.com/edit/my-demo" title="Open demo">Open in StackBlitz</a>',
+      '</div>',
+    ].join('\n')
+    const result = await renderReadmeHtml(md, 'test-pkg')
+
+    expect(result.html).toContain('href="https://stackblitz.com/edit/my-demo"')
+    expect(result.html).toContain('title="Open demo"')
+    expect(result.html).toContain('rel="nofollow noreferrer noopener"')
+    expect(result.html).toContain('target="_blank"')
+  })
+
+  it('preserves title when it appears before href (renderer.html path)', async () => {
+    const md = [
+      '<div>',
+      '  <a title="Open demo" href="https://stackblitz.com/edit/my-demo">Open in StackBlitz</a>',
+      '</div>',
+    ].join('\n')
+    const result = await renderReadmeHtml(md, 'test-pkg')
+
+    expect(result.html).toContain('title="Open demo"')
+    expect(result.html).toContain('href="https://stackblitz.com/edit/my-demo"')
+    expect(result.html).toContain('rel="nofollow noreferrer noopener"')
+    expect(result.html).toContain('target="_blank"')
+  })
+
+  it('overrides existing rel and target instead of duplicating them (renderer.html path)', async () => {
+    const md = [
+      '<div>',
+      '  <a href="https://stackblitz.com/edit/my-demo" rel="bookmark" target="_self" title="Open demo">Open in StackBlitz</a>',
+      '</div>',
+    ].join('\n')
+    const result = await renderReadmeHtml(md, 'test-pkg')
+
+    expect(result.html).toContain('rel="nofollow noreferrer noopener"')
+    expect(result.html).toContain('target="_blank"')
+    expect(result.html).not.toContain('rel="bookmark"')
+    expect(result.html).not.toContain('target="_self"')
+  })
 })
 
 /**

From dcda67fdd5fab62a1f29a538cbf7af7166aac00a Mon Sep 17 00:00:00 2001
From: rygrit <ry_grit@sina.com>
Date: Sun, 1 Mar 2026 22:30:43 +0800
Subject: [PATCH 6/7] fix: preserve allowed attributes (ALLOW_ATTR) on raw HTML headings and anchors

---
 server/utils/readme.ts | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/server/utils/readme.ts b/server/utils/readme.ts
index 611ad2725..f2441a9ea 100644
--- a/server/utils/readme.ts
+++ b/server/utils/readme.ts
@@ -280,6 +280,16 @@ function toUserContentHash(value: string): string {
   return `#${withUserContentPrefix(value)}`
 }
 
+function normalizePreservedAnchorAttrs(attrs: string): string {
+  const cleanedAttrs = attrs
+    .replace(/\s+href\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi, '')
+    .replace(/\s+rel\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi, '')
+    .replace(/\s+target\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi, '')
+    .trim()
+
+  return cleanedAttrs ? ` ${cleanedAttrs}` : ''
+}
+
 const isNpmJsUrlThatCanBeRedirected = (url: URL) => {
   if (!npmJsHosts.has(url.host)) {
     return false
@@ -440,7 +450,7 @@ export async function renderReadmeHtml(
   let lastSemanticLevel = 2 // Start after h2 (the "Readme" section heading)
 
   // Shared heading processing for both markdown and HTML headings
-  function processHeading(depth: number, plainText: string) {
+  function processHeading(depth: number, plainText: string, preservedAttrs = '') {
     const semanticLevel = calculateSemanticDepth(depth, lastSemanticLevel)
     lastSemanticLevel = semanticLevel
 
@@ -456,7 +466,7 @@ export async function renderReadmeHtml(
       toc.push({ text: plainText, id, depth })
     }
 
-    return `<h${semanticLevel} id="${id}" data-level="${depth}"><a href="#${id}">${plainText}</a></h${semanticLevel}>\n`
+    return `<h${semanticLevel} id="${id}" data-level="${depth}"${preservedAttrs}><a href="#${id}">${plainText}</a></h${semanticLevel}>\n`
   }
 
   renderer.heading = function ({ tokens, depth }: Tokens.Heading) {
@@ -468,18 +478,21 @@ export async function renderReadmeHtml(
   // Intercept HTML headings so they get id, TOC entry, and correct semantic level.
   // Also intercept raw HTML <a> tags so playground links are collected in the same pass.
   const htmlHeadingRe = /<h([1-6])(\s[^>]*)?>([\s\S]*?)<\/h\1>/gi
-  const htmlAnchorRe = /<a\s[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi
+  const htmlAnchorRe = /<a(\s[^>]*?)href=(["'])([^"']*)\2([^>]*)>([\s\S]*?)<\/a>/gi
   renderer.html = function ({ text }: Tokens.HTML) {
-    let result = text.replace(htmlHeadingRe, (_, level, _attrs, inner) => {
+    let result = text.replace(htmlHeadingRe, (_, level, attrs, inner) => {
       const depth = parseInt(level)
       const plainText = decodeHtmlEntities(stripHtmlTags(inner).trim())
-      return processHeading(depth, plainText).trimEnd()
+      const align = /\balign=(["'])(.*?)\1/i.exec(attrs)?.[2]
+      const preservedAttrs = align ? ` align="${align}"` : ''
+      return processHeading(depth, plainText, preservedAttrs).trimEnd()
     })
     // Process raw HTML <a> tags for playground link collection and URL resolution
-    result = result.replace(htmlAnchorRe, (_full, href, inner) => {
+    result = result.replace(htmlAnchorRe, (_full, beforeHref, _quote, href, afterHref, inner) => {
       const label = decodeHtmlEntities(stripHtmlTags(inner).trim())
       const { resolvedHref, extraAttrs } = processLink(href, label)
-      return `<a href="${resolvedHref}"${extraAttrs}>${inner}</a>`
+      const preservedAttrs = normalizePreservedAnchorAttrs(`${beforeHref ?? ''}${afterHref ?? ''}`)
+      return `<a${preservedAttrs} href="${resolvedHref}"${extraAttrs}>${inner}</a>`
     })
     return result
   }

From 8f8e3c0b1e5869a2c5b1f03122bb408c8fa7f2a8 Mon Sep 17 00:00:00 2001
From: RYGRIT <ry_grit@sina.com>
Date: Mon, 2 Mar 2026 09:08:33 +0800
Subject: [PATCH 7/7] fix: default heading attrs to '' when the tag has no attributes

---
 server/utils/readme.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/utils/readme.ts b/server/utils/readme.ts
index f2441a9ea..297dc5bcd 100644
--- a/server/utils/readme.ts
+++ b/server/utils/readme.ts
@@ -480,7 +480,7 @@ export async function renderReadmeHtml(
   const htmlHeadingRe = /<h([1-6])(\s[^>]*)?>([\s\S]*?)<\/h\1>/gi
   const htmlAnchorRe = /<a(\s[^>]*?)href=(["'])([^"']*)\2([^>]*)>([\s\S]*?)<\/a>/gi
   renderer.html = function ({ text }: Tokens.HTML) {
-    let result = text.replace(htmlHeadingRe, (_, level, attrs, inner) => {
+    let result = text.replace(htmlHeadingRe, (_, level, attrs = '', inner) => {
       const depth = parseInt(level)
       const plainText = decodeHtmlEntities(stripHtmlTags(inner).trim())
       const align = /\balign=(["'])(.*?)\1/i.exec(attrs)?.[2]