diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index cf9c5eca8c9..ae1b70afe1d 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -198,17 +198,20 @@ export type TextMatchTransformer = Readonly<{ type: 'text-match'; }>; -const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/; +const EMPTY_OR_WHITESPACE_ONLY = /^[\t ]*$/; +const ORDERED_LIST_REGEX = /^(\s*)(\d+)\.\s/; const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/; const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i; const HEADING_REGEX = /^(#{1,6})\s/; const QUOTE_REGEX = /^>\s/; -const CODE_START_REGEX = /^[ \t]*```([\w-]+)?/; -const CODE_END_REGEX = /[ \t]*```$/; +const CODE_START_REGEX = /^[ \t]*(?:```|\\`\\`\\`)([\w-]+)?/; +const CODE_END_REGEX = /^[ \t]*(?:```|\\`\\`\\`)$/; const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/; -const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/; +const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/; const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/; +const TAG_START_REGEX = /^[ \t]*<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i; +const TAG_END_REGEX = /^[ \t]*<\/[a-z_][\w-]*\s*>/i; const createBlockNode = ( createNode: (match: Array) => ElementNode, @@ -602,6 +605,7 @@ export function normalizeMarkdown( const lines = input.split('\n'); let inCodeBlock = false; const sanitizedLines: string[] = []; + let nestedDeepCodeBlock = 0; for (let i = 0; i < lines.length; i++) { const line = lines[i]; @@ -613,9 +617,24 @@ export function normalizeMarkdown( continue; } - // Detect the start or end of a code block - if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) { - inCodeBlock = !inCodeBlock; + if (CODE_END_REGEX.test(line)) { + if (nestedDeepCodeBlock === 0) { + inCodeBlock = true; + } + if (nestedDeepCodeBlock === 1) { + inCodeBlock = false; + } + if (nestedDeepCodeBlock > 0) { + nestedDeepCodeBlock--; + } + sanitizedLines.push(line); + continue; + } + + // Toggle inCodeBlock state when encountering start or end of a code block + if (CODE_START_REGEX.test(line)) { + inCodeBlock = true; + nestedDeepCodeBlock++; sanitizedLines.push(line); continue; } @@ -629,8 +648,8 @@ export function normalizeMarkdown( // In markdown the concept of "empty paragraphs" does not exist. // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged. if ( - line === '' || - lastLine === '' || + EMPTY_OR_WHITESPACE_ONLY.test(line) || + EMPTY_OR_WHITESPACE_ONLY.test(lastLine!) || !lastLine || HEADING_REGEX.test(lastLine) || HEADING_REGEX.test(line) || @@ -640,11 +659,16 @@ export function normalizeMarkdown( CHECK_LIST_REGEX.test(line) || TABLE_ROW_REG_EXP.test(line) || TABLE_ROW_DIVIDER_REG_EXP.test(line) || - !shouldMergeAdjacentLines + !shouldMergeAdjacentLines || + TAG_START_REGEX.test(line) || + TAG_END_REGEX.test(line) || + TAG_START_REGEX.test(lastLine) || + TAG_END_REGEX.test(lastLine) || + CODE_END_REGEX.test(lastLine) ) { sanitizedLines.push(line); } else { - sanitizedLines[sanitizedLines.length - 1] = lastLine + line; + sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trim(); } } diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts index ca1b5a9175d..5f19b104aa7 100644 --- a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts +++ b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts @@ -277,7 +277,7 @@ describe('Markdown', () => { }, { // Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A! - html: '

Helloworld!

', + html: '

Hello world !

', md: ['Hello', 'world', '!'].join('\n'), shouldMergeAdjacentLines: true, skipExport: true, @@ -303,7 +303,7 @@ describe('Markdown', () => { // }, { // Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A! - html: '', + html: '', md: '- Hello\n- world\n!\n!', shouldMergeAdjacentLines: true, skipExport: true, @@ -396,7 +396,7 @@ describe('Markdown', () => { mdAfterExport: '*Hello **world**!*', }, { - html: '

helloworld

', + html: '

hello world

', md: 'hello\nworld', shouldMergeAdjacentLines: true, skipExport: true, @@ -504,7 +504,7 @@ describe('Markdown', () => { }, { // https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A! - html: '
Helloworld!
', + html: '
Hello world !
', md: '> Hello\nworld\n!', shouldMergeAdjacentLines: true, skipExport: true, @@ -953,7 +953,7 @@ E2 E3 `; expect(normalizeMarkdown(markdown, true)).toBe(` -A1A2 +A1 A2 A3 @@ -964,7 +964,7 @@ B2 B3 \`\`\` -C1C2 +C1 C2 C3 @@ -977,7 +977,7 @@ D3 \`\`\`single line code\`\`\` -E1E2 +E1 E2 E3 `); @@ -1070,3 +1070,64 @@ E3 expect(normalizeMarkdown(markdown, false)).toBe(markdown); }); }); + +describe.skip('normalizeMarkdown – new behaviors', () => { + it('merges adjacent plain text lines with a single space', () => { + const md = `Hello +world`; + expect(normalizeMarkdown(md, true)).toBe(`Hello world`); + }); + + it('merges while trimming the next line and inserting a single space', () => { + const md = `Hello + world `; + expect(normalizeMarkdown(md, true)).toBe(`Hello world`); + }); + + it('does not merge across HTML-like tags (opening, content, closing, after)', () => { + const md = `
+content +
+after`; + // Nothing should be merged + expect(normalizeMarkdown(md, true)).toBe(md); + }); + + it('does not merge the fence line with the first line after a code block', () => { + const md = '```\ncode\n```\nNext line'; + // The closing ``` must remain on its own line; "Next line" must not be glued to it + expect(normalizeMarkdown(md, true)).toBe('```\ncode\n```\nNext line'); + }); + + it('treats whitespace-only lines as empty separators (no merge across them)', () => { + const md = `A1 + +A2`; + // The middle line is spaces only; should be treated as an empty separator + expect(normalizeMarkdown(md, true)).toBe(`A1 + +A2`); + }); + + it('handles a code block that contains a literal ``` line without breaking merging outside', () => { + const md = `Intro +para +\`\`\`md +some code +\`\`\` +still code +\`\`\` +Outro +text`; + // Outside the fenced block, adjacent non-empty lines should merge with a space + expect(normalizeMarkdown(md, true)).toBe( + `Intro para +\`\`\`md +some code +\`\`\` +still code +\`\`\` +Outro text`, + ); + }); +});