From 8d6189a59a2521128f5179a0ca5c475f6148fb05 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 10:21:05 -0700 Subject: [PATCH 01/21] Add `cmark_strbuf_remove` that removes a subrange of characters. --- src/buffer.c | 5 +++++ src/buffer.h | 9 +++++++++ 2 files changed, 14 insertions(+) mode change 100644 => 100755 src/buffer.c mode change 100644 => 100755 src/buffer.h diff --git a/src/buffer.c b/src/buffer.c old mode 100644 new mode 100755 index d94649310..9300c4553 --- a/src/buffer.c +++ b/src/buffer.c @@ -242,6 +242,11 @@ void cmark_strbuf_trim(cmark_strbuf *buf) { cmark_strbuf_rtrim(buf); } +void cmark_strbuf_remove(cmark_strbuf *buf, bufsize_t start_offset, bufsize_t len) { + memmove(buf->ptr + start_offset, buf->ptr + start_offset + len, buf->size - (start_offset + len)); + buf->size -= len; +} + // Destructively modify string, collapsing consecutive // space and newline characters into a single space. void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { diff --git a/src/buffer.h b/src/buffer.h old mode 100644 new mode 100755 index e8780753f..4a6db433d --- a/src/buffer.h +++ b/src/buffer.h @@ -72,6 +72,15 @@ void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); void cmark_strbuf_rtrim(cmark_strbuf *buf); void cmark_strbuf_trim(cmark_strbuf *buf); + +/** + Removes the characters in the given range. + + @param buf The string buffer. + @param start_offset The starting character offset. + @param len The length of characters to remove. + */ +void cmark_strbuf_remove(cmark_strbuf *buf, bufsize_t start_offset, bufsize_t len); void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); void cmark_strbuf_unescape(cmark_strbuf *s); From e3d06246e079630491bb6c838e23c272d8557ba8 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 10:29:53 -0700 Subject: [PATCH 02/21] Fix source positions for inlines inside inconsistently indented blocks. 
--- src/blocks.c | 39 ++++++++++++++++++++++++-- src/inlines.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 109 insertions(+), 8 deletions(-) mode change 100644 => 100755 src/blocks.c mode change 100644 => 100755 src/inlines.c diff --git a/src/blocks.c b/src/blocks.c old mode 100644 new mode 100755 index b6077eb96..b86e83287 --- a/src/blocks.c +++ b/src/blocks.c @@ -183,8 +183,37 @@ static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) { cmark_strbuf_putc(&node->content, ' '); } } - cmark_strbuf_put(&node->content, ch->data + parser->offset, - ch->len - parser->offset); + + // If inserting the initial line to the node... + if (node->content.size == 0 + // OR the node is a code block... + || node->type == CMARK_NODE_CODE_BLOCK + // OR the node is a HTML block. + || node->type == CMARK_NODE_HTML_BLOCK) { + + // Then do not insert the leading trivia. + cmark_strbuf_put(&node->content, ch->data + parser->offset, + ch->len - parser->offset); + } else { + // Special case for maintaining the source position of block quotes + // as they can be lazy (i.e. the block quote marker can be omitted). + // + // The simple solution is to replace any block quote markers (">") + // present in the leading trivia with whitespace. + // + // Note: Using `parser->offset` and not `parser->first_nonspace` + // because the latter encompasses the former with the addition of + // whitespace (which we are not interested in). + assert(parser->offset <= parser->first_nonspace); + for (int i = 0; i < parser->offset; i++) { + if (peek_at(ch, i) == '>') + ch->data[i] = ' '; + } + + // Otherwise, do not remove leading trivia for appends (i.e. lines + // other than the first). + cmark_strbuf_put(&node->content, ch->data, ch->len); + } } static void remove_trailing_blank_lines(cmark_strbuf *ln) { @@ -242,6 +271,12 @@ static bool resolve_reference_link_definitions( chunk.data += pos; chunk.len -= pos; + + // Leading whitespace is not stripped. 
+ while (cmark_isspace(peek_at(&chunk, 0))) { + chunk.data += 1; + chunk.len -= 1; + } } cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); return !is_blank(&b->content, 0); diff --git a/src/inlines.c b/src/inlines.c old mode 100644 new mode 100755 index e6b491ffa..c83a259e4 --- a/src/inlines.c +++ b/src/inlines.c @@ -327,10 +327,10 @@ static bufsize_t scan_to_closing_backticks(subject *subj, // spaces, then removing a single leading + trailing space, // unless the code span consists entirely of space characters. static void S_normalize_code(cmark_strbuf *s) { - bufsize_t r, w; + bufsize_t r, w, last_char_after_nl; bool contains_nonspace = false; - for (r = 0, w = 0; r < s->size; ++r) { + for (r = 0, w = 0, last_char_after_nl = 0; r < s->size; ++r) { switch (s->ptr[r]) { case '\r': if (s->ptr[r + 1] != '\n') { @@ -339,8 +339,25 @@ static void S_normalize_code(cmark_strbuf *s) { break; case '\n': s->ptr[w++] = ' '; + last_char_after_nl = w; + break; + case ' ': + s->ptr[w++] = s->ptr[r]; break; default: + if (last_char_after_nl) { + // Remove leading whitespace. + bufsize_t remove_len = r - last_char_after_nl; + + if (remove_len) { + cmark_strbuf_remove(s, last_char_after_nl, remove_len); + w -= remove_len; + r -= remove_len; + } + + last_char_after_nl = 0; + } + s->ptr[w++] = s->ptr[r]; } if (s->ptr[r] != ' ') { @@ -348,6 +365,20 @@ static void S_normalize_code(cmark_strbuf *s) { } } + if (last_char_after_nl) { + // Remove leading whitespace. Only reach here if the closing backquote + // delimiter is on its own line. + bufsize_t remove_len = r - last_char_after_nl; + + if (remove_len) { + cmark_strbuf_remove(s, last_char_after_nl, remove_len); + w -= remove_len; + r -= remove_len; + } + + last_char_after_nl = 0; + } + // begins and ends with space? 
if (contains_nonspace && s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') { @@ -363,13 +394,15 @@ static void S_normalize_code(cmark_strbuf *s) { // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. static cmark_node *handle_backticks(subject *subj, int options) { + // Save the current source position in case of need to rewind. + bufsize_t subjpos = subj->pos; cmark_chunk openticks = take_while(subj, isbacktick); bufsize_t startpos = subj->pos; bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind - return make_str(subj, subj->pos, subj->pos, openticks); + return make_str(subj, subjpos, subjpos, openticks); } else { cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); @@ -772,6 +805,10 @@ static cmark_node *handle_backslash(subject *subj) { advance(subj); return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { + // Adjust the subject source position state. 
+ ++subj->line; + subj->column_offset = -subj->pos; + return make_linebreak(subj->mem); } else { return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\")); @@ -852,7 +889,8 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0); + return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, + subj->pos + subj->column_offset - 1, contents, 0); } // next try to match an email autolink @@ -861,7 +899,8 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1); + return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, + subj->pos + subj->column_offset - 1, contents, 1); } // finally, try to match an html tag @@ -1106,7 +1145,8 @@ static cmark_node *handle_close_bracket(subject *subj) { inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; - inl->start_line = inl->end_line = subj->line; + inl->start_line = opener->inl_text->start_line; + inl->end_line = subj->line; inl->start_column = opener->inl_text->start_column; inl->end_column = subj->pos + subj->column_offset + subj->block_offset; cmark_node_insert_before(opener->inl_text, inl); @@ -1217,10 +1257,21 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { cmark_chunk contents; unsigned char c; bufsize_t startpos, endpos; + int saved_block_offset = subj->block_offset; + c = peek_char(subj); if (c == 0) { return 0; } + + // If NOT the subject's initial line... + if (subj->column_offset != 0) { + // Reset the block offset. 
The line's leading trivia was not trimmed, + // so the source position will be computed appropriately without the + // block offset. + subj->block_offset = 0; + } + switch (c) { case '\r': case '\n': @@ -1279,12 +1330,27 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { cmark_chunk_rtrim(&contents); } + // If not the initial line (in the subject) AND at the beginning of another line. + if (subj->column_offset != 0 && startpos + subj->column_offset == 0) { + // Trim leading whitespace. + bufsize_t before_trim = contents.len; + cmark_chunk_ltrim(&contents); + + if (contents.len == 0) + break; // The contents were only whitespaces. + + // Update the start source position. + startpos += before_trim - contents.len; + } + new_inl = make_str(subj, startpos, endpos - 1, contents); } if (new_inl != NULL) { cmark_node_append_child(parent, new_inl); } + subj->block_offset = saved_block_offset; + return 1; } From 9e40723015a6909d94ee7bf218cb6175de86cba5 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 10:38:37 -0700 Subject: [PATCH 03/21] Add three additional source position tests. --- api_test/main.c | 251 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 190 insertions(+), 61 deletions(-) mode change 100644 => 100755 api_test/main.c diff --git a/api_test/main.c b/api_test/main.c old mode 100644 new mode 100755 index 83afbff0c..caa3eb0cb --- a/api_test/main.c +++ b/api_test/main.c @@ -889,68 +889,170 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { } static void source_pos(test_batch_runner *runner) { - static const char markdown[] = - "# Hi *there*.\n" - "\n" - "Hello “ \n" - "there `hi` -- [okay](www.google.com (ok)).\n" - "\n" - "> 1. Okay.\n" - "> Sure.\n" - ">\n" - "> 2. Yes, okay.\n" - "> ![ok](hi \"yes\")\n"; + { + static const char markdown[] = + "# Hi *there*.\n" + "\n" + "Hello “ \n" + "there `hi` -- [okay](www.google.com (ok)).\n" + "\n" + "> 1. 
Okay.\n" + "> Sure.\n" + ">\n" + "> 2. Yes, okay.\n" + "> ![ok](hi \"yes\")\n"; - cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); - char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); - STR_EQ(runner, xml, "\n" - "\n" - "\n" - " \n" - " Hi \n" - " \n" - " there\n" - " \n" - " .\n" - " \n" - " \n" - " Hello “ \n" - " \n" - " http://www.google.com\n" - " \n" - " \n" - " there \n" - " hi\n" - " -- \n" - " \n" - " okay\n" - " \n" - " .\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " Okay.\n" - " \n" - " Sure.\n" - " \n" - " \n" - " \n" - " \n" - " Yes, okay.\n" - " \n" - " \n" - " ok\n" - " \n" - " \n" - " \n" - " \n" - " \n" - "\n", - "sourcepos are as expected"); - free(xml); - cmark_node_free(doc); + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " Hi \n" + " \n" + " there\n" + " \n" + " .\n" + " \n" + " \n" + " Hello \xe2\x80\x9c \n" + " \n" + " http://www.google.com\n" + " \n" + " \n" + " there \n" + " hi\n" + " -- \n" + " \n" + " okay\n" + " \n" + " .\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " Okay.\n" + " \n" + " Sure.\n" + " \n" + " \n" + " \n" + " \n" + " Yes, okay.\n" + " \n" + " \n" + " ok\n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } + { + static const char markdown[] = + "1. 
**Start condition:** line begins with the string ``, or the end of the line.\\\n" + " **End condition:** line contains an end tag\n" + " ``, ``, or `` (case-insensitive; it\n" + " need not match the start tag).\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " Start condition:\n" + " \n" + " line begins with the string \n" + " <script\n" + " ,\n" + " \n" + " <pre\n" + " , or \n" + " <style\n" + " (case-insensitive), followed by whitespace,\n" + " \n" + " the string \n" + " >\n" + " , or the end of the line.\n" + " \n" + " \n" + " End condition:\n" + " \n" + " line contains an end tag\n" + " \n" + " </script>\n" + " , \n" + " </pre>\n" + " , or \n" + " </style>\n" + " (case-insensitive; it\n" + " \n" + " need not match the start tag).\n" + " \n" + " \n" + " \n" + "\n", + "list (with EOL backslash) sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } + { + static const char markdown[] = + "> The overriding design goal for Markdown's formatting syntax is\n" + " > to make it as **readable as possible**. The idea is that a\n" + "> Markdown-formatted document should be publishable as-is, as\n" + " > plain text, without *looking like* it's been marked up with tags\n" + " > or formatting instructions.\n" + "> ()\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " The overriding design goal for Markdown's formatting syntax is\n" + " \n" + " to make it as \n" + " \n" + " readable as possible\n" + " \n" + " . 
The idea is that a\n" + " \n" + " Markdown-formatted document should be publishable as-is, as\n" + " \n" + " plain text, without \n" + " \n" + " looking like\n" + " \n" + " it's been marked up with tags\n" + " \n" + " or formatting instructions.\n" + " \n" + " (\n" + " \n" + " http://daringfireball.net/projects/markdown/\n" + " \n" + " )\n" + " \n" + " \n" + "\n", + "inconsistently indented blockquote sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { @@ -998,6 +1100,33 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "This link will have two [soft \n" + "line\n" + " breaks](https://commonmark.org)."; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " This link will have three \n" + " \n" + " soft\n" + " \n" + " line\n" + " \n" + " breaks\n" + " \n" + " .\n" + " \n" + "\n", + "autolink sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 01dd296ddfaa52b28c3a730bd65d0cd37030e1f7 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 12:07:29 -0700 Subject: [PATCH 04/21] Fix outdated expected test result. 
--- api_test/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index caa3eb0cb..b7247808b 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1112,9 +1112,9 @@ static void source_pos_inlines(test_batch_runner *runner) { "\n" "\n" " \n" - " This link will have three \n" - " \n" - " soft\n" + " This link will have two \n" + " \n" + " soft\n" " \n" " line\n" " \n" From 0df0155ba1bdf7771f761730d8ffef4119ad9266 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 30 Apr 2019 19:37:52 -0700 Subject: [PATCH 05/21] Fix autolink source position. --- src/inlines.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index c83a259e4..7c8a32f41 100755 --- a/src/inlines.c +++ b/src/inlines.c @@ -156,8 +156,8 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj, link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); link->as.link.title = cmark_chunk_literal(""); link->start_line = link->end_line = subj->line; - link->start_column = start_column + 1; - link->end_column = end_column + 1; + link->start_column = subj->column_offset + subj->block_offset + start_column + 1; + link->end_column = subj->column_offset + subj->block_offset + end_column + 1; cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); return link; } @@ -889,8 +889,7 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, - subj->pos + subj->column_offset - 1, contents, 0); + return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0); } // next try to match an email autolink @@ -899,8 +898,7 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen 
- 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, - subj->pos + subj->column_offset - 1, contents, 1); + return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1); } // finally, try to match an html tag From a0b8f1e3f148b086a45727033dea95abd9d7966f Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 30 Apr 2019 19:39:16 -0700 Subject: [PATCH 06/21] Fix expected autolink test fixture, and add an additional autolink source position test case. --- api_test/main.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index b7247808b..d8e5035a4 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1043,7 +1043,7 @@ static void source_pos(test_batch_runner *runner) { " \n" " (\n" " \n" - " http://daringfireball.net/projects/markdown/\n" + " http://daringfireball.net/projects/markdown/\n" " \n" " )\n" " \n" @@ -1127,6 +1127,34 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + " 1. \n" + " \n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " http://www.google.com\n" + " \n" + " \n" + " \n" + " http://www.google.com\n" + " \n" + " \n" + " \n" + " \n" + "\n", + "autolink (in list) sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 923d781ff15397a9c5371097193761273b258e34 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Thu, 2 May 2019 19:00:17 -0700 Subject: [PATCH 07/21] Fix source position of setext headings. 
--- src/blocks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index b86e83287..c74e9d217 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -299,8 +299,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { b->end_line = parser->line_number; b->end_column = parser->last_line_length; } else if (S_type(b) == CMARK_NODE_DOCUMENT || - (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || - (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) { + (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced)) { b->end_line = parser->line_number; b->end_column = parser->curline.size; if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') From 8da9153650dc36322b539ad864f139470a915cc7 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Thu, 2 May 2019 19:00:50 -0700 Subject: [PATCH 08/21] Add setext heading test case. --- api_test/main.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index d8e5035a4..0c2189bf2 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1155,6 +1155,28 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "Level 1 Heading\n" + "===============\n" + "A paragraph.\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " Level 1 Heading\n" + " \n" + " \n" + " A paragraph.\n" + " \n" + "\n", + "heading sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 7bc130ce44f6e978a9c83faf75fd05e6c7cf9e03 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 3 May 2019 13:43:43 -0700 Subject: [PATCH 09/21] Fix source position for ATX-style headings. 
--- src/blocks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index c74e9d217..691ba93f5 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -294,6 +294,10 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks b->flags &= ~CMARK_NODE__OPEN; + if (S_type(b) == CMARK_NODE_HEADING && !b->as.heading.setext) { + parser->last_line_length += b->end_column; + } + if (parser->curline.size == 0) { // end of input - line number has not been incremented b->end_line = parser->line_number; @@ -1219,7 +1223,10 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } else if (accepts_lines(S_type(container))) { if (S_type(container) == CMARK_NODE_HEADING && container->as.heading.setext == false) { + bufsize_t original_len = input->len; chop_trailing_hashtags(input); + // Subtract one to exclude the trailing newline. + container->end_column += original_len - input->len - 1; } S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); From 8566ec6fe27dc02d9820bc1e414aadcd39ccf6d9 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 3 May 2019 13:44:20 -0700 Subject: [PATCH 10/21] Add ATX-style heading source position test case. 
--- api_test/main.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index 0c2189bf2..43a1c4060 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1177,6 +1177,30 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "# This is an H1 #\n" + "\n" + "> # Header 1 #\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " This is an H1\n" + " \n" + " \n" + " \n" + " Header 1\n" + " \n" + " \n" + "\n", + "atx heading sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From d16c3d06bb1214eac93576756514408f7903ac54 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:28:34 -0700 Subject: [PATCH 11/21] Fix HTML block source position. 
--- api_test/main.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index 43a1c4060..92b9d2b50 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1053,6 +1053,54 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "\n" + "\n" + "
```javascript\n"
+    "var s = \"JavaScript syntax highlighting\";\n"
+    "alert(s);\n"
+    "```\n"
+    " \n"
+    "```python\n"
+    "s = \"Python syntax highlighting\"\n"
+    "print s\n"
+    "```\n"
+    " \n"
+    "```\n"
+    "No language indicated, so no syntax highlighting. \n"
+    "But let's throw in a <b>tag</b>.\n"
+    "```\n"
+    "
\n" + "\n" + "\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " <pre lang="no-highlight"><code>```javascript\n" + "var s = "JavaScript syntax highlighting";\n" + "alert(s);\n" + "```\n" + " \n" + "```python\n" + "s = "Python syntax highlighting"\n" + "print s\n" + "```\n" + " \n" + "```\n" + "No language indicated, so no syntax highlighting. \n" + "But let's throw in a &lt;b&gt;tag&lt;/b&gt;.\n" + "```\n" + "</code></pre>\n" + "\n" + "\n", + "html block sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { From 52b5ec814db2b81cdae5e88660818183dc222973 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:47:21 -0700 Subject: [PATCH 13/21] Fix thematic break source position. --- src/blocks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index e5b0c3e33..f261c7603 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -294,6 +294,11 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks b->flags &= ~CMARK_NODE__OPEN; + if (S_type(b) == CMARK_NODE_THEMATIC_BREAK) { + // Already been "finalized". + return parent; + } + if (S_type(b) == CMARK_NODE_HEADING && !b->as.heading.setext) { parser->last_line_length += b->end_column; } @@ -1046,6 +1051,10 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // it's only now that we know the line is not part of a setext heading: *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); + // A thematic break can only be on a single line, so we can set the + // end source position here. 
+ (*container)->end_line = parser->line_number; + (*container)->end_column = input->len - 1; S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if ((!indented || cont_type == CMARK_NODE_LIST) && parser->indent < 4 && From 3b10dbcfb7335c3a8ab31fdde2a12d5e7a58f15d Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:47:57 -0700 Subject: [PATCH 14/21] Add thematic break source position test case. --- api_test/main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index 92b9d2b50..4d7cae909 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1101,6 +1101,24 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "---\n" + "\n" + "\n" + "\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + "\n", + "thematic break sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { From 3b2001622e6d4b55c0a4fa6a64f14570b115b7ce Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 17:41:25 -0700 Subject: [PATCH 15/21] Fix ending source position for lists and list items. 
--- src/blocks.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index f261c7603..f8e993266 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -303,7 +303,11 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { parser->last_line_length += b->end_column; } - if (parser->curline.size == 0) { + if ((S_type(b) == CMARK_NODE_ITEM || S_type(b) == CMARK_NODE_LIST) + && b->last_child) { + b->end_line = b->last_child->end_line; + b->end_column = b->last_child->end_column; + } else if (parser->curline.size == 0) { // end of input - line number has not been incremented b->end_line = parser->line_number; b->end_column = parser->last_line_length; @@ -1111,6 +1115,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // add the list item *container = add_child(parser, *container, CMARK_NODE_ITEM, parser->first_nonspace + 1); +// (*container)->end_line = parser->line_number; +// (*container)->end_column = parser->column; /* TODO: static */ memcpy(&((*container)->as.list), data, sizeof(*data)); parser->mem->free(data); From d15b5d82e5e419383b730f584da90418316fda7c Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 18:32:14 -0700 Subject: [PATCH 16/21] Add list/list item source position test case. --- api_test/main.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index 4d7cae909..b89b9e202 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -930,7 +930,7 @@ static void source_pos(test_batch_runner *runner) { " \n" " \n" " \n" - " \n" + " \n" " \n" " Okay.\n" " \n" @@ -1119,6 +1119,51 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "1. List 1, Item A.\n" + " 1. List 2, Item A.\n" + " Second line.\n" + "\n" + "2. 
List 1, Item B.\n" + " Second line.\n" + "\n" + "\n" + "\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " List 1, Item A.\n" + " \n" + " \n" + " \n" + " \n" + " List 2, Item A.\n" + " \n" + " Second line.\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " List 1, Item B.\n" + " \n" + " Second line.\n" + " \n" + " \n" + " \n" + "\n", + "list sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { From 712569692cdee67519c76dd8a3f9a75bd372d84a Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 18:43:41 -0700 Subject: [PATCH 17/21] Remove commented out code. --- src/blocks.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index f8e993266..5ec185fbb 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -1115,8 +1115,6 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // add the list item *container = add_child(parser, *container, CMARK_NODE_ITEM, parser->first_nonspace + 1); -// (*container)->end_line = parser->line_number; -// (*container)->end_column = parser->column; /* TODO: static */ memcpy(&((*container)->as.list), data, sizeof(*data)); parser->mem->free(data); From a8ba56734fcce5512cd3887cea53d92a643b55bd Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 19:27:22 -0700 Subject: [PATCH 18/21] Correct list source position. 
--- src/blocks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index 5ec185fbb..207461749 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -307,6 +307,12 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { && b->last_child) { b->end_line = b->last_child->end_line; b->end_column = b->last_child->end_column; + + if (S_type(b) == CMARK_NODE_ITEM && b->parent) { + // The finalization order is not deterministic... + b->parent->end_line = b->end_line; + b->parent->end_column = b->end_column; + } } else if (parser->curline.size == 0) { // end of input - line number has not been incremented b->end_line = parser->line_number; From 62f4c3f37e4a6acbf86de1b05bb1f76b789bfef9 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Wed, 15 May 2019 08:22:51 -0700 Subject: [PATCH 19/21] Fix source position for HTML blocks without a matching end condition. --- src/blocks.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index 207461749..e64d1cad7 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -319,7 +319,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { b->end_column = parser->last_line_length; } else if (S_type(b) == CMARK_NODE_DOCUMENT || (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || - (S_type(b) == CMARK_NODE_HTML_BLOCK)) { + (S_type(b) == CMARK_NODE_HTML_BLOCK && b->end_line == b->start_line && b->end_column == 0)) { b->end_line = parser->line_number; b->end_column = parser->curline.size; if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') @@ -1193,6 +1193,12 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } else { // not a lazy continuation // Finalize any blocks that were not matched and set cur to container: while (parser->current != last_matched_container) { + if (S_type(parser->current) == CMARK_NODE_HTML_BLOCK) { + // Edge case: Closing an HTML block without a matching end 
condition. + parser->current->end_line = parser->line_number - 1; + parser->current->end_column = parser->last_line_length; + } + parser->current = finalize(parser, parser->current); assert(parser->current != NULL); } From b37b6743dd58794c3dd5c51e5b7b8f5a562067d7 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Wed, 15 May 2019 08:23:57 -0700 Subject: [PATCH 20/21] Add test case for source position of a HTML block without a matching end condition. --- api_test/main.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index b89b9e202..a8cfaa813 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1164,6 +1164,39 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "* List 1, item A.\n" + "\n" + "