diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee index dd5c658..46cb5a1 100644 --- a/src/htmldiff.coffee +++ b/src/htmldiff.coffee @@ -1,23 +1,121 @@ +### + * htmldiff.js is a library that compares HTML content. It creates a diff between two + * HTML documents by combining the two documents and wrapping the differences with + * <ins> and <del> tags. Here is a high-level overview of how the diff works. + * + * 1. Tokenize the before and after HTML with html_to_tokens. + * 2. Generate a list of operations that convert the before list of tokens to the after + * list of tokens with calculate_operations, which does the following: + * a. Find all the matching blocks of tokens between the before and after lists of + * tokens with find_matching_blocks. This is done by finding the single longest + * matching block with find_match, then recursively finding the next longest + * matching blocks that precede and follow the longest matching block with + * recursively_find_matching_blocks. + * b. Determine insertions, deletions, and replacements from the matching blocks. + * This is done in calculate_operations. + * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where + * appropriate with render_operations. + * + * Example usage: + * + * htmldiff = require 'htmldiff.js' + * + * htmldiff '

this is some text

', '

this is some more text

' + * == '

this is some more text

' + * + * htmldiff '

this is some text

', '

this is some more text

', 'diff-class' + * == '

this is some more text

' +### + is_end_of_tag = (char)-> char is '>' is_start_of_tag = (char)-> char is '<' is_whitespace = (char)-> /^\s+$/.test char is_tag = (token)-> /^\s*<[^>]+>\s*$/.test token isnt_tag = (token)-> not is_tag token +### + * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose + * child nodes should not be compared - the entire tag should be treated as one token. This + * is useful for tags where it does not make sense to insert <ins> and <del> tags. + * + * @param {string} word The characters of the current token read so far. + * + * @return {string|null} The name of the atomic tag if the word will be an atomic tag, + * null otherwise +### +is_start_of_atomic_tag = (word)-> + result = /^<(iframe|object|math|svg|script)/.exec word + result = result[1] if result + return result + +### + * Checks if the current word is the end of an atomic tag (i.e. it has all the characters, + * except for the end bracket of the closing tag, such as "

') + .eql ['

', '', '

'] + + it 'should identify an object tag as a single token', -> + (expect @cut '

') + .eql ['

', '', '

'] + + it 'should identify a math tag as a single token', -> + (expect @cut '

' + + 'π' + + '' + + 'r2

') + .eql [ + '

', + '' + + 'π' + + '' + + 'r2', + '

'] + + it 'should identify an svg tag as a single token', -> + (expect @cut '

' + + '' + + '

') + .eql [ + '

', + '' + + '' + + '', + '

'] + + it 'should identify a script tag as a single token', -> + (expect @cut '

') + .eql ['

', '', '

'] diff --git a/test/mocha.opts b/test/mocha.opts index b0f5199..ab28531 100644 --- a/test/mocha.opts +++ b/test/mocha.opts @@ -1,4 +1,4 @@ ---compilers coffee:coffee-script +--compilers coffee:coffee-script/register --require test/config.js --ui bdd --reporter spec diff --git a/test/render_operations.spec.coffee b/test/render_operations.spec.coffee index a233315..c7f0394 100644 --- a/test/render_operations.spec.coffee +++ b/test/render_operations.spec.coffee @@ -63,3 +63,47 @@ describe 'render_operations', -> it 'should keep the change inside the

', -> (expect @res).to.equal '

thisI is awesome

' + + describe 'empty tokens', -> + it 'should not be wrapped', -> + before = ['text'] + after = ['text', ' '] + + @res = @cut before, after + + (expect @res).to.equal 'text' + + describe 'tags with attributes', -> + it 'should treat attribute changes as equal and output the after tag', -> + before = ['

', 'this', ' ', 'is', ' ', 'awesome', '

'] + after = ['

', 'this', ' ', 'is', ' ', 'awesome', '

'] + + @res = @cut before, after + + (expect @res).to.equal '

this is awesome

' + + it 'should show changes within tags with different attributes', -> + before = ['

', 'this', ' ', 'is', ' ', 'awesome', '

'] + after = ['

', 'that', ' ', 'is', ' ', 'awesome', '

'] + + @res = @cut before, after + + (expect @res).to.equal \ + '

thisthat is awesome

' + + describe 'wrappable tags', -> + it 'should wrap void tags', -> + before = ['old', ' ', 'text'] + after = ['new', '
', ' ', 'text'] + + @res = @cut before, after + + (expect @res).to.equal 'oldnew
text' + + it 'should wrap atomic tags', -> + before = ['old', '', ' ', 'text'] + after = ['new', ' ', 'text'] + + @res = @cut before, after + + (expect @res).to.equal 'oldnew text'