diff --git a/.gitignore b/.gitignore
index 06f62bf..3c3629e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1 @@
-*.js
node_modules
diff --git a/README.md b/README.md
index 7da4ee1..fce14f0 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# htmldiff.js
### HTML Diffing in JavaScript (ok, CoffeeScript actually.)
-[![Build Status](https://secure.travis-ci.org/tnwinc/htmldiff.js.png)](http://travis-ci.org/tnwinc/htmldiff.js)
+[![Build Status](https://travis-ci.org/keanulee/htmldiff.js.svg?branch=master)](https://travis-ci.org/keanulee/htmldiff.js)
`htmldiff.js` is a CoffeeScript port of https://github.com/myobie/htmldiff
(This one has a few more tests.)
diff --git a/js/htmldiff.js b/js/htmldiff.js
new file mode 100644
index 0000000..e4c56f0
--- /dev/null
+++ b/js/htmldiff.js
@@ -0,0 +1,482 @@
+// Generated by CoffeeScript 1.7.1
+(function() {
+ var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, get_key_for_token, html_to_tokens, is_end_of_atomic_tag, is_end_of_tag, is_start_of_atomic_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
+
+ is_end_of_tag = function(char) {
+ return char === '>';
+ };
+
+ is_start_of_tag = function(char) {
+ return char === '<';
+ };
+
+ is_whitespace = function(char) {
+ return /^\s+$/.test(char);
+ };
+
+ is_tag = function(token) {
+ return /^\s*<[^>]+>\s*$/.test(token);
+ };
+
+ isnt_tag = function(token) {
+ return !is_tag(token);
+ };
+
+
+ /*
+ * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
+ * child nodes should not be compared - the entire tag should be treated as one token.
+ *
+ * @param {string} word The characters of the current token read so far.
+ *
+ * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
+ * null otherwise
+ */
+
+ is_start_of_atomic_tag = function(word) {
+ var result;
+ result = /^<(iframe|object|math|svg)/.exec(word);
+ if (result) {
+ result = result[1];
+ }
+ return result;
+ };
+
+
+ /*
+ * Checks if the current word is the end of an atomic tag (i.e. it has all the characters,
+ * except for the end bracket of the closing tag, such as "<iframe></iframe").
+ *
+ * @param {string} word The characters of the current token read so far.
+ * @param {string} tag The ending tag to look for.
+ *
+ * @return {boolean} True if the word is now a complete token (including the end
+ * tag), false otherwise.
+ */
+
+ is_end_of_atomic_tag = function(word, tag) {
+ return (word.substring(word.length - tag.length - 2)) === ("</" + tag);
+ };
+
+
+ /*
+ * Tokenizes a string of HTML.
+ *
+ * @param {string} html The string to tokenize.
+ *
+ * @return {Array.<string>} The list of tokens.
+ */
+
+ html_to_tokens = function(html) {
+ var atomic_tag, char, current_atomic_tag, current_word, mode, words, _i, _len;
+ mode = 'char';
+ current_word = '';
+ current_atomic_tag = '';
+ words = [];
+ for (_i = 0, _len = html.length; _i < _len; _i++) {
+ char = html[_i];
+ switch (mode) {
+ case 'tag':
+ atomic_tag = is_start_of_atomic_tag(current_word);
+ if (atomic_tag) {
+ mode = 'atomic_tag';
+ current_atomic_tag = atomic_tag;
+ current_word += char;
+ } else if (is_end_of_tag(char)) {
+ current_word += '>';
+ words.push(current_word);
+ current_word = '';
+ if (is_whitespace(char)) {
+ mode = 'whitespace';
+ } else {
+ mode = 'char';
+ }
+ } else {
+ current_word += char;
+ }
+ break;
+ case 'atomic_tag':
+ if ((is_end_of_tag(char)) && (is_end_of_atomic_tag(current_word, current_atomic_tag))) {
+ current_word += '>';
+ words.push(current_word);
+ current_word = '';
+ current_atomic_tag = '';
+ mode = 'char';
+ } else {
+ current_word += char;
+ }
+ break;
+ case 'char':
+ if (is_start_of_tag(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = '<';
+ mode = 'tag';
+ } else if (/\s/.test(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = char;
+ mode = 'whitespace';
+ } else if (/[\w\d\#@]/.test(char)) {
+ current_word += char;
+ } else if (/&/.test(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = char;
+ } else {
+ current_word += char;
+ words.push(current_word);
+ current_word = '';
+ }
+ break;
+ case 'whitespace':
+ if (is_start_of_tag(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = '<';
+ mode = 'tag';
+ } else if (is_whitespace(char)) {
+ current_word += char;
+ } else {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = char;
+ mode = 'char';
+ }
+ break;
+ default:
+ throw new Error("Unknown mode " + mode);
+ }
+ }
+ if (current_word) {
+ words.push(current_word);
+ }
+ return words;
+ };
+
+
+ /*
+ * Creates a key that should be used to match tokens. This is useful, for example, if we want
+ * to consider two open tag tokens as equal, even if they don't have the same attributes. We
+ * use a key instead of overwriting the token because we may want to render original string
+ * without losing the attributes.
+ *
+ * @param {string} token The token to create the key for.
+ *
+ * @return {string} The identifying key that should be used to match before and after tokens.
+ */
+
+ get_key_for_token = function(token) {
+ var tag_name;
+ tag_name = /<([^\s>]+)[\s>]/.exec(token);
+ if (tag_name) {
+ return "<" + (tag_name[1].toLowerCase()) + ">";
+ }
+ if (token) {
+ return token.replace(/(\s+|&nbsp;|&#160;)/g, ' ');
+ }
+ return token;
+ };
+
+ find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) {
+ var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, _i, _j, _len;
+ best_match_in_before = start_in_before;
+ best_match_in_after = start_in_after;
+ best_match_length = 0;
+ match_length_at = {};
+ for (index_in_before = _i = start_in_before; start_in_before <= end_in_before ? _i < end_in_before : _i > end_in_before; index_in_before = start_in_before <= end_in_before ? ++_i : --_i) {
+ new_match_length_at = {};
+ looking_for = get_key_for_token(before_tokens[index_in_before]);
+ locations_in_after = index_of_before_locations_in_after_tokens[looking_for];
+ for (_j = 0, _len = locations_in_after.length; _j < _len; _j++) {
+ index_in_after = locations_in_after[_j];
+ if (index_in_after < start_in_after) {
+ continue;
+ }
+ if (index_in_after >= end_in_after) {
+ break;
+ }
+ if (match_length_at[index_in_after - 1] == null) {
+ match_length_at[index_in_after - 1] = 0;
+ }
+ new_match_length = match_length_at[index_in_after - 1] + 1;
+ new_match_length_at[index_in_after] = new_match_length;
+ if (new_match_length > best_match_length) {
+ best_match_in_before = index_in_before - new_match_length + 1;
+ best_match_in_after = index_in_after - new_match_length + 1;
+ best_match_length = new_match_length;
+ }
+ }
+ match_length_at = new_match_length_at;
+ }
+ if (best_match_length !== 0) {
+ match = new Match(best_match_in_before, best_match_in_after, best_match_length);
+ }
+ return match;
+ };
+
+ recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) {
+ var match;
+ match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after);
+ if (match != null) {
+ if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) {
+ recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks);
+ }
+ matching_blocks.push(match);
+ if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) {
+ recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks);
+ }
+ }
+ return matching_blocks;
+ };
+
+
+ /*
+ * Creates an index (A.K.A. hash table) that will be used to match the list of before
+ * tokens with the list of after tokens.
+ *
+ * @param {Object} options An object with the following:
+ * - {Array.<string>} find_these The list of tokens that will be used to search.
+ * - {Array.<string>} in_these The list of tokens that will be returned.
+ *
+ * @return {Object} An index that can be used to search for tokens.
+ */
+
+ create_index = function(options) {
+ var idx, index, queries, query, results, _i, _len;
+ if (options.find_these == null) {
+ throw new Error('params must have find_these key');
+ }
+ if (options.in_these == null) {
+ throw new Error('params must have in_these key');
+ }
+ queries = options.find_these.map(function(token) {
+ return get_key_for_token(token);
+ });
+ results = options.in_these.map(function(token) {
+ return get_key_for_token(token);
+ });
+ index = {};
+ for (_i = 0, _len = queries.length; _i < _len; _i++) {
+ query = queries[_i];
+ index[query] = [];
+ idx = results.indexOf(query);
+ while (idx !== -1) {
+ index[query].push(idx);
+ idx = results.indexOf(query, idx + 1);
+ }
+ }
+ return index;
+ };
+
+ find_matching_blocks = function(before_tokens, after_tokens) {
+ var index_of_before_locations_in_after_tokens, matching_blocks;
+ matching_blocks = [];
+ index_of_before_locations_in_after_tokens = create_index({
+ find_these: before_tokens,
+ in_these: after_tokens
+ });
+ return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks);
+ };
+
+ calculate_operations = function(before_tokens, after_tokens) {
+ var action_map, action_up_to_match_positions, index, is_single_whitespace, last_op, match, match_starts_at_current_position_in_after, match_starts_at_current_position_in_before, matches, op, operations, position_in_after, position_in_before, post_processed, _i, _j, _len, _len1;
+ if (before_tokens == null) {
+ throw new Error('before_tokens?');
+ }
+ if (after_tokens == null) {
+ throw new Error('after_tokens?');
+ }
+ position_in_before = position_in_after = 0;
+ operations = [];
+ action_map = {
+ 'false,false': 'replace',
+ 'true,false': 'insert',
+ 'false,true': 'delete',
+ 'true,true': 'none'
+ };
+ matches = find_matching_blocks(before_tokens, after_tokens);
+ matches.push(new Match(before_tokens.length, after_tokens.length, 0));
+ for (index = _i = 0, _len = matches.length; _i < _len; index = ++_i) {
+ match = matches[index];
+ match_starts_at_current_position_in_before = position_in_before === match.start_in_before;
+ match_starts_at_current_position_in_after = position_in_after === match.start_in_after;
+ action_up_to_match_positions = action_map[[match_starts_at_current_position_in_before, match_starts_at_current_position_in_after].toString()];
+ if (action_up_to_match_positions !== 'none') {
+ operations.push({
+ action: action_up_to_match_positions,
+ start_in_before: position_in_before,
+ end_in_before: (action_up_to_match_positions !== 'insert' ? match.start_in_before - 1 : void 0),
+ start_in_after: position_in_after,
+ end_in_after: (action_up_to_match_positions !== 'delete' ? match.start_in_after - 1 : void 0)
+ });
+ }
+ if (match.length !== 0) {
+ operations.push({
+ action: 'equal',
+ start_in_before: match.start_in_before,
+ end_in_before: match.end_in_before,
+ start_in_after: match.start_in_after,
+ end_in_after: match.end_in_after
+ });
+ }
+ position_in_before = match.end_in_before + 1;
+ position_in_after = match.end_in_after + 1;
+ }
+ post_processed = [];
+ last_op = {
+ action: 'none'
+ };
+ is_single_whitespace = function(op) {
+ if (op.action !== 'equal') {
+ return false;
+ }
+ if (op.end_in_before - op.start_in_before !== 0) {
+ return false;
+ }
+ return /^\s$/.test(before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9));
+ };
+ for (_j = 0, _len1 = operations.length; _j < _len1; _j++) {
+ op = operations[_j];
+ if (((is_single_whitespace(op)) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) {
+ last_op.end_in_before = op.end_in_before;
+ last_op.end_in_after = op.end_in_after;
+ } else {
+ post_processed.push(op);
+ last_op = op;
+ }
+ }
+ return post_processed;
+ };
+
+ consecutive_where = function(start, content, predicate) {
+ var answer, index, last_matching_index, token, _i, _len;
+ content = content.slice(start, +content.length + 1 || 9e9);
+ last_matching_index = void 0;
+ for (index = _i = 0, _len = content.length; _i < _len; index = ++_i) {
+ token = content[index];
+ answer = predicate(token);
+ if (answer === true) {
+ last_matching_index = index;
+ }
+ if (answer === false) {
+ break;
+ }
+ }
+ if (last_matching_index != null) {
+ return content.slice(0, +last_matching_index + 1 || 9e9);
+ }
+ return [];
+ };
+
+ wrap = function(tag, content) {
+ var length, non_tags, position, rendering, tags, val;
+ rendering = '';
+ position = 0;
+ length = content.length;
+ while (true) {
+ if (position >= length) {
+ break;
+ }
+ non_tags = consecutive_where(position, content, isnt_tag);
+ position += non_tags.length;
+ if (non_tags.length !== 0) {
+ val = non_tags.join('');
+ if (val.trim()) {
+ rendering += "<" + tag + ">" + val + "</" + tag + ">";
+ }
+ }
+ if (position >= length) {
+ break;
+ }
+ tags = consecutive_where(position, content, is_tag);
+ position += tags.length;
+ rendering += tags.join('');
+ }
+ return rendering;
+ };
+
+ op_map = {
+ equal: function(op, before_tokens, after_tokens) {
+ return after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9).join('');
+ },
+ insert: function(op, before_tokens, after_tokens) {
+ var val;
+ val = after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9);
+ return wrap('ins', val);
+ },
+ "delete": function(op, before_tokens, after_tokens) {
+ var val;
+ val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);
+ return wrap('del', val);
+ }
+ };
+
+ op_map.replace = function(op, before_tokens, after_tokens) {
+ return (op_map["delete"](op, before_tokens, after_tokens)) + (op_map.insert(op, before_tokens, after_tokens));
+ };
+
+ render_operations = function(before_tokens, after_tokens, operations) {
+ var op, rendering, _i, _len;
+ rendering = '';
+ for (_i = 0, _len = operations.length; _i < _len; _i++) {
+ op = operations[_i];
+ rendering += op_map[op.action](op, before_tokens, after_tokens);
+ }
+ return rendering;
+ };
+
+ diff = function(before, after) {
+ var ops;
+ if (before === after) {
+ return before;
+ }
+ before = html_to_tokens(before);
+ after = html_to_tokens(after);
+ ops = calculate_operations(before, after);
+ return render_operations(before, after, ops);
+ };
+
+ diff.html_to_tokens = html_to_tokens;
+
+ diff.find_matching_blocks = find_matching_blocks;
+
+ find_matching_blocks.find_match = find_match;
+
+ find_matching_blocks.create_index = create_index;
+
+ find_matching_blocks.get_key_for_token = get_key_for_token;
+
+ diff.calculate_operations = calculate_operations;
+
+ diff.render_operations = render_operations;
+
+ if (typeof define === 'function') {
+ define([], function() {
+ return diff;
+ });
+ } else if (typeof module !== "undefined" && module !== null) {
+ module.exports = diff;
+ } else {
+ this.htmldiff = diff;
+ }
+
+}).call(this);
diff --git a/package.json b/package.json
index aaf09cc..893846a 100644
--- a/package.json
+++ b/package.json
@@ -4,8 +4,8 @@
"description": "HTML Diffing in JavaScript (CoffeeScript)",
"main": "htmldiff.js",
"scripts": {
- "test": "mocha -R min",
- "install": "coffee --compile src"
+ "test": "mocha -R min --compilers coffee:coffee-script/register",
+ "install": "coffee --output js/ --compile src/"
},
"repository": {
"type": "git",
diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index dd5c658..d746473 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -4,20 +4,61 @@ is_whitespace = (char)-> /^\s+$/.test char
is_tag = (token)-> /^\s*<[^>]+>\s*$/.test token
isnt_tag = (token)-> not is_tag token
+###
+ * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
+ * child nodes should not be compared - the entire tag should be treated as one token. This
+ * is useful for tags where it does not make sense to insert <ins> and <del> tags.
+ *
+ * @param {string} word The characters of the current token read so far.
+ *
+ * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
+ * null otherwise
+###
+is_start_of_atomic_tag = (word)->
+ result = /^<(iframe|object|math|svg)/.exec word
+ result = result[1] if result
+ return result
+
+###
+ * Checks if the current word is the end of an atomic tag (i.e. it has all the characters,
+ * except for the end bracket of the closing tag, such as "<iframe></iframe").
+ *
+ * @param {string} word The characters of the current token read so far.
+ * @param {string} tag The ending tag to look for.
+ *
+ * @return {boolean} True if the word is now a complete token (including the end
+ * tag), false otherwise.
+###
+is_end_of_atomic_tag = (word, tag)->
+ (word.substring word.length - tag.length - 2) is "</#{tag}"
+
class Match
constructor: (@start_in_before, @start_in_after, @length)->
@end_in_before = (@start_in_before + @length) - 1
@end_in_after = (@start_in_after + @length) - 1
+###
+ * Tokenizes a string of HTML.
+ *
+ * @param {string} html The string to tokenize.
+ *
+ * @return {Array.<string>} The list of tokens.
+###
html_to_tokens = (html)->
mode = 'char'
current_word = ''
+ current_atomic_tag = ''
words = []
for char in html
switch mode
when 'tag'
- if is_end_of_tag char
+ atomic_tag = is_start_of_atomic_tag current_word
+ if atomic_tag
+ mode = 'atomic_tag'
+ current_atomic_tag = atomic_tag
+ current_word += char
+ else if is_end_of_tag char
current_word += '>'
words.push current_word
current_word = ''
@@ -27,6 +68,16 @@ html_to_tokens = (html)->
mode = 'char'
else
current_word += char
+ when 'atomic_tag'
+ if (is_end_of_tag char) \
+ and (is_end_of_atomic_tag current_word, current_atomic_tag)
+ current_word += '>'
+ words.push current_word
+ current_word = ''
+ current_atomic_tag = ''
+ mode = 'char'
+ else
+ current_word += char
when 'char'
if is_start_of_tag char
words.push current_word if current_word
@@ -36,11 +87,19 @@ html_to_tokens = (html)->
words.push current_word if current_word
current_word = char
mode = 'whitespace'
- else if /[\w\#@]+/i.test char
+ else if /[\w\d\#@]/.test char
+ # Consider '#' as part of the same word, since it might be part of an HTML escaped
+ # character (e.g. ' ').
current_word += char
- else
+ else if /&/.test char
+ # Consider '&' as the start of a new word, since it might be the start of an HTML
+ # escaped character (e.g. ' ').
words.push current_word if current_word
current_word = char
+ else
+ current_word += char
+ words.push current_word
+ current_word = ''
when 'whitespace'
if is_start_of_tag char
words.push current_word if current_word
@@ -57,6 +116,28 @@ html_to_tokens = (html)->
words.push current_word if current_word
return words
+###
+ * Creates a key that should be used to match tokens. This is useful, for example, if we want
+ * to consider two open tag tokens as equal, even if they don't have the same attributes. We
+ * use a key instead of overwriting the token because we may want to render the original string
+ * without losing the attributes.
+ *
+ * @param {string} token The token to create the key for.
+ *
+ * @return {string} The identifying key that should be used to match before and after tokens.
+###
+get_key_for_token = (token)->
+ # If the token is a tag, return just the tag with no attributes since we do not compare
+ # attributes yet.
+ tag_name = /<([^\s>]+)[\s>]/.exec token
+ return "<#{tag_name[1].toLowerCase()}>" if tag_name
+
+ # If the token is text, collapse adjacent whitespace and replace non-breaking spaces with
+ # regular spaces.
+ return token.replace /(\s+|&nbsp;|&#160;)/g, ' ' if token
+
+ return token
+
find_match = (before_tokens, after_tokens,
index_of_before_locations_in_after_tokens,
start_in_before, end_in_before,
@@ -70,7 +151,7 @@ find_match = (before_tokens, after_tokens,
for index_in_before in [start_in_before...end_in_before]
new_match_length_at = {}
- looking_for = before_tokens[index_in_before]
+ looking_for = get_key_for_token before_tokens[index_in_before]
locations_in_after =
index_of_before_locations_in_after_tokens[looking_for]
@@ -128,17 +209,32 @@ recursively_find_matching_blocks = (before_tokens, after_tokens,
return matching_blocks
-create_index = (p)->
- throw new Error 'params must have find_these key' unless p.find_these?
- throw new Error 'params must have in_these key' unless p.in_these?
+###
+ * Creates an index (A.K.A. hash table) that will be used to match the list of before
+ * tokens with the list of after tokens.
+ *
+ * @param {Object} options An object with the following:
+ * - {Array.<string>} find_these The list of tokens that will be used to search.
+ * - {Array.<string>} in_these The list of tokens that will be returned.
+ *
+ * @return {Object} An index that can be used to search for tokens.
+###
+create_index = (options)->
+ throw new Error 'params must have find_these key' unless options.find_these?
+ throw new Error 'params must have in_these key' unless options.in_these?
+
+ queries = options.find_these.map (token)->
+ return get_key_for_token token
+ results = options.in_these.map (token)->
+ return get_key_for_token token
index = {}
- for token in p.find_these
- index[token] = []
- idx = p.in_these.indexOf token
+ for query in queries
+ index[query] = []
+ idx = results.indexOf query
while idx isnt -1
- index[token].push idx
- idx = p.in_these.indexOf token, idx+1
+ index[query].push idx
+ idx = results.indexOf query, idx+1
return index
@@ -240,7 +336,8 @@ wrap = (tag, content)->
non_tags = consecutive_where position, content, isnt_tag
position += non_tags.length
if non_tags.length isnt 0
- rendering += "<#{tag}>#{non_tags.join ''}#{tag}>"
+ val = non_tags.join ''
+ rendering += "<#{tag}>#{val}</#{tag}>" if val.trim()
break if position >= length
tags = consecutive_where position, content, is_tag
@@ -251,7 +348,7 @@ wrap = (tag, content)->
op_map =
equal: (op, before_tokens, after_tokens)->
- before_tokens[op.start_in_before..op.end_in_before].join ''
+ after_tokens[op.start_in_after..op.end_in_after].join ''
insert: (op, before_tokens, after_tokens)->
val = after_tokens[op.start_in_after..op.end_in_after]
@@ -287,6 +384,7 @@ diff.html_to_tokens = html_to_tokens
diff.find_matching_blocks = find_matching_blocks
find_matching_blocks.find_match = find_match
find_matching_blocks.create_index = create_index
+find_matching_blocks.get_key_for_token = get_key_for_token
diff.calculate_operations = calculate_operations
diff.render_operations = render_operations
diff --git a/test/diff.spec.coffee b/test/diff.spec.coffee
index 68d0cc9..c0ad77c 100644
--- a/test/diff.spec.coffee
+++ b/test/diff.spec.coffee
@@ -9,9 +9,19 @@ describe 'Diff', ->
it 'should return the text', ->
(expect @res).equal 'input text'
- xdescribe 'When a letter is added', ->
+ describe 'When a letter is added', ->
beforeEach ->
@res = @cut 'input', 'input 2'
it 'should mark the new letter', ->
- (expect @res).to.equal 'input 2'
+ (expect @res).to.equal 'input 2'
+
+ describe 'Whitespace differences', ->
+ it 'should collapse adjacent whitespace', ->
+ (expect @cut 'Much \n\t spaces', 'Much spaces').to.equal 'Much spaces'
+
+ it 'should consider non-breaking spaces as equal', ->
+ (expect @cut 'Hello world', 'Hello world').to.equal 'Hello world'
+
+ it 'should consider non-breaking spaces and non-adjacent regular spaces as equal', ->
+ (expect @cut 'Hello world', 'Hello world').to.equal 'Hello world'
diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee
index ac6c235..8455d61 100644
--- a/test/html_to_tokens.spec.coffee
+++ b/test/html_to_tokens.spec.coffee
@@ -28,3 +28,40 @@ describe 'html_to_tokens', ->
it 'should identify self closing tags as tokens', ->
(expect @cut '
hellogoodbye
')
.eql ['
', 'hello', '', 'goodbye', '
']
+
+ describe 'when encountering atomic tags', ->
+ it 'should identify an image tag as a single token', ->
+ (expect @cut '')
+ .eql ['
', '', '', '
']
+
+ it 'should identify an iframe tag as a single token', ->
+ (expect @cut '')
+ .eql ['
', '', '
']
+
+ it 'should identify an object tag as a single token', ->
+ (expect @cut '')
+ .eql ['
', '', '
']
+
+ it 'should identify a math tag as a single token', ->
+ (expect @cut '')
+ .eql [
+ '
',
+ '',
+ '
']
+
+ it 'should identify an svg tag as a single token', ->
+ (expect @cut '')
+ .eql [
+ '
',
+ '',
+ '
']
diff --git a/test/render_operations.spec.coffee b/test/render_operations.spec.coffee
index a233315..179889a 100644
--- a/test/render_operations.spec.coffee
+++ b/test/render_operations.spec.coffee
@@ -63,3 +63,30 @@ describe 'render_operations', ->
it 'should keep the change inside the
', ->
(expect @res).to.equal '
thisI is awesome
'
+
+ describe 'empty tokens', ->
+ it 'should not be wrapped', ->
+ before = ['text']
+ after = ['text', ' ']
+
+ @res = @cut before, after
+
+ (expect @res).to.equal 'text'
+
+ describe 'tags with attributes', ->
+ it 'should treat attribute changes as equal and output the after tag', ->
+ before = ['