From 7381a2bfa5067855a8bc027cb6b2611c2bb94c73 Mon Sep 17 00:00:00 2001
From: Keanu Lee <keanu@inkling.com>
Date: Mon, 7 Apr 2014 16:13:21 -0700
Subject: [PATCH 1/2] Added support for atomic tags; ignore tag attributes;
 interpret whitespace.

---
 .gitignore                         |   1 -
 README.md                          |   2 +-
 js/htmldiff.js                     | 482 +++++++++++++++++++++++++++++
 package.json                       |   4 +-
 src/htmldiff.coffee                | 125 +++++++-
 test/diff.spec.coffee              |  14 +-
 test/html_to_tokens.spec.coffee    |  37 +++
 test/render_operations.spec.coffee |  27 ++
 8 files changed, 672 insertions(+), 20 deletions(-)
 create mode 100644 js/htmldiff.js
diff --git a/.gitignore b/.gitignore
index 06f62bf..3c3629e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1 @@
-*.js
 node_modules
diff --git a/README.md b/README.md
index 7da4ee1..fce14f0 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # htmldiff.js
 ### HTML Diffing in JavaScript (ok, CoffeeScript actually.)
 
-[![Build Status](https://secure.travis-ci.org/tnwinc/htmldiff.js.png)](http://travis-ci.org/tnwinc/htmldiff.js)
+[![Build Status](https://travis-ci.org/keanulee/htmldiff.js.svg?branch=master)](https://travis-ci.org/keanulee/htmldiff.js)
 
 `htmldiff.js` is a CoffeeScript port of https://github.com/myobie/htmldiff
 (This one has a few more tests.)
diff --git a/js/htmldiff.js b/js/htmldiff.js
new file mode 100644
index 0000000..e4c56f0
--- /dev/null
+++ b/js/htmldiff.js
@@ -0,0 +1,482 @@
+// Generated by CoffeeScript 1.7.1
+(function() {
+  var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, get_key_for_token, html_to_tokens, is_end_of_atomic_tag, is_end_of_tag, is_start_of_atomic_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
+
+  is_end_of_tag = function(char) { // true only for the tag-closing bracket '>'
+    return char === '>';
+  };
+
+  is_start_of_tag = function(char) { // true only for the tag-opening bracket '<'
+    return char === '<';
+  };
+
+  is_whitespace = function(char) { // true when the string is non-empty and entirely whitespace
+    return /^\s+$/.test(char);
+  };
+
+  is_tag = function(token) { // a single '<...>' with no inner '>', optionally padded by whitespace
+    return /^\s*<[^>]+>\s*$/.test(token);
+  };
+
+  isnt_tag = function(token) { // logical negation of is_tag
+    return !is_tag(token);
+  };
+
+
+  /*
+   * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
+   * child nodes should not be compared - the entire tag should be treated as one token.
+   * NOTE: the pattern is a prefix match with no boundary after the name ("<svgfoo" matches too).
+   * @param {string} word The characters of the current token read so far.
+   *
+   * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
+   *    null otherwise
+   */
+
+  is_start_of_atomic_tag = function(word) {
+    var result;
+    result = /^<(iframe|object|math|svg)/.exec(word); // case-sensitive; anchored at the start only
+    if (result) {
+      result = result[1]; // keep just the captured tag name
+    }
+    return result; // exec() yields null when nothing matched
+  };
+
+
+  /*
+   * Reports whether the text accumulated for an atomic tag now ends with its closing tag,
+   * minus the final bracket (for example "<iframe></iframe" when the tag is "iframe").
+   *
+   * @param {string} word The characters of the current token read so far.
+   * @param {string} tag The name of the atomic tag whose closing tag is expected.
+   *
+   * @return {boolean} True if the word ends with "</" followed by the tag name,
+   *    false otherwise.
+   */
+
+  is_end_of_atomic_tag = function(word, tag) {
+    return word.slice(-(tag.length + 2)) === '</' + tag;
+  };
+
+  Match = (function() { // compiled CoffeeScript class describing one run of matching tokens
+    function Match(start_in_before, start_in_after, length) {
+      this.start_in_before = start_in_before; // index of the run's first token in the before list
+      this.start_in_after = start_in_after; // index of the run's first token in the after list
+      this.length = length; // number of tokens in the run
+      this.end_in_before = (this.start_in_before + this.length) - 1; // inclusive; start - 1 when length is 0
+      this.end_in_after = (this.start_in_after + this.length) - 1; // inclusive; start - 1 when length is 0
+    }
+
+    return Match;
+
+  })();
+
+
+  /*
+   * Tokenizes a string of HTML into tags, words and whitespace runs; an atomic tag
+   * (iframe, object, math, svg) is kept as one token up to and including its closing tag.
+   * @param {string} html The string to tokenize.
+   *
+   * @return {Array.<string>} The list of tokens.
+   */
+
+  html_to_tokens = function(html) {
+    var atomic_tag, char, current_atomic_tag, current_word, mode, words, _i, _len;
+    mode = 'char';
+    current_word = '';
+    current_atomic_tag = '';
+    words = [];
+    for (_i = 0, _len = html.length; _i < _len; _i++) {
+      char = html[_i];
+      switch (mode) {
+        case 'tag':
+          // Enter atomic mode only when the name read so far is exactly an atomic tag
+          // name and ends here (whitespace or '>'), so "<svg-icon>" stays a plain tag.
+          atomic_tag = is_start_of_atomic_tag(current_word);
+          if (atomic_tag && current_word === '<' + atomic_tag && /[\s>]/.test(char)) {
+            mode = 'atomic_tag';
+            current_atomic_tag = atomic_tag;
+            current_word += char;
+          } else if (is_end_of_tag(char)) {
+            current_word += '>';
+            words.push(current_word);
+            current_word = '';
+            // char is '>' here and can never be whitespace, so text mode always follows.
+            // NOTE: mirror these changes in src/htmldiff.coffee, or recompiling reverts them.
+            mode = 'char';
+          } else {
+            current_word += char;
+          }
+          break;
+        case 'atomic_tag':
+          if ((is_end_of_tag(char)) && (is_end_of_atomic_tag(current_word, current_atomic_tag))) {
+            current_word += '>';
+            words.push(current_word); // opening tag, children and closing tag as one token
+            current_word = '';
+            current_atomic_tag = '';
+            mode = 'char';
+          } else {
+            current_word += char;
+          }
+          break;
+        case 'char':
+          if (is_start_of_tag(char)) {
+            if (current_word) {
+              words.push(current_word);
+            }
+            current_word = '<';
+            mode = 'tag';
+          } else if (/\s/.test(char)) {
+            if (current_word) {
+              words.push(current_word);
+            }
+            current_word = char;
+            mode = 'whitespace';
+          } else if (/[\w\d\#@]/.test(char)) { // '#' stays in the word so "&#160;" is not split
+            current_word += char;
+          } else if (/&/.test(char)) { // '&' starts a new word: it may begin an HTML entity
+            if (current_word) {
+              words.push(current_word);
+            }
+            current_word = char;
+          } else { // any other character ends the current word and is included in it
+            current_word += char;
+            words.push(current_word);
+            current_word = '';
+          }
+          break;
+        case 'whitespace':
+          if (is_start_of_tag(char)) {
+            if (current_word) {
+              words.push(current_word);
+            }
+            current_word = '<';
+            mode = 'tag';
+          } else if (is_whitespace(char)) { // consecutive whitespace accumulates into one token
+            current_word += char;
+          } else {
+            if (current_word) {
+              words.push(current_word);
+            }
+            current_word = char;
+            mode = 'char';
+          }
+          break;
+        default:
+          throw new Error("Unknown mode " + mode);
+      }
+    }
+    if (current_word) { // flush whatever was still being accumulated at end of input
+      words.push(current_word);
+    }
+    return words;
+  };
+
+
+  /*
+   * Derives the comparison key for a token. Tokens are matched by key rather than by their
+   * literal text, so two tags with the same name compare equal whatever their attributes
+   * are. The token itself is left untouched, which lets rendering emit the original string
+   * without losing the attributes.
+   *
+   * @param {string} token The token to create the key for.
+   *
+   * @return {string} The identifying key that should be used to match before and after tokens.
+   */
+
+  get_key_for_token = function(token) {
+    var tag_match = /<([^\s>]+)[\s>]/.exec(token);
+    if (tag_match !== null) {
+      return '<' + tag_match[1].toLowerCase() + '>'; // tag name only, attributes dropped
+    }
+    if (!token) {
+      return token;
+    }
+    // Text: each whitespace run or non-breaking-space entity becomes a single space.
+    return token.replace(/(\s+|&nbsp;|&#160;)/g, ' ');
+  };
+  // Finds the longest run of tokens with equal keys inside the given before/after windows.
+  find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) {
+    var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, _i, _j, _len;
+    best_match_in_before = start_in_before;
+    best_match_in_after = start_in_after;
+    best_match_length = 0;
+    match_length_at = {}; // after index -> length of the run ending there for the previous before token
+    for (index_in_before = _i = start_in_before; start_in_before <= end_in_before ? _i < end_in_before : _i > end_in_before; index_in_before = start_in_before <= end_in_before ? ++_i : --_i) { // compiled exclusive range: end_in_before itself is not visited
+      new_match_length_at = {};
+      looking_for = get_key_for_token(before_tokens[index_in_before]);
+      locations_in_after = index_of_before_locations_in_after_tokens[looking_for]; // must exist: .length is read without a null check
+      for (_j = 0, _len = locations_in_after.length; _j < _len; _j++) {
+        index_in_after = locations_in_after[_j];
+        if (index_in_after < start_in_after) { // before the after window: skip
+          continue;
+        }
+        if (index_in_after >= end_in_after) { // past the window (end exclusive); assumes ascending locations
+          break;
+        }
+        if (match_length_at[index_in_after - 1] == null) {
+          match_length_at[index_in_after - 1] = 0;
+        }
+        new_match_length = match_length_at[index_in_after - 1] + 1; // extend the run that ended one token earlier
+        new_match_length_at[index_in_after] = new_match_length;
+        if (new_match_length > best_match_length) { // strict '>': the earliest longest run is kept
+          best_match_in_before = index_in_before - new_match_length + 1;
+          best_match_in_after = index_in_after - new_match_length + 1;
+          best_match_length = new_match_length;
+        }
+      }
+      match_length_at = new_match_length_at;
+    }
+    if (best_match_length !== 0) {
+      match = new Match(best_match_in_before, best_match_in_after, best_match_length);
+    }
+    return match; // undefined when no token in the windows matched
+  };
+  // Collects matches in order: best match in the window, then recursion on both sides of it.
+  recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) {
+    var match;
+    match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after);
+    if (match != null) {
+      if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) { // tokens remain on both sides before the match
+        recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks);
+      }
+      matching_blocks.push(match); // pushed between the two recursions, after the left-hand one
+      if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) {
+        recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks);
+      }
+    }
+    return matching_blocks; // the same array that was passed in, appended to in place
+  };
+
+
+  /*
+   * Builds a lookup table from token key (see get_key_for_token) to the ascending list of
+   * positions at which that key occurs in in_these. Every key of find_these gets an entry.
+   *
+   * @param {Object} options An object with the following:
+   *    - {Array.<string>} find_these The tokens whose keys become the entries of the index.
+   *    - {Array.<string>} in_these The tokens whose positions are recorded.
+   * @throws {Error} If either find_these or in_these is missing.
+   * @return {Object} Map of token key to an array of indexes into in_these (possibly empty).
+   */
+
+  create_index = function(options) {
+    var index, positions;
+    if (options.find_these == null) {
+      throw new Error('params must have find_these key');
+    }
+    if (options.in_these == null) {
+      throw new Error('params must have in_these key');
+    }
+    // Single pass over in_these: map each key to the ascending list of positions
+    // where it occurs. A prototype-less object keeps keys such as "constructor"
+    // from colliding with inherited Object.prototype members.
+    positions = Object.create(null);
+    options.in_these.forEach(function(token, position) {
+      var key;
+      key = get_key_for_token(token);
+      (positions[key] || (positions[key] = [])).push(position);
+    });
+    index = {};
+    options.find_these.forEach(function(token) {
+      var key;
+      key = get_key_for_token(token);
+      // Keys that never occur in in_these still get an (empty) entry.
+      index[key] = positions[key] || [];
+    });
+    return index;
+  };
+
+  find_matching_blocks = function(before_tokens, after_tokens) { // entry point of the block search
+    var index_of_before_locations_in_after_tokens, matching_blocks;
+    matching_blocks = [];
+    index_of_before_locations_in_after_tokens = create_index({ // key of each before token -> positions in after
+      find_these: before_tokens,
+      in_these: after_tokens
+    });
+    return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks); // window ends are exclusive, so the full lists are searched
+  };
+  // Turns the matching blocks into an ordered list of equal/insert/delete/replace operations.
+  calculate_operations = function(before_tokens, after_tokens) {
+    var action_map, action_up_to_match_positions, index, is_single_whitespace, last_op, match, match_starts_at_current_position_in_after, match_starts_at_current_position_in_before, matches, op, operations, position_in_after, position_in_before, post_processed, _i, _j, _len, _len1;
+    if (before_tokens == null) {
+      throw new Error('before_tokens?');
+    }
+    if (after_tokens == null) {
+      throw new Error('after_tokens?');
+    }
+    position_in_before = position_in_after = 0;
+    operations = [];
+    action_map = { // key: "<match starts at current before position>,<... current after position>"
+      'false,false': 'replace',
+      'true,false': 'insert',
+      'false,true': 'delete',
+      'true,true': 'none'
+    };
+    matches = find_matching_blocks(before_tokens, after_tokens);
+    matches.push(new Match(before_tokens.length, after_tokens.length, 0)); // zero-length sentinel so trailing unmatched tokens yield an op
+    for (index = _i = 0, _len = matches.length; _i < _len; index = ++_i) {
+      match = matches[index];
+      match_starts_at_current_position_in_before = position_in_before === match.start_in_before;
+      match_starts_at_current_position_in_after = position_in_after === match.start_in_after;
+      action_up_to_match_positions = action_map[[match_starts_at_current_position_in_before, match_starts_at_current_position_in_after].toString()];
+      if (action_up_to_match_positions !== 'none') { // tokens were skipped on at least one side before this match
+        operations.push({
+          action: action_up_to_match_positions,
+          start_in_before: position_in_before,
+          end_in_before: (action_up_to_match_positions !== 'insert' ? match.start_in_before - 1 : void 0), // undefined for inserts
+          start_in_after: position_in_after,
+          end_in_after: (action_up_to_match_positions !== 'delete' ? match.start_in_after - 1 : void 0) // undefined for deletes
+        });
+      }
+      if (match.length !== 0) { // the sentinel produces no 'equal' op
+        operations.push({
+          action: 'equal',
+          start_in_before: match.start_in_before,
+          end_in_before: match.end_in_before,
+          start_in_after: match.start_in_after,
+          end_in_after: match.end_in_after
+        });
+      }
+      position_in_before = match.end_in_before + 1;
+      position_in_after = match.end_in_after + 1;
+    }
+    post_processed = []; // second pass: fold some ops into the preceding 'replace'
+    last_op = {
+      action: 'none'
+    };
+    is_single_whitespace = function(op) { // an 'equal' op covering exactly one single-whitespace-character token
+      if (op.action !== 'equal') {
+        return false;
+      }
+      if (op.end_in_before - op.start_in_before !== 0) {
+        return false;
+      }
+      return /^\s$/.test(before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9)); // the one-element array is coerced to its string
+    };
+    for (_j = 0, _len1 = operations.length; _j < _len1; _j++) {
+      op = operations[_j];
+      if (((is_single_whitespace(op)) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) {
+        last_op.end_in_before = op.end_in_before; // extends the previously kept 'replace' object in place
+        last_op.end_in_after = op.end_in_after;
+      } else {
+        post_processed.push(op);
+        last_op = op;
+      }
+    }
+    return post_processed;
+  };
+
+  consecutive_where = function(start, content, predicate) { // leading run of accepted tokens
+    var candidates, i, last_hit, verdict;
+    candidates = content.slice(start);
+    last_hit = -1;
+    for (i = 0; i < candidates.length; i++) {
+      verdict = predicate(candidates[i]);
+      if (verdict === false) {
+        break;
+      }
+      if (verdict === true) {
+        last_hit = i;
+      }
+    }
+    if (last_hit === -1) {
+      return [];
+    }
+    // Everything from start up to and including the last token the predicate accepted.
+    return candidates.slice(0, last_hit + 1);
+  };
+
+  wrap = function(tag, content) { // renders tokens, wrapping each run of non-tag tokens in <tag>
+    var length, non_tags, position, rendering, tags, val;
+    rendering = '';
+    position = 0;
+    length = content.length;
+    while (true) { // each pass consumes one run of non-tag tokens, then one run of tag tokens
+      if (position >= length) {
+        break;
+      }
+      non_tags = consecutive_where(position, content, isnt_tag);
+      position += non_tags.length;
+      if (non_tags.length !== 0) {
+        val = non_tags.join('');
+        if (val.trim()) { // a whitespace-only run is left out of the output altogether
+          rendering += "<" + tag + ">" + val + "</" + tag + ">";
+        }
+      }
+      if (position >= length) {
+        break;
+      }
+      tags = consecutive_where(position, content, is_tag);
+      position += tags.length;
+      rendering += tags.join(''); // tag tokens are emitted as-is, outside the wrapper
+    }
+    return rendering;
+  };
+
+  op_map = { // renderer per operation action; each returns an HTML string
+    equal: function(op, before_tokens, after_tokens) { // emits the after tokens, so their attributes are the ones kept
+      return after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9).join('');
+    },
+    insert: function(op, before_tokens, after_tokens) { // after-side tokens wrapped in <ins>
+      var val;
+      val = after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9);
+      return wrap('ins', val);
+    },
+    "delete": function(op, before_tokens, after_tokens) { // before-side tokens wrapped in <del>
+      var val;
+      val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);
+      return wrap('del', val);
+    }
+  };
+
+  op_map.replace = function(op, before_tokens, after_tokens) { // a replace is a delete followed by an insert
+    return (op_map["delete"](op, before_tokens, after_tokens)) + (op_map.insert(op, before_tokens, after_tokens));
+  };
+
+  render_operations = function(before_tokens, after_tokens, operations) { // concatenates the rendering of every op
+    var op, rendering, _i, _len;
+    rendering = '';
+    for (_i = 0, _len = operations.length; _i < _len; _i++) {
+      op = operations[_i];
+      rendering += op_map[op.action](op, before_tokens, after_tokens); // op.action must be a key of op_map
+    }
+    return rendering;
+  };
+
+  diff = function(before, after) { // public entry point: returns HTML marked up with <ins>/<del>
+    var ops;
+    if (before === after) { // identical inputs are returned as-is, without tokenizing
+      return before;
+    }
+    before = html_to_tokens(before); // the parameters are reused to hold the token lists
+    after = html_to_tokens(after);
+    ops = calculate_operations(before, after);
+    return render_operations(before, after, ops);
+  };
+
+  diff.html_to_tokens = html_to_tokens; // internals are hung off diff and find_matching_blocks
+
+  diff.find_matching_blocks = find_matching_blocks;
+
+  find_matching_blocks.find_match = find_match;
+
+  find_matching_blocks.create_index = create_index;
+
+  find_matching_blocks.get_key_for_token = get_key_for_token;
+
+  diff.calculate_operations = calculate_operations;
+
+  diff.render_operations = render_operations;
+
+  if (typeof define === 'function') { // NOTE(review): no define.amd check, so any global define() is used
+    define([], function() {
+      return diff;
+    });
+  } else if (typeof module !== "undefined" && module !== null) { // a module object exists: export through it
+    module.exports = diff;
+  } else {
+    this.htmldiff = diff; // otherwise attach to `this`, supplied by the wrapper's .call(this)
+  }
+
+}).call(this);
diff --git a/package.json b/package.json
index aaf09cc..893846a 100644
--- a/package.json
+++ b/package.json
@@ -4,8 +4,8 @@
     "description": "HTML Diffing in JavaScript (CoffeeScript)",
     "main": "htmldiff.js",
     "scripts": {
-        "test": "mocha -R min",
-        "install": "coffee --compile src"
+        "test": "mocha -R min --compilers coffee:coffee-script/register",
+        "install": "coffee --output js/ --compile src/"
     },
     "repository": {
         "type": "git",
diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index dd5c658..ec0475c 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -4,20 +4,60 @@ is_whitespace = (char)-> /^\s+$/.test char
 is_tag = (token)-> /^\s*<[^>]+>\s*$/.test token
 isnt_tag = (token)-> not is_tag token
 
+###
+ * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
+ * child nodes should not be compared - the entire tag should be treated as one token.
+ *
+ * @param {string} word The characters of the current token read so far.
+ *
+ * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
+ *    null otherwise
+###
+is_start_of_atomic_tag = (word)->
+  result = /^<(iframe|object|math|svg)/.exec word
+  result = result[1] if result
+  return result
+
+###
+ * Checks if the current word is the end of an atomic tag (i.e. it has all the characters,
+ * except for the end bracket of the closing tag, such as "<iframe></iframe").
+ *
+ * @param {string} word The characters of the current token read so far.
+ * @param {string} tag The ending tag to look for.
+ *
+ * @return {boolean} True if the word is now a complete token (including the end tag),
+ *    false otherwise.
+###
+is_end_of_atomic_tag = (word, tag)->
+  (word.substring word.length - tag.length - 2) is "</#{tag}"
+
 class Match
   constructor: (@start_in_before, @start_in_after, @length)->
     @end_in_before = (@start_in_before + @length) - 1
     @end_in_after = (@start_in_after + @length) - 1
 
+###
+ * Tokenizes a string of HTML.
+ *
+ * @param {string} html The string to tokenize.
+ *
+ * @return {Array.<string>} The list of tokens.
+###
 html_to_tokens = (html)->
   mode = 'char'
   current_word = ''
+  current_atomic_tag = ''
   words = []
 
   for char in html
     switch mode
       when 'tag'
-        if is_end_of_tag char
+        atomic_tag = is_start_of_atomic_tag current_word
+        if atomic_tag
+          mode = 'atomic_tag'
+          current_atomic_tag = atomic_tag
+          current_word += char
+        else if is_end_of_tag char
           current_word += '>'
           words.push current_word
           current_word = ''
@@ -27,6 +67,16 @@ html_to_tokens = (html)->
             mode = 'char'
         else
           current_word += char
+      when 'atomic_tag'
+        if (is_end_of_tag char) \
+        and (is_end_of_atomic_tag current_word, current_atomic_tag)
+          current_word += '>'
+          words.push current_word
+          current_word = ''
+          current_atomic_tag = ''
+          mode = 'char'
+        else
+          current_word += char
       when 'char'
         if is_start_of_tag char
           words.push current_word if current_word
@@ -36,11 +86,19 @@ html_to_tokens = (html)->
           words.push current_word if current_word
           current_word = char
           mode = 'whitespace'
-        else if /[\w\#@]+/i.test char
+        else if /[\w\d\#@]/.test char
+          # Consider '#' as part of the same word, since it might be part of an HTML escaped
+          # character (e.g. '&#160;').
           current_word += char
-        else
+        else if /&/.test char
+          # Consider '&' as the start of a new word, since it might be the start of an HTML
+          # escaped character (e.g. '&#160;').
           words.push current_word if current_word
           current_word = char
+        else
+          current_word += char
+          words.push current_word
+          current_word = ''
       when 'whitespace'
         if is_start_of_tag char
           words.push current_word if current_word
@@ -57,6 +115,28 @@ html_to_tokens = (html)->
   words.push current_word if current_word
   return words
 
+###
+ * Creates a key that should be used to match tokens. This is useful, for example, if we want
+ * to consider two open tag tokens as equal, even if they don't have the same attributes. We
+ * use a key instead of overwriting the token because we may want to render original string
+ * without losing the attributes.
+ *
+ * @param {string} token The token to create the key for.
+ *
+ * @return {string} The identifying key that should be used to match before and after tokens.
+###
+get_key_for_token = (token)->
+  # If the token is a tag, return just the tag with no attributes since we do not compare
+  # attributes yet.
+  tag_name = /<([^\s>]+)[\s>]/.exec token
+  return "<#{tag_name[1].toLowerCase()}>" if tag_name
+
+  # If the token is text, collapse adjacent whitespace and replace non-breaking spaces with
+  # regular spaces.
+  return token.replace /(\s+|&nbsp;|&#160;)/g, ' ' if token
+
+  return token
+
 find_match = (before_tokens, after_tokens,
   index_of_before_locations_in_after_tokens,
   start_in_before, end_in_before,
@@ -70,7 +150,7 @@ find_match = (before_tokens, after_tokens,
 
   for index_in_before in [start_in_before...end_in_before]
     new_match_length_at = {}
-    looking_for = before_tokens[index_in_before]
+    looking_for = get_key_for_token before_tokens[index_in_before]
     locations_in_after =
       index_of_before_locations_in_after_tokens[looking_for]
 
@@ -128,17 +208,32 @@ recursively_find_matching_blocks = (before_tokens, after_tokens,
 
   return matching_blocks
 
-create_index = (p)->
-  throw new Error 'params must have find_these key' unless p.find_these?
-  throw new Error 'params must have in_these key' unless p.in_these?
+###
+ * Creates an index (A.K.A. hash table) that will be used to match the list of before
+ * tokens with the list of after tokens.
+ *
+ * @param {Object} options An object with the following:
+ *    - {Array.<string>} find_these The list of tokens that will be used to search.
+ *    - {Array.<string>} in_these The list of tokens that will be returned.
+ *
+ * @return {Object} An index that can be used to search for tokens.
+###
+create_index = (options)->
+  throw new Error 'params must have find_these key' unless options.find_these?
+  throw new Error 'params must have in_these key' unless options.in_these?
+
+  queries = options.find_these.map (token)->
+    return get_key_for_token token
+  results = options.in_these.map (token)->
+    return get_key_for_token token
 
   index = {}
-  for token in p.find_these
-    index[token] = []
-    idx = p.in_these.indexOf token
+  for query in queries
+    index[query] = []
+    idx = results.indexOf query
     while idx isnt -1
-      index[token].push idx
-      idx = p.in_these.indexOf token, idx+1
+      index[query].push idx
+      idx = results.indexOf query, idx+1
 
   return index
 
@@ -240,7 +335,8 @@ wrap = (tag, content)->
     non_tags = consecutive_where position, content, isnt_tag
     position += non_tags.length
     if non_tags.length isnt 0
-      rendering += "<#{tag}>#{non_tags.join ''}</#{tag}>"
+      val = non_tags.join ''
+      rendering += "<#{tag}>#{val}</#{tag}>" if val.trim()
 
     break if position >= length
     tags = consecutive_where position, content, is_tag
@@ -251,7 +347,7 @@ wrap = (tag, content)->
 
 op_map =
   equal: (op, before_tokens, after_tokens)->
-    before_tokens[op.start_in_before..op.end_in_before].join ''
+    after_tokens[op.start_in_after..op.end_in_after].join ''
 
   insert: (op, before_tokens, after_tokens)->
     val = after_tokens[op.start_in_after..op.end_in_after]
@@ -287,6 +383,7 @@ diff.html_to_tokens = html_to_tokens
 diff.find_matching_blocks = find_matching_blocks
 find_matching_blocks.find_match = find_match
 find_matching_blocks.create_index = create_index
+find_matching_blocks.get_key_for_token = get_key_for_token
 diff.calculate_operations = calculate_operations
 diff.render_operations = render_operations
 
diff --git a/test/diff.spec.coffee b/test/diff.spec.coffee
index 68d0cc9..c0ad77c 100644
--- a/test/diff.spec.coffee
+++ b/test/diff.spec.coffee
@@ -9,9 +9,19 @@ describe 'Diff', ->
     it 'should return the text', ->
       (expect @res).equal 'input text'
 
-  xdescribe 'When a letter is added', ->
+  describe 'When a letter is added', ->
     beforeEach ->
       @res = @cut 'input', 'input 2'
 
     it 'should mark the new letter', ->
-      (expect @res).to.equal 'input <ins>2</ins>'
+      (expect @res).to.equal 'input<ins> 2</ins>'
+
+  describe 'Whitespace differences', ->
+    it 'should collapse adjacent whitespace', ->
+      (expect @cut 'Much \n\t    spaces', 'Much spaces').to.equal 'Much spaces'
+
+    it 'should consider non-breaking spaces as equal', ->
+      (expect @cut 'Hello&nbsp;world', 'Hello&#160;world').to.equal 'Hello&#160;world'
+
+    it 'should consider non-breaking spaces and non-adjacent regular spaces as equal', ->
+      (expect @cut 'Hello&nbsp;world', 'Hello world').to.equal 'Hello world'
diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee
index ac6c235..a770b18 100644
--- a/test/html_to_tokens.spec.coffee
+++ b/test/html_to_tokens.spec.coffee
@@ -28,3 +28,40 @@ describe 'html_to_tokens', ->
   it 'should identify self closing tags as tokens', ->
     (expect @cut '<p>hello</br>goodbye</p>')
     .eql ['<p>', 'hello', '</br>', 'goodbye', '</p>']
+
+  describe 'when encountering atomic tags', ->
+    it 'should identify an image tag as a single token', ->
+      (expect @cut '<p><img src="1.jpg"><img src="2.jpg"></p>')
+      .eql ['<p>', '<img src="1.jpg">', '<img src="2.jpg">', '</p>']
+
+    it 'should identify an iframe tag as a single token', ->
+      (expect @cut '<p><iframe src="sample.html"></iframe></p>')
+      .eql ['<p>', '<iframe src="sample.html"></iframe>', '</p>']
+
+    it 'should identify an object tag as a single token', ->
+      (expect @cut '<p><object><param name="1" /><param name="2" /></object></p>')
+      .eql ['<p>', '<object><param name="1" /><param name="2" /></object>', '</p>']
+
+    it 'should identify a math tag as a single token', ->
+      (expect @cut '<p><math xmlns="http://www.w3.org/1998/Math/MathML">' +
+        '<mi>&#x03C0;<!-- π --></mi>' +
+        '<mo>&#x2062;<!-- &InvisibleTimes; --></mo>' +
+        '<msup><mi>r</mi><mn>2</mn></msup></math></p>')
+      .eql [
+        '<p>',
+        '<math xmlns="http://www.w3.org/1998/Math/MathML">' +
+            '<mi>&#x03C0;<!-- π --></mi>' +
+            '<mo>&#x2062;<!-- &InvisibleTimes; --></mo>' +
+            '<msup><mi>r</mi><mn>2</mn></msup></math>',
+        '</p>']
+
+    it 'should identify a svg tag as a single token', ->
+      (expect @cut '<p><svg width="100" height="100">' +
+        '<circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" />' +
+        '</svg></p>')
+      .eql [
+        '<p>',
+        '<svg width="100" height="100">' +
+          '<circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" />' +
+          '</svg>',
+        '</p>']
diff --git a/test/render_operations.spec.coffee b/test/render_operations.spec.coffee
index a233315..179889a 100644
--- a/test/render_operations.spec.coffee
+++ b/test/render_operations.spec.coffee
@@ -63,3 +63,30 @@ describe 'render_operations', ->
 
       it 'should keep the change inside the <p>', ->
         (expect @res).to.equal '<p><del>this</del><ins>I</ins> is awesome</p>'
+
+  describe 'empty tokens', ->
+    it 'should not be wrapped', ->
+      before = ['text']
+      after = ['text', ' ']
+
+      @res = @cut before, after
+
+      (expect @res).to.equal 'text'
+
+  describe 'tags with attributes', ->
+    it 'should treat attribute changes as equal and output the after tag', ->
+      before = ['<p>', 'this', ' ', 'is', ' ', 'awesome', '</p>']
+      after = ['<p style="margin: 2px;" class="after">', 'this', ' ', 'is', ' ', 'awesome', '</p>']
+
+      @res = @cut before, after
+
+      (expect @res).to.equal '<p style="margin: 2px;" class="after">this is awesome</p>'
+
+    it 'should show changes within tags with different attributes', ->
+      before = ['<p>', 'this', ' ', 'is', ' ', 'awesome', '</p>']
+      after = ['<p style="margin: 2px;" class="after">', 'that', ' ', 'is', ' ', 'awesome', '</p>']
+
+      @res = @cut before, after
+
+      (expect @res).to.equal \
+        '<p style="margin: 2px;" class="after"><del>this</del><ins>that</ins> is awesome</p>'

From fcf51222893da261f2d22eca88e8d8bfa1c90962 Mon Sep 17 00:00:00 2001
From: Keanu Lee <keanu@inkling.com>
Date: Tue, 8 Apr 2014 12:27:45 -0700
Subject: [PATCH 2/2] PR comments

---
 src/htmldiff.coffee             | 5 +++--
 test/html_to_tokens.spec.coffee | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index ec0475c..d746473 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -6,7 +6,8 @@ isnt_tag = (token)-> not is_tag token
 
 ###
  * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
- * child nodes should not be compared - the entire tag should be treated as one token.
+ * child nodes should not be compared - the entire tag should be treated as one token. This
+ * is useful for tags where it does not make sense to insert <ins> and <del> tags.
  *
  * @param {string} word The characters of the current token read so far.
  *
@@ -118,7 +119,7 @@ html_to_tokens = (html)->
 ###
  * Creates a key that should be used to match tokens. This is useful, for example, if we want
  * to consider two open tag tokens as equal, even if they don't have the same attributes. We
- * use a key instead of overwriting the token because we may want to render original string
+ * use a key instead of overwriting the token because we may want to render the original string
  * without losing the attributes.
  *
  * @param {string} token The token to create the key for.
diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee
index a770b18..8455d61 100644
--- a/test/html_to_tokens.spec.coffee
+++ b/test/html_to_tokens.spec.coffee
@@ -55,7 +55,7 @@ describe 'html_to_tokens', ->
             '<msup><mi>r</mi><mn>2</mn></msup></math>',
         '</p>']
 
-    it 'should identify a svg tag as a single token', ->
+    it 'should identify an svg tag as a single token', ->
       (expect @cut '<p><svg width="100" height="100">' +
         '<circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" />' +
         '</svg></p>')