Skip to content

Instantly share code, notes, and snippets.

@matze
Created July 3, 2013 15:55
Show Gist options
  • Save matze/5919701 to your computer and use it in GitHub Desktop.
Save matze/5919701 to your computer and use it in GitHub Desktop.
Diff two texts and mark up the result with <del> and <ins>.
# -*- coding: utf-8 -*-
import re
import difflib
import codecs
from itertools import chain
# Matches runs of whitespace.  The capturing group makes ``split`` keep the
# separators in its result.  The Unicode flag is passed as an argument because
# an inline ``(?u)`` that is not at the very start of the pattern is rejected
# by Python 3.11 and later.
_diff_split_re = re.compile(r'(\s+)', re.UNICODE)


def text_split(text):
    """Split *text* into tokens, each carrying its leading whitespace.

    The first token is the text before the first whitespace run (possibly
    empty); every following token is a whitespace run followed by the word
    after it.  Joining the returned tokens reproduces *text* exactly.

    :param text: the string to tokenize.
    :returns: a list of string tokens; ``[u'']`` for an empty string.
    """
    # With one capturing group, split() returns
    # [word0, ws0, word1, ws1, ..., wordN] -- always an odd number of items.
    pieces = _diff_split_re.split(text)
    tokens = [pieces[0]]
    # Glue each whitespace run onto the word that follows it.  Slicing is used
    # instead of the Python-2-only ``iterator.next()`` method.
    tokens.extend(
        space + word for space, word in zip(pieces[1::2], pieces[2::2])
    )
    return tokens
def diff(old_text, new_text):
    """Yield HTML fragments describing how *old_text* became *new_text*.

    Both texts are tokenized with ``text_split`` and compared with
    ``difflib.SequenceMatcher``.  Tokens only in the old text are wrapped in
    ``<del>``, tokens only in the new text in ``<ins>``; unchanged tokens are
    yielded as-is.  The text is inserted verbatim (no HTML escaping is done
    here).

    :param old_text: the original text.
    :param new_text: the revised text.
    :returns: a generator of unicode fragments, in document order.
    """
    old_tokens = text_split(old_text)
    new_tokens = text_split(new_text)

    def markup(element, tokens):
        # Enclose the concatenated tokens in <element>...</element>.
        return u'<{0}>{1}</{0}>'.format(element, u''.join(tokens))

    opcodes = difflib.SequenceMatcher(
        None, old_tokens, new_tokens).get_opcodes()
    for op, old_lo, old_hi, new_lo, new_hi in opcodes:
        removes = op in ('replace', 'delete')
        adds = op in ('replace', 'insert')
        if not (removes or adds):
            # Unchanged run: emit the old tokens untouched.
            yield u''.join(old_tokens[old_lo:old_hi])
            continue
        # A replacement emits the deletion first, then the insertion.
        if removes:
            yield markup('del', old_tokens[old_lo:old_hi])
        if adds:
            yield markup('ins', new_tokens[new_lo:new_hi])
def merge_lines(seq):
    """Merge consecutive non-empty lines into paragraphs.

    Each empty string in *seq* ends the current paragraph.  Every line of a
    paragraph is followed by a single space, so ``['a', 'b', '']`` becomes
    ``['a b ']``.  An empty string that follows no lines produces an empty
    paragraph.

    :param seq: a sequence of lines without their newline characters.
    :returns: a list of paragraph strings, or ``None`` when *seq* is empty.
    """
    if not seq:
        return None

    paragraphs = []
    pending = []  # lines of the paragraph currently being collected

    def flush():
        # Each line keeps a trailing space, including the last one.
        paragraphs.append(u''.join(part + u' ' for part in pending))

    for line in seq:
        if line != u'':
            pending.append(line)
        else:
            flush()
            pending = []

    # Without this, a final paragraph that is not followed by an empty line
    # would be dropped.
    if pending:
        flush()
    return paragraphs
def render_html_diff(old_text, new_text):
    """Render a paragraph-by-paragraph HTML diff of two texts.

    Both texts are split on newlines and grouped into paragraphs with
    ``merge_lines``.  Paragraphs are compared pairwise by position using
    ``diff``, and each result is followed by a blank line.

    :param old_text: the original text.
    :param new_text: the revised text.
    :returns: a unicode string with ``<del>``/``<ins>`` markup.
    """
    # merge_lines returns None for an empty sequence; treat that as "no
    # paragraphs" so the length arithmetic below cannot fail.
    old_pars = merge_lines(old_text.split('\n')) or []
    new_pars = merge_lines(new_text.split('\n')) or []

    # Pad the shorter side with empty paragraphs.  A plain zip() would stop at
    # the shorter list and silently drop paragraphs that exist on one side
    # only.
    count = max(len(old_pars), len(new_pars))
    old_pars = old_pars + [u''] * (count - len(old_pars))
    new_pars = new_pars + [u''] * (count - len(new_pars))

    return u''.join(
        u''.join(diff(old_par, new_par)) + u'\n\n'
        for old_par, new_par in zip(old_pars, new_pars)
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment