3 """Diff Match and Patch
5 Copyright 2006 Google Inc.
6 http://code.google.com/p/google-diff-match-patch/
8 Licensed under the Apache License, Version 2.0 (the "License");
9 you may not use this file except in compliance with the License.
10 You may obtain a copy of the License at
12 http://www.apache.org/licenses/LICENSE-2.0
14 Unless required by applicable law or agreed to in writing, software
15 distributed under the License is distributed on an "AS IS" BASIS,
16 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 See the License for the specific language governing permissions and
18 limitations under the License.
21 """Functions for diff, match and patch.
23 Computes the difference between two texts to create a patch.
24 Applies the patch onto another text, allowing for errors.
# Module authorship metadata (conventional PEP 8 dunder).
__author__ = 'fraser@google.com (Neil Fraser)'
class diff_match_patch:
  """Class containing the diff, match and patch methods.

  Also contains the behaviour settings.
  """
41 """Inits a diff_match_patch object with default settings.
42 Redefine these in your program to override the defaults.
45 # Number of seconds to map a diff before giving up (0 for infinity).
46 self.Diff_Timeout = 1.0
47 # Cost of an empty edit operation in terms of edit characters.
48 self.Diff_EditCost = 4
49 # The size beyond which the double-ended diff activates.
50 # Double-ending is twice as fast, but less accurate.
51 self.Diff_DualThreshold = 32
52 # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
53 self.Match_Threshold = 0.5
54 # How far to search for a match (0 = exact location, 1000+ = broad match).
55 # A match this many characters away from the expected location will add
56 # 1.0 to the score (0.0 is a perfect match).
57 self.Match_Distance = 1000
58 # When deleting a large block of text (over ~64 characters), how close does
59 # the contents have to match the expected contents. (0.0 = perfection,
60 # 1.0 = very loose). Note that Match_Threshold controls how closely the
61 # end points of a delete need to match.
62 self.Patch_DeleteThreshold = 0.5
63 # Chunk size for context length.
66 # How many bits in a number?
67 # Python has no maximum, thus to disable patch splitting set to 0.
68 # However to avoid long patches in certain pathological cases, use 32.
69 # Multiple short patches (using native ints) are much faster than long ones.
70 self.Match_MaxBits = 32
74 # The data structure representing a diff is an array of tuples:
75 # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
76 # which means: delete "Hello", add "Goodbye" and keep " world."
81 def diff_main(self, text1, text2, checklines=True):
82 """Find the differences between two texts. Simplifies the problem by
83 stripping any common prefix or suffix off the texts before diffing.
86 text1: Old string to be diffed.
87 text2: New string to be diffed.
88 checklines: Optional speedup flag. If present and false, then don't run
89 a line-level diff first to identify the changed areas.
90 Defaults to true, which does a faster, slightly less optimal diff.
96 # Check for null inputs.
97 if text1 == None or text2 == None:
98 raise ValueError("Null inputs. (diff_main)")
100 # Check for equality (speedup).
102 return [(self.DIFF_EQUAL, text1)]
104 # Trim off common prefix (speedup).
105 commonlength = self.diff_commonPrefix(text1, text2)
106 commonprefix = text1[:commonlength]
107 text1 = text1[commonlength:]
108 text2 = text2[commonlength:]
110 # Trim off common suffix (speedup).
111 commonlength = self.diff_commonSuffix(text1, text2)
112 if commonlength == 0:
115 commonsuffix = text1[-commonlength:]
116 text1 = text1[:-commonlength]
117 text2 = text2[:-commonlength]
119 # Compute the diff on the middle block.
120 diffs = self.diff_compute(text1, text2, checklines)
122 # Restore the prefix and suffix.
124 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
126 diffs.append((self.DIFF_EQUAL, commonsuffix))
127 self.diff_cleanupMerge(diffs)
130 def diff_compute(self, text1, text2, checklines):
131 """Find the differences between two texts. Assumes that the texts do not
132 have any common prefix or suffix.
135 text1: Old string to be diffed.
136 text2: New string to be diffed.
137 checklines: Speedup flag. If false, then don't run a line-level diff
138 first to identify the changed areas.
139 If true, then run a faster, slightly less optimal diff.
145 # Just add some text (speedup).
146 return [(self.DIFF_INSERT, text2)]
149 # Just delete some text (speedup).
150 return [(self.DIFF_DELETE, text1)]
152 if len(text1) > len(text2):
153 (longtext, shorttext) = (text1, text2)
155 (shorttext, longtext) = (text1, text2)
156 i = longtext.find(shorttext)
158 # Shorter text is inside the longer text (speedup).
159 diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
160 (self.DIFF_INSERT, longtext[i + len(shorttext):])]
161 # Swap insertions for deletions if diff is reversed.
162 if len(text1) > len(text2):
163 diffs[0] = (self.DIFF_DELETE, diffs[0][1])
164 diffs[2] = (self.DIFF_DELETE, diffs[2][1])
166 longtext = shorttext = None # Garbage collect.
168 # Check to see if the problem can be split in two.
169 hm = self.diff_halfMatch(text1, text2)
171 # A half-match was found, sort out the return data.
172 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
173 # Send both pairs off for separate processing.
174 diffs_a = self.diff_main(text1_a, text2_a, checklines)
175 diffs_b = self.diff_main(text1_b, text2_b, checklines)
177 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
179 # Perform a real diff.
180 if checklines and (len(text1) < 100 or len(text2) < 100):
181 checklines = False # Too trivial for the overhead.
183 # Scan the text on a line-by-line basis first.
184 (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
186 diffs = self.diff_map(text1, text2)
187 if not diffs: # No acceptable result.
188 diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
190 # Convert the diff back to original text.
191 self.diff_charsToLines(diffs, linearray)
192 # Eliminate freak matches (e.g. blank lines)
193 self.diff_cleanupSemantic(diffs)
195 # Rediff any replacement blocks, this time character-by-character.
196 # Add a dummy entry at the end.
197 diffs.append((self.DIFF_EQUAL, ''))
203 while pointer < len(diffs):
204 if diffs[pointer][0] == self.DIFF_INSERT:
206 text_insert += diffs[pointer][1]
207 elif diffs[pointer][0] == self.DIFF_DELETE:
209 text_delete += diffs[pointer][1]
210 elif diffs[pointer][0] == self.DIFF_EQUAL:
211 # Upon reaching an equality, check for prior redundancies.
212 if count_delete >= 1 and count_insert >= 1:
213 # Delete the offending records and add the merged ones.
214 a = self.diff_main(text_delete, text_insert, False)
215 diffs[pointer - count_delete - count_insert : pointer] = a
216 pointer = pointer - count_delete - count_insert + len(a)
224 diffs.pop() # Remove the dummy entry at the end.
227 def diff_linesToChars(self, text1, text2):
228 """Split two texts into an array of strings. Reduce the texts to a string
229 of hashes where each Unicode character represents one line.
233 text2: Second string.
236 Three element tuple, containing the encoded text1, the encoded text2 and
237 the array of unique strings. The zeroth element of the array of unique
238 strings is intentionally blank.
240 lineArray = [] # e.g. lineArray[4] == "Hello\n"
241 lineHash = {} # e.g. lineHash["Hello\n"] == 4
243 # "\x00" is a valid character, but various debuggers don't like it.
244 # So we'll insert a junk entry to avoid generating a null character.
247 def diff_linesToCharsMunge(text):
248 """Split a text into an array of strings. Reduce the texts to a string
249 of hashes where each Unicode character represents one line.
250 Modifies linearray and linehash through being a closure.
253 text: String to encode.
259 # Walk the text, pulling out a substring for each line.
260 # text.split('\n') would would temporarily double our memory footprint.
261 # Modifying text would create many large strings to garbage collect.
264 while lineEnd < len(text) - 1:
265 lineEnd = text.find('\n', lineStart)
267 lineEnd = len(text) - 1
268 line = text[lineStart:lineEnd + 1]
269 lineStart = lineEnd + 1
272 chars.append(unichr(lineHash[line]))
274 lineArray.append(line)
275 lineHash[line] = len(lineArray) - 1
276 chars.append(unichr(len(lineArray) - 1))
277 return "".join(chars)
279 chars1 = diff_linesToCharsMunge(text1)
280 chars2 = diff_linesToCharsMunge(text2)
281 return (chars1, chars2, lineArray)
283 def diff_charsToLines(self, diffs, lineArray):
284 """Rehydrate the text in a diff from a string of line hashes to real lines
288 diffs: Array of diff tuples.
289 lineArray: Array of unique strings.
291 for x in xrange(len(diffs)):
293 for char in diffs[x][1]:
294 text.append(lineArray[ord(char)])
295 diffs[x] = (diffs[x][0], "".join(text))
297 def diff_map(self, text1, text2):
298 """Explore the intersection points between the two texts.
301 text1: Old string to be diffed.
302 text2: New string to be diffed.
305 Array of diff tuples or None if no diff available.
308 # Unlike in most languages, Python counts time in seconds.
309 s_end = time.time() + self.Diff_Timeout # Don't run for too long.
310 # Cache the text lengths to prevent multiple calls.
311 text1_length = len(text1)
312 text2_length = len(text2)
313 max_d = text1_length + text2_length - 1
314 doubleEnd = self.Diff_DualThreshold * 2 < max_d
315 # Python efficiency note: (x << 32) + y is the fastest way to combine
316 # x and y into a single hashable value. Tested in Python 2.5.
317 # It is unclear why it is faster for v_map[d] to be indexed with an
318 # integer whereas footsteps is indexed with a string.
327 # If the total number of characters is odd, then the front path will
328 # collide with the reverse path.
329 front = (text1_length + text2_length) % 2
330 for d in xrange(max_d):
331 # Bail out if timeout reached.
332 if self.Diff_Timeout > 0 and time.time() > s_end:
335 # Walk the front path one step.
337 for k in xrange(-d, d + 1, 2):
338 if k == -d or k != d and v1[k - 1] < v1[k + 1]:
344 footstep = str((x << 32) + y)
345 if front and footstep in footsteps:
348 footsteps[footstep] = d
350 while (not done and x < text1_length and y < text2_length and
351 text1[x] == text2[y]):
355 footstep = str((x << 32) + y)
356 if front and footstep in footsteps:
359 footsteps[footstep] = d
362 v_map1[d][(x << 32) + y] = True
363 if x == text1_length and y == text2_length:
364 # Reached the end in single-path mode.
365 return self.diff_path1(v_map1, text1, text2)
367 # Front path ran over reverse path.
368 v_map2 = v_map2[:footsteps[footstep] + 1]
369 a = self.diff_path1(v_map1, text1[:x], text2[:y])
370 b = self.diff_path2(v_map2, text1[x:], text2[y:])
374 # Walk the reverse path one step.
376 for k in xrange(-d, d + 1, 2):
377 if k == -d or k != d and v2[k - 1] < v2[k + 1]:
382 footstep = str((text1_length - x << 32) + text2_length - y)
383 if not front and footstep in footsteps:
386 footsteps[footstep] = d
387 while (not done and x < text1_length and y < text2_length and
388 text1[-x - 1] == text2[-y - 1]):
391 footstep = str((text1_length - x << 32) + text2_length - y)
392 if not front and footstep in footsteps:
395 footsteps[footstep] = d
398 v_map2[d][(x << 32) + y] = True
400 # Reverse path ran over front path.
401 v_map1 = v_map1[:footsteps[footstep] + 1]
402 a = self.diff_path1(v_map1, text1[:text1_length - x],
403 text2[:text2_length - y])
404 b = self.diff_path2(v_map2, text1[text1_length - x:],
405 text2[text2_length - y:])
408 # Number of diffs equals number of characters, no commonality at all.
411 def diff_path1(self, v_map, text1, text2):
412 """Work from the middle back to the start to determine the path.
415 v_map: Array of paths.
416 text1: Old string fragment to be diffed.
417 text2: New string fragment to be diffed.
420 Array of diff tuples.
426 for d in xrange(len(v_map) - 2, -1, -1):
428 if (x - 1 << 32) + y in v_map[d]:
430 if last_op == self.DIFF_DELETE:
431 path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
433 path[:0] = [(self.DIFF_DELETE, text1[x])]
434 last_op = self.DIFF_DELETE
436 elif (x << 32) + y - 1 in v_map[d]:
438 if last_op == self.DIFF_INSERT:
439 path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
441 path[:0] = [(self.DIFF_INSERT, text2[y])]
442 last_op = self.DIFF_INSERT
447 assert text1[x] == text2[y], ("No diagonal. " +
448 "Can't happen. (diff_path1)")
449 if last_op == self.DIFF_EQUAL:
450 path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
452 path[:0] = [(self.DIFF_EQUAL, text1[x])]
453 last_op = self.DIFF_EQUAL
456 def diff_path2(self, v_map, text1, text2):
457 """Work from the middle back to the end to determine the path.
460 v_map: Array of paths.
461 text1: Old string fragment to be diffed.
462 text2: New string fragment to be diffed.
465 Array of diff tuples.
471 for d in xrange(len(v_map) - 2, -1, -1):
473 if (x - 1 << 32) + y in v_map[d]:
475 if last_op == self.DIFF_DELETE:
476 path[-1] = (self.DIFF_DELETE, path[-1][1] + text1[-x - 1])
478 path.append((self.DIFF_DELETE, text1[-x - 1]))
479 last_op = self.DIFF_DELETE
481 elif (x << 32) + y - 1 in v_map[d]:
483 if last_op == self.DIFF_INSERT:
484 path[-1] = (self.DIFF_INSERT, path[-1][1] + text2[-y - 1])
486 path.append((self.DIFF_INSERT, text2[-y - 1]))
487 last_op = self.DIFF_INSERT
492 assert text1[-x - 1] == text2[-y - 1], ("No diagonal. " +
493 "Can't happen. (diff_path2)")
494 if last_op == self.DIFF_EQUAL:
495 path[-1] = (self.DIFF_EQUAL, path[-1][1] + text1[-x - 1])
497 path.append((self.DIFF_EQUAL, text1[-x - 1]))
498 last_op = self.DIFF_EQUAL
501 def diff_commonPrefix(self, text1, text2):
502 """Determine the common prefix of two strings.
506 text2: Second string.
509 The number of characters common to the start of each string.
511 # Quick check for common null cases.
512 if not text1 or not text2 or text1[0] != text2[0]:
515 # Performance analysis: http://neil.fraser.name/news/2007/10/09/
517 pointermax = min(len(text1), len(text2))
518 pointermid = pointermax
520 while pointermin < pointermid:
521 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
522 pointermin = pointermid
523 pointerstart = pointermin
525 pointermax = pointermid
526 pointermid = int((pointermax - pointermin) / 2 + pointermin)
529 def diff_commonSuffix(self, text1, text2):
530 """Determine the common suffix of two strings.
534 text2: Second string.
537 The number of characters common to the end of each string.
539 # Quick check for common null cases.
540 if not text1 or not text2 or text1[-1] != text2[-1]:
543 # Performance analysis: http://neil.fraser.name/news/2007/10/09/
545 pointermax = min(len(text1), len(text2))
546 pointermid = pointermax
548 while pointermin < pointermid:
549 if (text1[-pointermid:len(text1) - pointerend] ==
550 text2[-pointermid:len(text2) - pointerend]):
551 pointermin = pointermid
552 pointerend = pointermin
554 pointermax = pointermid
555 pointermid = int((pointermax - pointermin) / 2 + pointermin)
558 def diff_halfMatch(self, text1, text2):
559 """Do the two texts share a substring which is at least half the length of
564 text2: Second string.
567 Five element Array, containing the prefix of text1, the suffix of text1,
568 the prefix of text2, the suffix of text2 and the common middle. Or None
569 if there was no match.
571 if len(text1) > len(text2):
572 (longtext, shorttext) = (text1, text2)
574 (shorttext, longtext) = (text1, text2)
575 if len(longtext) < 10 or len(shorttext) < 1:
576 return None # Pointless.
578 def diff_halfMatchI(longtext, shorttext, i):
579 """Does a substring of shorttext exist within longtext such that the
580 substring is at least half the length of longtext?
581 Closure, but does not reference any external variables.
584 longtext: Longer string.
585 shorttext: Shorter string.
586 i: Start index of quarter length substring within longtext.
589 Five element Array, containing the prefix of longtext, the suffix of
590 longtext, the prefix of shorttext, the suffix of shorttext and the
591 common middle. Or None if there was no match.
593 seed = longtext[i:i + len(longtext) / 4]
595 j = shorttext.find(seed)
597 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
598 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
599 if len(best_common) < suffixLength + prefixLength:
600 best_common = (shorttext[j - suffixLength:j] +
601 shorttext[j:j + prefixLength])
602 best_longtext_a = longtext[:i - suffixLength]
603 best_longtext_b = longtext[i + prefixLength:]
604 best_shorttext_a = shorttext[:j - suffixLength]
605 best_shorttext_b = shorttext[j + prefixLength:]
606 j = shorttext.find(seed, j + 1)
608 if len(best_common) >= len(longtext) / 2:
609 return (best_longtext_a, best_longtext_b,
610 best_shorttext_a, best_shorttext_b, best_common)
614 # First check if the second quarter is the seed for a half-match.
615 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
616 # Check again based on the third quarter.
617 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
618 if not hm1 and not hm2:
625 # Both matched. Select the longest.
626 if len(hm1[4]) > len(hm2[4]):
631 # A half-match was found, sort out the return data.
632 if len(text1) > len(text2):
633 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
635 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
636 return (text1_a, text1_b, text2_a, text2_b, mid_common)
638 def diff_cleanupSemantic(self, diffs):
639 """Reduce the number of edits by eliminating semantically trivial
643 diffs: Array of diff tuples.
646 equalities = [] # Stack of indices where equalities are found.
647 lastequality = None # Always equal to equalities[-1][1]
648 pointer = 0 # Index of current position.
649 length_changes1 = 0 # Number of chars that changed prior to the equality.
650 length_changes2 = 0 # Number of chars that changed after the equality.
651 while pointer < len(diffs):
652 if diffs[pointer][0] == self.DIFF_EQUAL: # equality found
653 equalities.append(pointer)
654 length_changes1 = length_changes2
656 lastequality = diffs[pointer][1]
657 else: # an insertion or deletion
658 length_changes2 += len(diffs[pointer][1])
659 if (lastequality != None and (len(lastequality) <= length_changes1) and
660 (len(lastequality) <= length_changes2)):
662 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
663 # Change second copy to insert.
664 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
665 diffs[equalities[-1] + 1][1])
666 # Throw away the equality we just deleted.
668 # Throw away the previous equality (it needs to be reevaluated).
669 if len(equalities) != 0:
672 pointer = equalities[-1]
675 length_changes1 = 0 # Reset the counters.
682 self.diff_cleanupMerge(diffs)
684 self.diff_cleanupSemanticLossless(diffs)
686 def diff_cleanupSemanticLossless(self, diffs):
687 """Look for single edits surrounded on both sides by equalities
688 which can be shifted sideways to align the edit to a word boundary.
689 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
692 diffs: Array of diff tuples.
695 def diff_cleanupSemanticScore(one, two):
696 """Given two strings, compute a score representing whether the
697 internal boundary falls on logical boundaries.
698 Scores range from 5 (best) to 0 (worst).
699 Closure, but does not reference any external variables.
708 if not one or not two:
709 # Edges are the best.
712 # Each port of this function behaves slightly differently due to
713 # subtle differences in each language's definition of things like
714 # 'whitespace'. Since this function's purpose is largely cosmetic,
715 # the choice has been made to use each language's native features
716 # rather than force total conformity.
718 # One point for non-alphanumeric.
719 if not one[-1].isalnum() or not two[0].isalnum():
721 # Two points for whitespace.
722 if one[-1].isspace() or two[0].isspace():
724 # Three points for line breaks.
725 if (one[-1] == "\r" or one[-1] == "\n" or
726 two[0] == "\r" or two[0] == "\n"):
728 # Four points for blank lines.
729 if (re.search("\\n\\r?\\n$", one) or
730 re.match("^\\r?\\n\\r?\\n", two)):
735 # Intentionally ignore the first and last element (don't need checking).
736 while pointer < len(diffs) - 1:
737 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
738 diffs[pointer + 1][0] == self.DIFF_EQUAL):
739 # This is a single edit surrounded by equalities.
740 equality1 = diffs[pointer - 1][1]
741 edit = diffs[pointer][1]
742 equality2 = diffs[pointer + 1][1]
744 # First, shift the edit as far left as possible.
745 commonOffset = self.diff_commonSuffix(equality1, edit)
747 commonString = edit[-commonOffset:]
748 equality1 = equality1[:-commonOffset]
749 edit = commonString + edit[:-commonOffset]
750 equality2 = commonString + equality2
752 # Second, step character by character right, looking for the best fit.
753 bestEquality1 = equality1
755 bestEquality2 = equality2
756 bestScore = (diff_cleanupSemanticScore(equality1, edit) +
757 diff_cleanupSemanticScore(edit, equality2))
758 while edit and equality2 and edit[0] == equality2[0]:
760 edit = edit[1:] + equality2[0]
761 equality2 = equality2[1:]
762 score = (diff_cleanupSemanticScore(equality1, edit) +
763 diff_cleanupSemanticScore(edit, equality2))
764 # The >= encourages trailing rather than leading whitespace on edits.
765 if score >= bestScore:
767 bestEquality1 = equality1
769 bestEquality2 = equality2
771 if diffs[pointer - 1][1] != bestEquality1:
772 # We have an improvement, save it back to the diff.
774 diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
776 del diffs[pointer - 1]
778 diffs[pointer] = (diffs[pointer][0], bestEdit)
780 diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
782 del diffs[pointer + 1]
786 def diff_cleanupEfficiency(self, diffs):
787 """Reduce the number of edits by eliminating operationally trivial
791 diffs: Array of diff tuples.
794 equalities = [] # Stack of indices where equalities are found.
795 lastequality = '' # Always equal to equalities[-1][1]
796 pointer = 0 # Index of current position.
797 pre_ins = False # Is there an insertion operation before the last equality.
798 pre_del = False # Is there a deletion operation before the last equality.
799 post_ins = False # Is there an insertion operation after the last equality.
800 post_del = False # Is there a deletion operation after the last equality.
801 while pointer < len(diffs):
802 if diffs[pointer][0] == self.DIFF_EQUAL: # equality found
803 if (len(diffs[pointer][1]) < self.Diff_EditCost and
804 (post_ins or post_del)):
806 equalities.append(pointer)
809 lastequality = diffs[pointer][1]
811 # Not a candidate, and can never become one.
815 post_ins = post_del = False
816 else: # an insertion or deletion
817 if diffs[pointer][0] == self.DIFF_DELETE:
822 # Five types to be split:
823 # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
824 # <ins>A</ins>X<ins>C</ins><del>D</del>
825 # <ins>A</ins><del>B</del>X<ins>C</ins>
826 # <ins>A</del>X<ins>C</ins><del>D</del>
827 # <ins>A</ins><del>B</del>X<del>C</del>
829 if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
830 ((len(lastequality) < self.Diff_EditCost / 2) and
831 (pre_ins + pre_del + post_ins + post_del) == 3)):
833 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
834 # Change second copy to insert.
835 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
836 diffs[equalities[-1] + 1][1])
837 equalities.pop() # Throw away the equality we just deleted
839 if pre_ins and pre_del:
840 # No changes made which could affect previous entry, keep going.
841 post_ins = post_del = True
845 equalities.pop() # Throw away the previous equality
847 pointer = equalities[-1]
850 post_ins = post_del = False
855 self.diff_cleanupMerge(diffs)
857 def diff_cleanupMerge(self, diffs):
858 """Reorder and merge like edit sections. Merge equalities.
859 Any edit section can move as long as it doesn't cross an equality.
862 diffs: Array of diff tuples.
864 diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
870 while pointer < len(diffs):
871 if diffs[pointer][0] == self.DIFF_INSERT:
873 text_insert += diffs[pointer][1]
875 elif diffs[pointer][0] == self.DIFF_DELETE:
877 text_delete += diffs[pointer][1]
879 elif diffs[pointer][0] == self.DIFF_EQUAL:
880 # Upon reaching an equality, check for prior redundancies.
881 if count_delete != 0 or count_insert != 0:
882 if count_delete != 0 and count_insert != 0:
883 # Factor out any common prefixies.
884 commonlength = self.diff_commonPrefix(text_insert, text_delete)
885 if commonlength != 0:
886 x = pointer - count_delete - count_insert - 1
887 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
888 diffs[x] = (diffs[x][0], diffs[x][1] +
889 text_insert[:commonlength])
891 diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
893 text_insert = text_insert[commonlength:]
894 text_delete = text_delete[commonlength:]
895 # Factor out any common suffixies.
896 commonlength = self.diff_commonSuffix(text_insert, text_delete)
897 if commonlength != 0:
898 diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
900 text_insert = text_insert[:-commonlength]
901 text_delete = text_delete[:-commonlength]
902 # Delete the offending records and add the merged ones.
903 if count_delete == 0:
904 diffs[pointer - count_insert : pointer] = [
905 (self.DIFF_INSERT, text_insert)]
906 elif count_insert == 0:
907 diffs[pointer - count_delete : pointer] = [
908 (self.DIFF_DELETE, text_delete)]
910 diffs[pointer - count_delete - count_insert : pointer] = [
911 (self.DIFF_DELETE, text_delete),
912 (self.DIFF_INSERT, text_insert)]
913 pointer = pointer - count_delete - count_insert + 1
914 if count_delete != 0:
916 if count_insert != 0:
918 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
919 # Merge this equality with the previous one.
920 diffs[pointer - 1] = (diffs[pointer - 1][0],
921 diffs[pointer - 1][1] + diffs[pointer][1])
931 if diffs[-1][1] == '':
932 diffs.pop() # Remove the dummy entry at the end.
934 # Second pass: look for single edits surrounded on both sides by equalities
935 # which can be shifted sideways to eliminate an equality.
936 # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
939 # Intentionally ignore the first and last element (don't need checking).
940 while pointer < len(diffs) - 1:
941 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
942 diffs[pointer + 1][0] == self.DIFF_EQUAL):
943 # This is a single edit surrounded by equalities.
944 if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
945 # Shift the edit over the previous equality.
946 diffs[pointer] = (diffs[pointer][0],
947 diffs[pointer - 1][1] +
948 diffs[pointer][1][:-len(diffs[pointer - 1][1])])
949 diffs[pointer + 1] = (diffs[pointer + 1][0],
950 diffs[pointer - 1][1] + diffs[pointer + 1][1])
951 del diffs[pointer - 1]
953 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
954 # Shift the edit over the next equality.
955 diffs[pointer - 1] = (diffs[pointer - 1][0],
956 diffs[pointer - 1][1] + diffs[pointer + 1][1])
957 diffs[pointer] = (diffs[pointer][0],
958 diffs[pointer][1][len(diffs[pointer + 1][1]):] +
959 diffs[pointer + 1][1])
960 del diffs[pointer + 1]
964 # If shifts were made, the diff needs reordering and another shift sweep.
966 self.diff_cleanupMerge(diffs)
968 def diff_xIndex(self, diffs, loc):
969 """loc is a location in text1, compute and return the equivalent location
970 in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
973 diffs: Array of diff tuples.
974 loc: Location within text1.
977 Location within text2.
983 for x in xrange(len(diffs)):
984 (op, text) = diffs[x]
985 if op != self.DIFF_INSERT: # Equality or deletion.
987 if op != self.DIFF_DELETE: # Equality or insertion.
989 if chars1 > loc: # Overshot the location.
994 if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
995 # The location was deleted.
997 # Add the remaining len(character).
998 return last_chars2 + (loc - last_chars1)
1000 def diff_prettyHtml(self, diffs):
1001 """Convert a diff array into a pretty HTML report.
1004 diffs: Array of diff tuples.
1007 HTML representation.
1011 for (op, data) in diffs:
1012 text = (data.replace("&", "&").replace("<", "<")
1013 .replace(">", ">").replace("\n", "¶<BR>"))
1014 if op == self.DIFF_INSERT:
1015 html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=%i\">%s</INS>"
1017 elif op == self.DIFF_DELETE:
1018 html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=%i\">%s</DEL>"
1020 elif op == self.DIFF_EQUAL:
1021 html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
1022 if op != self.DIFF_DELETE:
1024 return "".join(html)
1026 def diff_text1(self, diffs):
1027 """Compute and return the source text (all equalities and deletions).
1030 diffs: Array of diff tuples.
1036 for (op, data) in diffs:
1037 if op != self.DIFF_INSERT:
1039 return "".join(text)
1041 def diff_text2(self, diffs):
1042 """Compute and return the destination text (all equalities and insertions).
1045 diffs: Array of diff tuples.
1051 for (op, data) in diffs:
1052 if op != self.DIFF_DELETE:
1054 return "".join(text)
1056 def diff_levenshtein(self, diffs):
1057 """Compute the Levenshtein distance; the number of inserted, deleted or
1058 substituted characters.
1061 diffs: Array of diff tuples.
1069 for (op, data) in diffs:
1070 if op == self.DIFF_INSERT:
1071 insertions += len(data)
1072 elif op == self.DIFF_DELETE:
1073 deletions += len(data)
1074 elif op == self.DIFF_EQUAL:
1075 # A deletion and an insertion is one substitution.
1076 levenshtein += max(insertions, deletions)
1079 levenshtein += max(insertions, deletions)
1082 def diff_toDelta(self, diffs):
1083 """Crush the diff into an encoded string which describes the operations
1084 required to transform text1 into text2.
1085 E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1086 Operations are tab-separated. Inserted text is escaped using %xx notation.
1089 diffs: Array of diff tuples.
1095 for (op, data) in diffs:
1096 if op == self.DIFF_INSERT:
1097 # High ascii will raise UnicodeDecodeError. Use Unicode instead.
1098 data = data.encode("utf-8")
1099 text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1100 elif op == self.DIFF_DELETE:
1101 text.append("-%d" % len(data))
1102 elif op == self.DIFF_EQUAL:
1103 text.append("=%d" % len(data))
1104 return "\t".join(text)
1106 def diff_fromDelta(self, text1, delta):
1107 """Given the original text1, and an encoded string which describes the
1108 operations required to transform text1 into text2, compute the full diff.
1111 text1: Source string for the diff.
1115 Array of diff tuples.
1118 ValueError: If invalid input.
1120 if type(delta) == unicode:
1121 # Deltas should be composed of a subset of ascii chars, Unicode not
1122 # required. If this encode raises UnicodeEncodeError, delta is invalid.
1123 delta = delta.encode("ascii")
1125 pointer = 0 # Cursor in text1
1126 tokens = delta.split("\t")
1127 for token in tokens:
1129 # Blank tokens are ok (from a trailing \t).
1131 # Each token begins with a one character parameter which specifies the
1132 # operation of this token (delete, insert, equality).
1135 param = urllib.unquote(param).decode("utf-8")
1136 diffs.append((self.DIFF_INSERT, param))
1137 elif token[0] == "-" or token[0] == "=":
1141 raise ValueError("Invalid number in diff_fromDelta: " + param)
1143 raise ValueError("Negative number in diff_fromDelta: " + param)
1144 text = text1[pointer : pointer + n]
1147 diffs.append((self.DIFF_EQUAL, text))
1149 diffs.append((self.DIFF_DELETE, text))
1151 # Anything else is an error.
1152 raise ValueError("Invalid diff operation in diff_fromDelta: " +
1154 if pointer != len(text1):
1156 "Delta length (%d) does not equal source text length (%d)." %
1157 (pointer, len(text1)))
def match_main(self, text, pattern, loc):
    """Locate the best instance of 'pattern' in 'text' near 'loc'.

    Args:
      text: The text to search.
      pattern: The pattern to search for.
      loc: The location to search around.

    Returns:
      Best match index or -1.

    Raises:
      ValueError: If text or pattern is None.
    """
    # Check for null inputs.  'is None' (identity), not '== None'.
    if text is None or pattern is None:
        raise ValueError("Null inputs. (match_main)")

    # Clamp loc into the valid range of text.
    loc = max(0, min(loc, len(text)))
    if text == pattern:
        # Shortcut (potentially not guaranteed by the algorithm)
        return 0
    elif not text:
        # Nothing to match against.
        return -1
    elif text[loc:loc + len(pattern)] == pattern:
        # Perfect match at the perfect spot!  (Includes case of null pattern)
        return loc
    else:
        # Do a fuzzy compare.
        return self.match_bitap(text, pattern, loc)
def match_bitap(self, text, pattern, loc):
    """Locate the best instance of 'pattern' in 'text' near 'loc' using the
    Bitap algorithm.

    Args:
      text: The text to search.
      pattern: The pattern to search for.
      loc: The location to search around.

    Returns:
      Best match index or -1.
    """
    # Python doesn't have a maxint limit, so ignore this check.
    #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
    #  raise ValueError("Pattern too long for this application.")

    # Initialise the alphabet.
    s = self.match_alphabet(pattern)

    def match_bitapScore(e, x):
        """Compute and return the score for a match with e errors and x location.
        Accesses loc and pattern through being a closure.

        Args:
          e: Number of errors in match.
          x: Location of match.

        Returns:
          Overall score for match (0.0 = good, 1.0 = bad).
        """
        accuracy = float(e) / len(pattern)
        proximity = abs(loc - x)
        if not self.Match_Distance:
            # Dodge divide by zero error.
            return proximity and 1.0 or accuracy
        return accuracy + (proximity / float(self.Match_Distance))

    # Highest score beyond which we give up.
    score_threshold = self.Match_Threshold
    # Is there a nearby exact match? (speedup)
    best_loc = text.find(pattern, loc)
    if best_loc != -1:
        score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
        # What about in the other direction? (speedup)
        best_loc = text.rfind(pattern, loc + len(pattern))
        if best_loc != -1:
            score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

    # Initialise the bit arrays.
    matchmask = 1 << (len(pattern) - 1)
    best_loc = -1

    bin_max = len(pattern) + len(text)
    # Empty initialization added to appease pychecker.
    last_rd = None
    # range() (not Python-2-only xrange) works identically on Python 2 and 3.
    for d in range(len(pattern)):
        # Scan for the best match each iteration allows for one more error.
        # Run a binary search to determine how far from 'loc' we can stray at
        # this error level.
        bin_min = 0
        bin_mid = bin_max
        while bin_min < bin_mid:
            if match_bitapScore(d, loc + bin_mid) <= score_threshold:
                bin_min = bin_mid
            else:
                bin_max = bin_mid
            # Floor division: plain '/' would yield a float index on Python 3.
            bin_mid = (bin_max - bin_min) // 2 + bin_min

        # Use the result from this iteration as the maximum for the next.
        bin_max = bin_mid
        start = max(1, loc - bin_mid + 1)
        finish = min(loc + bin_mid, len(text)) + len(pattern)

        # Build the bit array as a real list (range() is not a mutable list on
        # Python 3); only rd[finish + 1] (the seed mask) is read before being
        # written, so zero-filling the rest is safe.
        rd = [0] * (finish + 2)
        rd[finish + 1] = (1 << d) - 1
        for j in range(finish, start - 1, -1):
            if len(text) <= j - 1:
                # Out of range.
                charMatch = 0
            else:
                charMatch = s.get(text[j - 1], 0)
            if d == 0:  # First pass: exact match.
                rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
            else:  # Subsequent passes: fuzzy match.
                rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
                    ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
            if rd[j] & matchmask:
                score = match_bitapScore(d, j - 1)
                # This match will almost certainly be better than any existing
                # match.  But check anyway.
                if score <= score_threshold:
                    # Told you so.
                    score_threshold = score
                    best_loc = j - 1
                    if best_loc > loc:
                        # When passing loc, don't exceed our current distance from loc.
                        start = max(1, 2 * loc - best_loc)
                    else:
                        # Already passed loc, downhill from here on in.
                        break
        # No hope for a (better) match at greater error levels.
        if match_bitapScore(d + 1, loc) > score_threshold:
            break
        last_rd = rd
    return best_loc
def match_alphabet(self, pattern):
    """Initialise the alphabet for the Bitap algorithm.

    Builds one bitmask per distinct character: bit (len(pattern) - i - 1) is
    set for every position i at which the character occurs in the pattern.

    Args:
      pattern: The text to encode.

    Returns:
      Hash of character locations.
    """
    s = {}
    # enumerate replaces the Python-2-only xrange() index loop and runs
    # unchanged on Python 2 and 3.
    for i, char in enumerate(pattern):
        s[char] = s.get(char, 0) | (1 << (len(pattern) - i - 1))
    return s
# NOTE(review): this chunk is a line-numbered, elided dump of
# google-diff-match-patch (see the file header); the numeric prefixes and the
# gaps in them mean statements are missing — restore from upstream before use.
# Purpose: grow 'patch' with equality context from 'text' until the pattern it
# matches is unique, without exceeding Match_MaxBits (presumably minus margins
# so match_main can still locate it — confirm against upstream).
1316 def patch_addContext(self, patch, text):
1317 """Increase the context until it is unique,
1318 but don't let the pattern expand beyond Match_MaxBits.
1321 patch: The patch to grow.
# NOTE(review): the 'padding' initialiser is among the elided lines — it is
# read below before any visible assignment.
1326 pattern = text[patch.start2 : patch.start2 + patch.length1]
1329 # Look for the first and last matches of pattern in text. If two different
1330 # matches are found, increase the pattern length.
1331 while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1332 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1333 self.Patch_Margin)):
1334 padding += self.Patch_Margin
1335 pattern = text[max(0, patch.start2 - padding) :
1336 patch.start2 + patch.length1 + padding]
1337 # Add one chunk for good luck.
1338 padding += self.Patch_Margin
# Prepend an equality diff holding the leading context.
1341 prefix = text[max(0, patch.start2 - padding) : patch.start2]
1343 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
# Append an equality diff holding the trailing context.
1345 suffix = text[patch.start2 + patch.length1 :
1346 patch.start2 + patch.length1 + padding]
1348 patch.diffs.append((self.DIFF_EQUAL, suffix))
1350 # Roll back the start points.
1351 patch.start1 -= len(prefix)
1352 patch.start2 -= len(prefix)
# Extend lengths to cover the added context on both sides.
1354 patch.length1 += len(prefix) + len(suffix)
1355 patch.length2 += len(prefix) + len(suffix)
# NOTE(review): elided, line-numbered dump of google-diff-match-patch; the
# gaps in the numeric prefixes are missing statements (e.g. the method-2
# branch body, 'patches = []', 'patch = patch_obj()' creations) — restore
# from upstream before use.
# Purpose: build a list of patch objects that transform text1 into text2,
# accepting four historical call signatures.
1357 def patch_make(self, a, b=None, c=None):
1358 """Compute a list of patches to turn text1 into text2.
1359 Use diffs if provided, otherwise compute it ourselves.
1360 There are four ways to call this function, depending on what data is
1361 available to the caller:
1363 a = text1, b = text2
1367 a = text1, b = diffs
1368 Method 4 (deprecated, use method 3):
1369 a = text1, b = text2, c = diffs
1372 a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1374 b: text2 (methods 1,4) or Array of diff tuples for text1 to
1375 text2 (method 3) or undefined (method 2).
1376 c: Array of diff tuples for text1 to text2 (method 4) or
1377 undefined (methods 1,2,3).
1380 Array of patch objects.
# Dispatch on argument types to recover (text1, diffs) in every call form.
1384 # Note that texts may arrive as 'str' or 'unicode'.
1385 if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
1386 # Method 1: text1, text2
1387 # Compute diffs from text1 and text2.
1389 diffs = self.diff_main(text1, b, True)
# Cleanup passes only make sense when more than two diffs were produced.
1391 self.diff_cleanupSemantic(diffs)
1392 self.diff_cleanupEfficiency(diffs)
1393 elif isinstance(a, list) and b is None and c is None:
1395 # Compute text1 from diffs.
1397 text1 = self.diff_text1(diffs)
1398 elif isinstance(a, basestring) and isinstance(b, list) and c is None:
1399 # Method 3: text1, diffs
1402 elif (isinstance(a, basestring) and isinstance(b, basestring) and
1403 isinstance(c, list)):
1404 # Method 4: text1, text2, diffs
1405 # text2 is not used.
1409 raise ValueError("Unknown call format to patch_make.")
1412 return [] # Get rid of the None case.
# Walk the diffs, accumulating non-equal runs into patch objects;
# char_count1/2 track the cursor in the pre- and post-patch texts.
1415 char_count1 = 0 # Number of characters into the text1 string.
1416 char_count2 = 0 # Number of characters into the text2 string.
1417 prepatch_text = text1 # Recreate the patches to determine context info.
1418 postpatch_text = text1
1419 for x in xrange(len(diffs)):
1420 (diff_type, diff_text) = diffs[x]
1421 if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1422 # A new patch starts here.
1423 patch.start1 = char_count1
1424 patch.start2 = char_count2
1425 if diff_type == self.DIFF_INSERT:
1427 patch.diffs.append(diffs[x])
1428 patch.length2 += len(diff_text)
# Mirror the insertion into postpatch_text so later context is accurate.
1429 postpatch_text = (postpatch_text[:char_count2] + diff_text +
1430 postpatch_text[char_count2:])
1431 elif diff_type == self.DIFF_DELETE:
1433 patch.length1 += len(diff_text)
1434 patch.diffs.append(diffs[x])
1435 postpatch_text = (postpatch_text[:char_count2] +
1436 postpatch_text[char_count2 + len(diff_text):])
1437 elif (diff_type == self.DIFF_EQUAL and
1438 len(diff_text) <= 2 * self.Patch_Margin and
1439 len(patch.diffs) != 0 and len(diffs) != x + 1):
1440 # Small equality inside a patch.
1441 patch.diffs.append(diffs[x])
1442 patch.length1 += len(diff_text)
1443 patch.length2 += len(diff_text)
1445 if (diff_type == self.DIFF_EQUAL and
1446 len(diff_text) >= 2 * self.Patch_Margin):
1447 # Time for a new patch.
1448 if len(patch.diffs) != 0:
1449 self.patch_addContext(patch, prepatch_text)
1450 patches.append(patch)
1452 # Unlike Unidiff, our patch lists have a rolling context.
1453 # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1454 # Update prepatch text & pos to reflect the application of the
1455 # just completed patch.
1456 prepatch_text = postpatch_text
1457 char_count1 = char_count2
1459 # Update the current character count.
1460 if diff_type != self.DIFF_INSERT:
1461 char_count1 += len(diff_text)
1462 if diff_type != self.DIFF_DELETE:
1463 char_count2 += len(diff_text)
1465 # Pick up the leftover patch if not empty.
1466 if len(patch.diffs) != 0:
1467 self.patch_addContext(patch, prepatch_text)
1468 patches.append(patch)
# NOTE(review): elided listing — the 'patchesCopy = []' initialiser and the
# final 'return patchesCopy' are among the missing numbered lines; restore
# from upstream before use.
# Purpose: clone a patch list so patch_apply can mutate its working copy
# without touching the caller's patches.
1471 def patch_deepCopy(self, patches):
1472 """Given an array of patches, return another array that is identical.
1475 patches: Array of patch objects.
1478 Array of patch objects.
1481 for patch in patches:
1482 patchCopy = patch_obj()
1483 # No need to deep copy the tuples since they are immutable.
# A shallow list copy suffices: the (op, text) tuples are shared safely.
1484 patchCopy.diffs = patch.diffs[:]
1485 patchCopy.start1 = patch.start1
1486 patchCopy.start2 = patch.start2
1487 patchCopy.length1 = patch.length1
1488 patchCopy.length2 = patch.length2
1489 patchesCopy.append(patchCopy)
# NOTE(review): elided, line-numbered dump — loop bookkeeping lines
# (e.g. 'delta = 0', 'results = []', 'index1' tracking, several if/else
# headers) are missing between the numbered lines; restore from upstream
# before running.
# Purpose: apply a list of patches to 'text', tolerating drift between the
# expected and actual patch locations; returns (new_text, [applied_flags]).
1492 def patch_apply(self, patches, text):
1493 """Merge a set of patches onto the text. Return a patched text, as well
1494 as a list of true/false values indicating which patches were applied.
1497 patches: Array of patch objects.
1501 Two element Array, containing the new text and an array of boolean values.
1506 # Deep copy the patches so that no changes are made to originals.
1507 patches = self.patch_deepCopy(patches)
# Pad both ends so patches at the very edges have context to match against.
1509 nullPadding = self.patch_addPadding(patches)
1510 text = nullPadding + text + nullPadding
1511 self.patch_splitMax(patches)
1513 # delta keeps track of the offset between the expected and actual location
1514 # of the previous patch. If there are patches expected at positions 10 and
1515 # 20, but the first patch was found at 12, delta is 2 and the second patch
1516 # has an effective expected position of 22.
1519 for patch in patches:
1520 expected_loc = patch.start2 + delta
1521 text1 = self.diff_text1(patch.diffs)
1523 if len(text1) > self.Match_MaxBits:
1524 # patch_splitMax will only provide an oversized pattern in the case of
# Oversized pattern: locate head and tail contexts separately.
1526 start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1529 end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1530 expected_loc + len(text1) - self.Match_MaxBits)
1531 if end_loc == -1 or start_loc >= end_loc:
1532 # Can't find valid trailing context. Drop this patch.
1535 start_loc = self.match_main(text, text1, expected_loc)
1537 # No match found. :(
1538 results.append(False)
1539 # Subtract the delta for this failed patch from subsequent patches.
1540 delta -= patch.length2 - patch.length1
1543 results.append(True)
1544 delta = start_loc - expected_loc
1546 text2 = text[start_loc : start_loc + len(text1)]
1548 text2 = text[start_loc : end_loc + self.Match_MaxBits]
1550 # Perfect match, just shove the replacement text in.
1551 text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1552 text[start_loc + len(text1):])
# Imperfect match: diff the expected vs found text to map indices.
1555 # Run a diff to get a framework of equivalent indices.
1556 diffs = self.diff_main(text1, text2, False)
1557 if (len(text1) > self.Match_MaxBits and
1558 self.diff_levenshtein(diffs) / float(len(text1)) >
1559 self.Patch_DeleteThreshold):
1560 # The end points match, but the content is unacceptably bad.
1563 self.diff_cleanupSemanticLossless(diffs)
1565 for (op, data) in patch.diffs:
1566 if op != self.DIFF_EQUAL:
1567 index2 = self.diff_xIndex(diffs, index1)
1568 if op == self.DIFF_INSERT: # Insertion
1569 text = text[:start_loc + index2] + data + text[start_loc +
1571 elif op == self.DIFF_DELETE: # Deletion
1572 text = text[:start_loc + index2] + text[start_loc +
1573 self.diff_xIndex(diffs, index1 + len(data)):]
1574 if op != self.DIFF_DELETE:
1576 # Strip the padding off.
1577 text = text[len(nullPadding):-len(nullPadding)]
1578 return (text, results)
# NOTE(review): elided listing — 'nullPadding = ""' and the 'patch = ...' /
# 'diffs = patch.diffs' selections of the first and last patch are among the
# missing numbered lines; restore from upstream before use.
# Purpose: prepend/append a control-character padding string (chr(1)..chr(n))
# so edge patches have context to match; returns the padding used.
1580 def patch_addPadding(self, patches):
1581 """Add some padding on text start and end so that edges can match
1582 something. Intended to be called only from within patch_apply.
1585 patches: Array of patch objects.
1588 The padding string added to each side.
1590 paddingLength = self.Patch_Margin
# Padding is built from chr(1)..chr(paddingLength): bytes unlikely to occur
# in real text, so they match only the padding itself.
1592 for x in xrange(1, paddingLength + 1):
1593 nullPadding += chr(x)
1595 # Bump all the patches forward.
1596 for patch in patches:
1597 patch.start1 += paddingLength
1598 patch.start2 += paddingLength
1600 # Add some padding on start of first diff.
1603 if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1604 # Add nullPadding equality.
1605 diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1606 patch.start1 -= paddingLength # Should be 0.
1607 patch.start2 -= paddingLength # Should be 0.
1608 patch.length1 += paddingLength
1609 patch.length2 += paddingLength
1610 elif paddingLength > len(diffs[0][1]):
1611 # Grow first equality.
1612 extraLength = paddingLength - len(diffs[0][1])
1613 newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
1614 diffs[0] = (diffs[0][0], newText)
1615 patch.start1 -= extraLength
1616 patch.start2 -= extraLength
1617 patch.length1 += extraLength
1618 patch.length2 += extraLength
1620 # Add some padding on end of last diff.
1623 if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1624 # Add nullPadding equality.
1625 diffs.append((self.DIFF_EQUAL, nullPadding))
1626 patch.length1 += paddingLength
1627 patch.length2 += paddingLength
1628 elif paddingLength > len(diffs[-1][1]):
1629 # Grow last equality.
1630 extraLength = paddingLength - len(diffs[-1][1])
1631 newText = diffs[-1][1] + nullPadding[:extraLength]
1632 diffs[-1] = (diffs[-1][0], newText)
1633 patch.length1 += extraLength
1634 patch.length2 += extraLength
# NOTE(review): elided listing — several control lines ('return', 'del
# patches[x]', 'patch = patch_obj()', 'empty = True' bookkeeping, the
# x-adjustment after insertion) fall in the numbering gaps; restore from
# upstream before use.
# Purpose: split any patch whose length1 exceeds Match_MaxBits into several
# smaller patches so the bitap matcher (limited to Match_MaxBits bits) can
# locate each one.
1638 def patch_splitMax(self, patches):
1639 """Look through the patches and break up any which are longer than the
1640 maximum limit of the match algorithm.
1643 patches: Array of patch objects.
1645 if self.Match_MaxBits == 0:
1647 for x in xrange(len(patches)):
1648 if patches[x].length1 > self.Match_MaxBits:
1649 bigpatch = patches[x]
1650 # Remove the big old patch.
1653 patch_size = self.Match_MaxBits
1654 start1 = bigpatch.start1
1655 start2 = bigpatch.start2
1657 while len(bigpatch.diffs) != 0:
1658 # Create one of several smaller patches.
# Each sub-patch opens with the tail context of the previous one.
1661 patch.start1 = start1 - len(precontext)
1662 patch.start2 = start2 - len(precontext)
1664 patch.length1 = patch.length2 = len(precontext)
1665 patch.diffs.append((self.DIFF_EQUAL, precontext))
1667 while (len(bigpatch.diffs) != 0 and
1668 patch.length1 < patch_size - self.Patch_Margin):
1669 (diff_type, diff_text) = bigpatch.diffs[0]
1670 if diff_type == self.DIFF_INSERT:
1671 # Insertions are harmless.
1672 patch.length2 += len(diff_text)
1673 start2 += len(diff_text)
1674 patch.diffs.append(bigpatch.diffs.pop(0))
1676 elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1677 patch.diffs[0][0] == self.DIFF_EQUAL and
1678 len(diff_text) > 2 * patch_size):
1679 # This is a large deletion. Let it pass in one chunk.
1680 patch.length1 += len(diff_text)
1681 start1 += len(diff_text)
1683 patch.diffs.append((diff_type, diff_text))
1684 del bigpatch.diffs[0]
1686 # Deletion or equality. Only take as much as we can stomach.
1687 diff_text = diff_text[:patch_size - patch.length1 -
1689 patch.length1 += len(diff_text)
1690 start1 += len(diff_text)
1691 if diff_type == self.DIFF_EQUAL:
1692 patch.length2 += len(diff_text)
1693 start2 += len(diff_text)
1697 patch.diffs.append((diff_type, diff_text))
# If we consumed the whole leading diff, drop it; otherwise trim the part
# we took off its front.
1698 if diff_text == bigpatch.diffs[0][1]:
1699 del bigpatch.diffs[0]
1701 bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1702 bigpatch.diffs[0][1][len(diff_text):])
1704 # Compute the head context for the next patch.
1705 precontext = self.diff_text2(patch.diffs)
1706 precontext = precontext[-self.Patch_Margin:]
1707 # Append the end context for this patch.
1708 postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
1710 patch.length1 += len(postcontext)
1711 patch.length2 += len(postcontext)
1712 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1713 patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1716 patch.diffs.append((self.DIFF_EQUAL, postcontext))
1720 patches.insert(x, patch)
def patch_toText(self, patches):
    """Take a list of patches and return a textual representation.

    Args:
      patches: Array of patch objects.

    Returns:
      Text representation of patches.
    """
    # Each patch renders itself via str() (patch_obj.__str__ emits the
    # GNU-diff-style hunk); join avoids quadratic string concatenation.
    # (Restores the accumulator initialisation elided from the listing.)
    text = []
    for patch in patches:
        text.append(str(patch))
    return "".join(text)
# NOTE(review): elided listing — 'patches = []', 'patch = patch_obj()', the
# 'if not m:' guard, the per-branch length/start adjustments, 'del text[0]'
# advances, and the sign/line extraction all fall in the numbering gaps;
# restore from upstream before use.
# Purpose: parse the textual patch format produced by patch_toText back into
# a list of patch objects.
1736 def patch_fromText(self, textline):
1737 """Parse a textual representation of patches and return a list of patch
1741 textline: Text representation of patches.
1744 Array of patch objects.
1747 ValueError: If invalid input.
# Python-2-only validity check: 'unicode' and str.encode("ascii") on text
# do not exist / behave differently on Python 3.
1749 if type(textline) == unicode:
1750 # Patches should be composed of a subset of ascii chars, Unicode not
1751 # required. If this encode raises UnicodeEncodeError, patch is invalid.
1752 textline = textline.encode("ascii")
1756 text = textline.split('\n')
1757 while len(text) != 0:
# Hunk header, e.g. "@@ -382,8 +481,9 @@"; groups are start1,len1,start2,len2.
1758 m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1760 raise ValueError("Invalid patch string: " + text[0])
1762 patches.append(patch)
1763 patch.start1 = int(m.group(1))
# Omitted length means 1; explicit 0 and 1 adjust start by convention of
# the 1-based GNU diff header (see patch_obj.__str__).
1764 if m.group(2) == '':
1767 elif m.group(2) == '0':
1771 patch.length1 = int(m.group(2))
1773 patch.start2 = int(m.group(3))
1774 if m.group(4) == '':
1777 elif m.group(4) == '0':
1781 patch.length2 = int(m.group(4))
1785 while len(text) != 0:
# Body lines: leading sign selects the diff op, rest is %xx-escaped text.
1790 line = urllib.unquote(text[0][1:])
1791 line = line.decode("utf-8")
1794 patch.diffs.append((self.DIFF_INSERT, line))
1797 patch.diffs.append((self.DIFF_DELETE, line))
1800 patch.diffs.append((self.DIFF_EQUAL, line))
1802 # Start of next patch.
1805 # Blank line? Whatever.
1809 raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
# NOTE(review): fragment of class patch_obj — the 'class' line, the
# '__init__' and '__str__' def lines, the attribute initialisers, and the
# text.append calls for the three diff ops fall in the numbering gaps;
# restore from upstream before use.
1815 """Class representing one patch operation.
1819 """Initializes with an empty list of diffs.
# __str__: render this patch as a GNU-diff-style hunk.
1828 """Emulate GNU diff's format.
1829 Header: @@ -382,8 +481,9 @@
1830 Indices are printed as 1-based, not 0-based.
1833 The GNU diff string.
# Length 0 keeps the 0-based start; length 1 omits the count; otherwise
# "start,length" with 1-based start — mirrors patch_fromText's parsing.
1835 if self.length1 == 0:
1836 coords1 = str(self.start1) + ",0"
1837 elif self.length1 == 1:
1838 coords1 = str(self.start1 + 1)
1840 coords1 = str(self.start1 + 1) + "," + str(self.length1)
1841 if self.length2 == 0:
1842 coords2 = str(self.start2) + ",0"
1843 elif self.length2 == 1:
1844 coords2 = str(self.start2 + 1)
1846 coords2 = str(self.start2 + 1) + "," + str(self.length2)
1847 text = ["@@ -", coords1, " +", coords2, " @@\n"]
1848 # Escape the body of the patch with %xx notation.
1849 for (op, data) in self.diffs:
1850 if op == diff_match_patch.DIFF_INSERT:
1852 elif op == diff_match_patch.DIFF_DELETE:
1854 elif op == diff_match_patch.DIFF_EQUAL:
# Python-2-only: encodes to UTF-8 bytes before urllib.quote (py3's
# urllib.parse.quote accepts str directly).
1856 # High ascii will raise UnicodeDecodeError. Use Unicode instead.
1857 data = data.encode("utf-8")
1858 text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
1859 return "".join(text)