diff options
Diffstat (limited to 'bitbake/lib/simplediff/__init__.py')
-rw-r--r-- | bitbake/lib/simplediff/__init__.py | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/bitbake/lib/simplediff/__init__.py b/bitbake/lib/simplediff/__init__.py new file mode 100644 index 0000000000..57ee3c5c40 --- /dev/null +++ b/bitbake/lib/simplediff/__init__.py | |||
@@ -0,0 +1,198 @@ | |||
1 | ''' | ||
2 | Simple Diff for Python version 1.0 | ||
3 | |||
4 | Annotate two versions of a list with the values that have been | ||
5 | changed between the versions, similar to unix's `diff` but with | ||
6 | a dead-simple Python interface. | ||
7 | |||
8 | (C) Paul Butler 2008-2012 <http://www.paulbutler.org/> | ||
9 | May be used and distributed under the zlib/libpng license | ||
10 | <http://www.opensource.org/licenses/zlib-license.php> | ||
11 | ''' | ||
12 | |||
13 | __all__ = ['diff', 'string_diff', 'html_diff'] | ||
14 | __version__ = '1.0' | ||
15 | |||
16 | |||
17 | def diff(old, new): | ||
18 | ''' | ||
19 | Find the differences between two lists. Returns a list of pairs, where the | ||
20 | first value is in ['+','-','='] and represents an insertion, deletion, or | ||
21 | no change for that list. The second value of the pair is the list | ||
22 | of elements. | ||
23 | |||
24 | Params: | ||
25 | old the old list of immutable, comparable values (ie. a list | ||
26 | of strings) | ||
27 | new the new list of immutable, comparable values | ||
28 | |||
29 | Returns: | ||
30 | A list of pairs, with the first part of the pair being one of three | ||
31 | strings ('-', '+', '=') and the second part being a list of values from | ||
32 | the original old and/or new lists. The first part of the pair | ||
33 | corresponds to whether the list of values is a deletion, insertion, or | ||
34 | unchanged, respectively. | ||
35 | |||
36 | Examples: | ||
37 | >>> diff([1,2,3,4],[1,3,4]) | ||
38 | [('=', [1]), ('-', [2]), ('=', [3, 4])] | ||
39 | |||
40 | >>> diff([1,2,3,4],[2,3,4,1]) | ||
41 | [('-', [1]), ('=', [2, 3, 4]), ('+', [1])] | ||
42 | |||
43 | >>> diff('The quick brown fox jumps over the lazy dog'.split(), | ||
44 | ... 'The slow blue cheese drips over the lazy carrot'.split()) | ||
45 | ... # doctest: +NORMALIZE_WHITESPACE | ||
46 | [('=', ['The']), | ||
47 | ('-', ['quick', 'brown', 'fox', 'jumps']), | ||
48 | ('+', ['slow', 'blue', 'cheese', 'drips']), | ||
49 | ('=', ['over', 'the', 'lazy']), | ||
50 | ('-', ['dog']), | ||
51 | ('+', ['carrot'])] | ||
52 | |||
53 | ''' | ||
54 | |||
55 | # Create a map from old values to their indices | ||
56 | old_index_map = dict() | ||
57 | for i, val in enumerate(old): | ||
58 | old_index_map.setdefault(val,list()).append(i) | ||
59 | |||
60 | # Find the largest substring common to old and new. | ||
61 | # We use a dynamic programming approach here. | ||
62 | # | ||
63 | # We iterate over each value in the `new` list, calling the | ||
64 | # index `inew`. At each iteration, `overlap[i]` is the | ||
65 | # length of the largest suffix of `old[:i]` equal to a suffix | ||
66 | # of `new[:inew]` (or unset when `old[i]` != `new[inew]`). | ||
67 | # | ||
68 | # At each stage of iteration, the new `overlap` (called | ||
69 | # `_overlap` until the original `overlap` is no longer needed) | ||
70 | # is built from the old one. | ||
71 | # | ||
72 | # If the length of overlap exceeds the largest substring | ||
73 | # seen so far (`sub_length`), we update the largest substring | ||
74 | # to the overlapping strings. | ||
75 | |||
76 | overlap = dict() | ||
77 | # `sub_start_old` is the index of the beginning of the largest overlapping | ||
78 | # substring in the old list. `sub_start_new` is the index of the beginning | ||
79 | # of the same substring in the new list. `sub_length` is the length that | ||
80 | # overlaps in both. | ||
81 | # These track the largest overlapping substring seen so far, so naturally | ||
82 | # we start with a 0-length substring. | ||
83 | sub_start_old = 0 | ||
84 | sub_start_new = 0 | ||
85 | sub_length = 0 | ||
86 | |||
87 | for inew, val in enumerate(new): | ||
88 | _overlap = dict() | ||
89 | for iold in old_index_map.get(val,list()): | ||
90 | # now we are considering all values of iold such that | ||
91 | # `old[iold] == new[inew]`. | ||
92 | _overlap[iold] = (iold and overlap.get(iold - 1, 0)) + 1 | ||
93 | if(_overlap[iold] > sub_length): | ||
94 | # this is the largest substring seen so far, so store its | ||
95 | # indices | ||
96 | sub_length = _overlap[iold] | ||
97 | sub_start_old = iold - sub_length + 1 | ||
98 | sub_start_new = inew - sub_length + 1 | ||
99 | overlap = _overlap | ||
100 | |||
101 | if sub_length == 0: | ||
102 | # If no common substring is found, we return an insert and delete... | ||
103 | return (old and [('-', old)] or []) + (new and [('+', new)] or []) | ||
104 | else: | ||
105 | # ...otherwise, the common substring is unchanged and we recursively | ||
106 | # diff the text before and after that substring | ||
107 | return diff(old[ : sub_start_old], new[ : sub_start_new]) + \ | ||
108 | [('=', new[sub_start_new : sub_start_new + sub_length])] + \ | ||
109 | diff(old[sub_start_old + sub_length : ], | ||
110 | new[sub_start_new + sub_length : ]) | ||
111 | |||
112 | |||
113 | def string_diff(old, new): | ||
114 | ''' | ||
115 | Returns the difference between the old and new strings when split on | ||
116 | whitespace. Considers punctuation a part of the word | ||
117 | |||
118 | This function is intended as an example; you'll probably want | ||
119 | a more sophisticated wrapper in practice. | ||
120 | |||
121 | Params: | ||
122 | old the old string | ||
123 | new the new string | ||
124 | |||
125 | Returns: | ||
126 | the output of `diff` on the two strings after splitting them | ||
127 | on whitespace (a list of change instructions; see the docstring | ||
128 | of `diff`) | ||
129 | |||
130 | Examples: | ||
131 | >>> string_diff('The quick brown fox', 'The fast blue fox') | ||
132 | ... # doctest: +NORMALIZE_WHITESPACE | ||
133 | [('=', ['The']), | ||
134 | ('-', ['quick', 'brown']), | ||
135 | ('+', ['fast', 'blue']), | ||
136 | ('=', ['fox'])] | ||
137 | |||
138 | ''' | ||
139 | return diff(old.split(), new.split()) | ||
140 | |||
141 | |||
142 | def html_diff(old, new): | ||
143 | ''' | ||
144 | Returns the difference between two strings (as in stringDiff) in | ||
145 | HTML format. HTML code in the strings is NOT escaped, so you | ||
146 | will get weird results if the strings contain HTML. | ||
147 | |||
148 | This function is intended as an example; you'll probably want | ||
149 | a more sophisticated wrapper in practice. | ||
150 | |||
151 | Params: | ||
152 | old the old string | ||
153 | new the new string | ||
154 | |||
155 | Returns: | ||
156 | the output of the diff expressed with HTML <ins> and <del> | ||
157 | tags. | ||
158 | |||
159 | Examples: | ||
160 | >>> html_diff('The quick brown fox', 'The fast blue fox') | ||
161 | 'The <del>quick brown</del> <ins>fast blue</ins> fox' | ||
162 | ''' | ||
163 | con = {'=': (lambda x: x), | ||
164 | '+': (lambda x: "<ins>" + x + "</ins>"), | ||
165 | '-': (lambda x: "<del>" + x + "</del>")} | ||
166 | return " ".join([(con[a])(" ".join(b)) for a, b in string_diff(old, new)]) | ||
167 | |||
168 | |||
169 | def check_diff(old, new): | ||
170 | ''' | ||
171 | This tests that diffs returned by `diff` are valid. You probably won't | ||
172 | want to use this function, but it's provided for documentation and | ||
173 | testing. | ||
174 | |||
175 | A diff should satisfy the property that the old input is equal to the | ||
176 | elements of the result annotated with '-' or '=' concatenated together. | ||
177 | Likewise, the new input is equal to the elements of the result annotated | ||
178 | with '+' or '=' concatenated together. This function compares `old`, | ||
179 | `new`, and the results of `diff(old, new)` to ensure this is true. | ||
180 | |||
181 | Tests: | ||
182 | >>> check_diff('ABCBA', 'CBABA') | ||
183 | >>> check_diff('Foobarbaz', 'Foobarbaz') | ||
184 | >>> check_diff('Foobarbaz', 'Boobazbam') | ||
185 | >>> check_diff('The quick brown fox', 'Some quick brown car') | ||
186 | >>> check_diff('A thick red book', 'A quick blue book') | ||
187 | >>> check_diff('dafhjkdashfkhasfjsdafdasfsda', 'asdfaskjfhksahkfjsdha') | ||
188 | >>> check_diff('88288822828828288282828', '88288882882828282882828') | ||
189 | >>> check_diff('1234567890', '24689') | ||
190 | ''' | ||
191 | old = list(old) | ||
192 | new = list(new) | ||
193 | result = diff(old, new) | ||
194 | _old = [val for (a, vals) in result if (a in '=-') for val in vals] | ||
195 | assert old == _old, 'Expected %s, got %s' % (old, _old) | ||
196 | _new = [val for (a, vals) in result if (a in '=+') for val in vals] | ||
197 | assert new == _new, 'Expected %s, got %s' % (new, _new) | ||
198 | |||