<?php

/*

class.Diff.php

A class containing a diff implementation

Created by Stephen Morley - http://stephenmorley.org/ - and released under the
terms of the CC0 1.0 Universal legal code:

http://creativecommons.org/publicdomain/zero/1.0/legalcode

*/

// A class containing functions for computing diffs and formatting the output.
class Diff{

  // the type of each entry in a diff: present in both inputs, present only in
  // the first input, or present only in the second input
  const UNMODIFIED = 0;
  const DELETED    = 1;
  const INSERTED   = 2;

  /* Returns the diff for two strings. The return value is an array, each of
   * whose values is an array containing two values: a line (or character, if
   * $compareCharacters is true), and one of the constants Diff::UNMODIFIED (the
   * line or character is in both strings), Diff::DELETED (the line or character
   * is only in the first string), and Diff::INSERTED (the line or character is
   * only in the second string).
   *
   * Items are compared with strict string equality, so numeric-looking items
   * such as "1e3" and "1000" are reported as different. In character mode the
   * strings are split into UTF-8 characters; a string that is not valid UTF-8
   * is split into individual bytes instead. The parameters are:
   *
   * $string1           - the first string
   * $string2           - the second string
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compare(
      $string1, $string2, $compareCharacters = false){

    // split the strings into the sequences of items to be compared
    if ($compareCharacters){
      $sequence1 = self::splitCharacters($string1);
      $sequence2 = self::splitCharacters($string2);
    }else{
      $sequence1 = preg_split('/\R/', $string1);
      $sequence2 = preg_split('/\R/', $string2);
    }

    // initialise the comparison start and end positions
    $start = 0;
    $end1  = count($sequence1) - 1;
    $end2  = count($sequence2) - 1;

    // skip any common prefix
    while ($start <= $end1 && $start <= $end2
        && $sequence1[$start] === $sequence2[$start]){
      $start ++;
    }

    // skip any common suffix
    while ($end1 >= $start && $end2 >= $start
        && $sequence1[$end1] === $sequence2[$end2]){
      $end1 --;
      $end2 --;
    }

    // compute the table of longest common subsequence lengths for the part of
    // the sequences between the common prefix and the common suffix
    $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

    // generate the partial diff (in reverse order) for that middle part
    $partialDiff =
        self::generatePartialDiff($table, $sequence1, $sequence2, $start);

    // generate the full diff: common prefix, middle part, common suffix
    $diff = array();
    for ($index = 0; $index < $start; $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }
    while (count($partialDiff) > 0) $diff[] = array_pop($partialDiff);
    $length1 = count($sequence1);
    for ($index = $end1 + 1; $index < $length1; $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }

    // return the diff
    return $diff;

  }

  /* Returns the characters of a string as an array, for use in the compare
   * function. The string is split into UTF-8 characters so that multi-byte
   * characters are kept whole; if the string is not valid UTF-8 it is split
   * into individual bytes. An empty string gives an empty array. The
   * parameter is:
   *
   * $string - the string to split
   */
  private static function splitCharacters($string){

    // an empty string contains no characters
    $string = (string)$string;
    if ($string === '') return array();

    // split between every UTF-8 character
    $characters = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);

    // preg_split returns false for invalid UTF-8, so fall back to bytes
    return ($characters === false ? str_split($string) : $characters);

  }

  /* Returns the diff for two files. The contents are read with
   * file_get_contents and passed straight to the compare function; no check
   * is made for a read failure. The parameters are:
   *
   * $file1             - the path to the first file
   * $file2             - the path to the second file
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compareFiles(
      $file1, $file2, $compareCharacters = false){

    // return the diff of the files
    return self::compare(
        file_get_contents($file1),
        file_get_contents($file2),
        $compareCharacters);

  }

  /* Returns the table of longest common subsequence lengths for the specified
   * sequences. Row $index1 and column $index2 of the table hold the length of
   * the longest common subsequence of the first $index1 items of the first
   * range and the first $index2 items of the second range, where both ranges
   * begin at $start. The parameters are:
   *
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   * $end1      - the ending index for the first sequence
   * $end2      - the ending index for the second sequence
   */
  private static function computeTable(
      $sequence1, $sequence2, $start, $end1, $end2){

    // determine the lengths to be compared
    $length1 = $end1 - $start + 1;
    $length2 = $end2 - $start + 1;

    // initialise the table with a first row of zeros
    $table = array(array_fill(0, $length2 + 1, 0));

    // loop over the rows
    for ($index1 = 1; $index1 <= $length1; $index1 ++){

      // create the new row, whose first column is always zero
      $table[$index1] = array(0);

      // loop over the columns
      for ($index2 = 1; $index2 <= $length2; $index2 ++){

        // store the longest common subsequence length
        if ($sequence1[$index1 + $start - 1]
            === $sequence2[$index2 + $start - 1]){
          $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
        }else{
          $table[$index1][$index2] =
              max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
        }

      }
    }

    // return the table
    return $table;

  }

  /* Returns the partial diff for the specified sequences, in reverse order.
   * The table is walked backwards from its last cell to its first. The
   * parameters are:
   *
   * $table     - the table returned by the computeTable function
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   */
  private static function generatePartialDiff(
      $table, $sequence1, $sequence2, $start){

    // initialise the diff
    $diff = array();

    // initialise the indices to the last row and column of the table
    $index1 = count($table) - 1;
    $index2 = count($table[0]) - 1;

    // loop until there are no items remaining in either sequence
    while ($index1 > 0 || $index2 > 0){

      // check what has happened to the items at these indices
      if ($index1 > 0 && $index2 > 0
          && $sequence1[$index1 + $start - 1]
              === $sequence2[$index2 + $start - 1]){

        // the item is in both sequences
        $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
        $index1 --;
        $index2 --;

      }elseif ($index2 > 0
          && $table[$index1][$index2] == $table[$index1][$index2 - 1]){

        // the item is only in the second sequence
        $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
        $index2 --;

      }else{

        // the item is only in the first sequence
        $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
        $index1 --;

      }

    }

    // return the diff
    return $diff;

  }

  /* Returns a diff as a string, where unmodified lines are prefixed by ' ',
   * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
   * separator is appended after every line, including the last. The
   * parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to "\n"
   */
  public static function toString($diff, $separator = "\n"){

    // initialise the string
    $string = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // extend the string with the line
      switch ($line[1]){
        case self::UNMODIFIED : $string .= ' ' . $line[0]; break;
        case self::DELETED    : $string .= '- ' . $line[0]; break;
        case self::INSERTED   : $string .= '+ ' . $line[0]; break;
      }

      // extend the string with the separator
      $string .= $separator;

    }

    // return the string
    return $string;

  }

  /* Returns a diff as an HTML string, where unmodified lines are contained
   * within 'span' elements, deletions are contained within 'del' elements, and
   * insertions are contained within 'ins' elements. The content of each line
   * is escaped with htmlspecialchars. Entries whose type is not one of the
   * Diff constants are skipped. The parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to '<br>'
   */
  public static function toHTML($diff, $separator = '<br>'){

    // initialise the HTML
    $html = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // determine the element for the line
      switch ($line[1]){
        case self::UNMODIFIED : $element = 'span'; break;
        case self::DELETED    : $element = 'del';  break;
        case self::INSERTED   : $element = 'ins';  break;
        // skip unrecognised types rather than reusing the previous element
        default               : continue 2;
      }

      // extend the HTML with the line
      $html .=
          '<' . $element . '>'
          . htmlspecialchars($line[0])
          . '</' . $element . '>';

      // extend the HTML with the separator
      $html .= $separator;

    }

    // return the HTML
    return $html;

  }

  /* Returns a diff as an HTML table with one row per run of lines. Each row
   * has a left cell and a right cell: unmodified runs appear in both cells, a
   * run of deletions appears on the left with any immediately following run of
   * insertions on the right, and a run of insertions with no preceding
   * deletions appears on the right only. Entries whose type is not one of the
   * Diff constants are skipped. The parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML; this
   *                optional parameter defaults to ''
   * $separator   - the separator between lines; this optional parameter
   *                defaults to '<br>'
   */
  public static function toTable($diff, $indentation = '', $separator = '<br>'){

    // initialise the HTML
    $html = $indentation . "<table class=\"diff\">\n";

    // loop over the lines in the diff; getCellContent advances the index past
    // every line that it consumes
    $index = 0;
    $count = count($diff);
    while ($index < $count){

      // determine the line type
      switch ($diff[$index][1]){

        // display the content on the left and right
        case self::UNMODIFIED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::UNMODIFIED);
          $rightCell = $leftCell;
          break;

        // display the deleted on the left and inserted content on the right
        case self::DELETED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::DELETED);
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

        // display the inserted content on the right
        case self::INSERTED:
          $leftCell = '';
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

        // skip entries of an unrecognised type; nothing else would advance
        // the index, so the loop would otherwise never terminate
        default:
          $index ++;
          continue 2;

      }

      // determine the class names of the cells
      $unmodified = ($leftCell === $rightCell);
      $leftClass  =
          ($unmodified ? 'Unmodified' : ($leftCell === '' ? 'Blank' : 'Deleted'));
      $rightClass =
          ($unmodified ? 'Unmodified' : ($rightCell === '' ? 'Blank' : 'Inserted'));

      // extend the HTML with the new row
      $html .=
          $indentation
          . " <tr>\n"
          . $indentation
          . ' <td class="diff'
          . $leftClass
          . '">'
          . $leftCell
          . "</td>\n"
          . $indentation
          . ' <td class="diff'
          . $rightClass
          . '">'
          . $rightCell
          . "</td>\n"
          . $indentation
          . " </tr>\n";

    }

    // return the HTML
    return $html . $indentation . "</table>\n";

  }

  /* Returns the content of the cell, for use in the toTable function. Lines
   * are consumed from the current index for as long as they have the
   * specified type, and the index is left at the first line that was not
   * consumed. The parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML
   *                (accepted for symmetry with toTable; not used here)
   * $separator   - the separator between lines
   * $index       - the current index, passed by reference
   * $type        - the type of line
   */
  private static function getCellContent(
      $diff, $indentation, $separator, &$index, $type){

    // initialise the HTML
    $html = '';

    // loop over the matching lines, adding them to the HTML
    $count = count($diff);
    while ($index < $count && $diff[$index][1] == $type){
      $html .=
          '<span>'
          . htmlspecialchars($diff[$index][0])
          . '</span>'
          . $separator;
      $index ++;
    }

    // return the HTML
    return $html;

  }

}

?>