Project

General

Profile

Download (11 KB) Statistics
| Branch: | Tag: | Revision:
<?php

/*

class.Diff.php

A class containing a diff implementation

Created by Stephen Morley - http://stephenmorley.org/ - and released under the
terms of the CC0 1.0 Universal legal code:

http://creativecommons.org/publicdomain/zero/1.0/legalcode

*/

// A class containing functions for computing diffs and formatting the output.
class Diff{

  // the possible states of an item in a diff
  const UNMODIFIED = 0;
  const DELETED    = 1;
  const INSERTED   = 2;

  /* Returns the diff for two strings. The return value is an array, each of
   * whose values is an array containing two values: a line (or character, if
   * $compareCharacters is true), and one of the constants Diff::UNMODIFIED (the
   * line or character is in both strings), Diff::DELETED (the line or character
   * is only in the first string), and Diff::INSERTED (the line or character is
   * only in the second string).
   *
   * Items are compared with strict string equality, so lines that are merely
   * numerically equal (such as "1.0" and "1") are reported as different. When
   * both strings are valid UTF-8 they are split on character boundaries, so a
   * multi-byte character is never cut in half; otherwise the strings are split
   * byte by byte. The parameters are:
   *
   * $string1           - the first string
   * $string2           - the second string
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compare(
      $string1, $string2, $compareCharacters = false){

    // normalise the inputs so that the splitting below always sees strings
    $string1 = (string) $string1;
    $string2 = (string) $string2;

    // use the same splitting mode for both strings so their items are
    // comparable
    $utf8 = self::isUtf8($string1) && self::isUtf8($string2);

    // initialise the sequences
    if ($compareCharacters){
      $sequence1 = self::splitCharacters($string1, $utf8);
      $sequence2 = self::splitCharacters($string2, $utf8);
    }else{
      $sequence1 = self::splitLines($string1, $utf8);
      $sequence2 = self::splitLines($string2, $utf8);
    }

    // initialise the comparison start and end positions
    $length1 = count($sequence1);
    $start   = 0;
    $end1    = $length1 - 1;
    $end2    = count($sequence2) - 1;

    // skip any common prefix
    while ($start <= $end1 && $start <= $end2
        && $sequence1[$start] === $sequence2[$start]){
      $start ++;
    }

    // skip any common suffix
    while ($end1 >= $start && $end2 >= $start
        && $sequence1[$end1] === $sequence2[$end2]){
      $end1 --;
      $end2 --;
    }

    // compute the table of longest common subsequence lengths
    $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

    // generate the partial diff, which is returned in reverse order
    $partialDiff =
        self::generatePartialDiff($table, $sequence1, $sequence2, $start);

    // generate the full diff: common prefix, differing middle, common suffix
    $diff = array();
    for ($index = 0; $index < $start; $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }
    foreach (array_reverse($partialDiff) as $item){
      $diff[] = $item;
    }
    for ($index = $end1 + 1; $index < $length1; $index ++){
      $diff[] = array($sequence1[$index], self::UNMODIFIED);
    }

    // return the diff
    return $diff;

  }

  /* Returns the diff for two files. The files are read with
   * file_get_contents and passed to the compare function; compare casts its
   * inputs to strings, so a file that cannot be read (file_get_contents
   * returns false) is treated as an empty string. The parameters are:
   *
   * $file1             - the path to the first file
   * $file2             - the path to the second file
   * $compareCharacters - true to compare characters, and false to compare
   *                      lines; this optional parameter defaults to false
   */
  public static function compareFiles(
      $file1, $file2, $compareCharacters = false){

    // return the diff of the files
    return self::compare(
        file_get_contents($file1),
        file_get_contents($file2),
        $compareCharacters);

  }

  /* Returns true if the specified string is valid UTF-8, and false otherwise.
   * The parameter is:
   *
   * $string - the string to check
   */
  private static function isUtf8($string){

    // an empty pattern with the u modifier only matches valid UTF-8 subjects
    return preg_match('//u', $string) === 1;

  }

  /* Returns the specified string as an array of characters. The parameters
   * are:
   *
   * $string - the string to split
   * $utf8   - true to split on UTF-8 character boundaries, and false to split
   *           into single bytes
   */
  private static function splitCharacters($string, $utf8){

    // an empty string contains no characters
    if ($string === '') return array();

    // split on character boundaries where possible
    if ($utf8){
      $characters = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
      if ($characters !== false) return $characters;
    }

    // fall back to splitting into bytes
    return str_split($string);

  }

  /* Returns the specified string as an array of lines. The parameters are:
   *
   * $string - the string to split
   * $utf8   - true if the string is valid UTF-8, and false otherwise
   */
  private static function splitLines($string, $utf8){

    // without the u modifier \R also matches the single byte 0x85, which
    // occurs inside some multi-byte UTF-8 characters and would cut them in
    // half; with it, U+2028 and U+2029 are also treated as line breaks
    if ($utf8){
      $lines = preg_split('/\R/u', $string);
      if ($lines !== false) return $lines;
    }

    // fall back to byte-wise matching
    return preg_split('/\R/', $string);

  }

  /* Returns the table of longest common subsequence lengths for the specified
   * sequences. The entry at row i and column j is the length of the longest
   * common subsequence of the first i items of the first sequence and the
   * first j items of the second sequence, counting from $start. The parameters
   * are:
   *
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   * $end1      - the ending index for the first sequence
   * $end2      - the ending index for the second sequence
   */
  private static function computeTable(
      $sequence1, $sequence2, $start, $end1, $end2){

    // determine the lengths to be compared
    $length1 = $end1 - $start + 1;
    $length2 = $end2 - $start + 1;

    // initialise the table with a row of zeros
    $table = array(array_fill(0, $length2 + 1, 0));

    // loop over the rows
    for ($index1 = 1; $index1 <= $length1; $index1 ++){

      // create the new row, starting with the zero column
      $table[$index1] = array(0);

      // loop over the columns
      for ($index2 = 1; $index2 <= $length2; $index2 ++){

        // store the longest common subsequence length
        if ($sequence1[$index1 + $start - 1]
            === $sequence2[$index2 + $start - 1]){
          $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
        }else{
          $table[$index1][$index2] =
              max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
        }

      }
    }

    // return the table
    return $table;

  }

  /* Returns the partial diff for the specified sequences, in reverse order.
   * The parameters are:
   *
   * $table     - the table returned by the computeTable function
   * $sequence1 - the first sequence
   * $sequence2 - the second sequence
   * $start     - the starting index
   */
  private static function generatePartialDiff(
      $table, $sequence1, $sequence2, $start){

    //  initialise the diff
    $diff = array();

    // initialise the indices to the bottom right corner of the table
    $index1 = count($table) - 1;
    $index2 = count($table[0]) - 1;

    // loop until there are no items remaining in either sequence
    while ($index1 > 0 || $index2 > 0){

      // check what has happened to the items at these indices
      if ($index1 > 0 && $index2 > 0
          && $sequence1[$index1 + $start - 1]
              === $sequence2[$index2 + $start - 1]){

        // the item is in both sequences
        $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
        $index1 --;
        $index2 --;

      }elseif ($index2 > 0
          && $table[$index1][$index2] === $table[$index1][$index2 - 1]){

        // the item is only in the second sequence
        $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
        $index2 --;

      }else{

        // the item is only in the first sequence
        $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
        $index1 --;

      }

    }

    // return the diff
    return $diff;

  }

  /* Returns a diff as a string, where unmodified lines are prefixed by '  ',
   * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
   * parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to "\n"
   */
  public static function toString($diff, $separator = "\n"){

    // initialise the string
    $string = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // extend the string with the line
      switch ($line[1]){
        case self::UNMODIFIED : $string .= '  ' . $line[0];break;
        case self::DELETED    : $string .= '- ' . $line[0];break;
        case self::INSERTED   : $string .= '+ ' . $line[0];break;
      }

      // extend the string with the separator
      $string .= $separator;

    }

    // return the string
    return $string;

  }

  /* Returns a diff as an HTML string, where unmodified lines are contained
   * within 'span' elements, deletions are contained within 'del' elements, and
   * insertions are contained within 'ins' elements. Lines of an unrecognised
   * type are contained within 'span' elements. The parameters are:
   *
   * $diff      - the diff array
   * $separator - the separator between lines; this optional parameter defaults
   *              to '<br>'
   */
  public static function toHTML($diff, $separator = '<br>'){

    // initialise the HTML
    $html = '';

    // loop over the lines in the diff
    foreach ($diff as $line){

      // determine the element for the line
      switch ($line[1]){
        case self::DELETED  : $element = 'del';  break;
        case self::INSERTED : $element = 'ins';  break;
        default             : $element = 'span'; break;
      }

      // extend the HTML with the escaped line and the separator
      $html .=
          '<' . $element . '>'
          . htmlspecialchars($line[0])
          . '</' . $element . '>'
          . $separator;

    }

    // return the HTML
    return $html;

  }

  /* Returns a diff as an HTML table. Each row has two cells: the left cell
   * holds content from the first string and the right cell holds content from
   * the second string. The parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML; this
   *                optional parameter defaults to ''
   * $separator   - the separator between lines; this optional parameter
   *                defaults to '<br>'
   */
  public static function toTable($diff, $indentation = '', $separator = '<br>'){

    // initialise the HTML
    $html = $indentation . "<table class=\"diff\">\n";

    // loop over the lines in the diff; getCellContent advances $index
    $index = 0;
    $count = count($diff);
    while ($index < $count){

      // determine the line type
      switch ($diff[$index][1]){

        // display the deleted on the left and inserted content on the right
        case self::DELETED:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::DELETED);
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

        // display the inserted content on the right
        case self::INSERTED:
          $leftCell = '';
          $rightCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, self::INSERTED);
          break;

        // display the content on the left and right; this also consumes
        // lines of an unrecognised type so that the loop always advances
        default:
          $leftCell =
              self::getCellContent(
                  $diff, $indentation, $separator, $index, $diff[$index][1]);
          $rightCell = $leftCell;
          break;

      }

      // determine the classes of the cells
      if ($leftCell === $rightCell){
        $leftClass  = 'Unmodified';
        $rightClass = 'Unmodified';
      }else{
        $leftClass  = ($leftCell === '' ? 'Blank' : 'Deleted');
        $rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
      }

      // extend the HTML with the new row
      $html .=
          $indentation
          . "  <tr>\n"
          . $indentation
          . '    <td class="diff' . $leftClass . '">'
          . $leftCell
          . "</td>\n"
          . $indentation
          . '    <td class="diff' . $rightClass . '">'
          . $rightCell
          . "</td>\n"
          . $indentation
          . "  </tr>\n";

    }

    // return the HTML
    return $html . $indentation . "</table>\n";

  }

  /* Returns the content of the cell, for use in the toTable function. The
   * consecutive lines of the specified type starting at the current index are
   * consumed, and the index is left pointing at the first line that was not
   * consumed. The parameters are:
   *
   * $diff        - the diff array
   * $indentation - indentation to add to every line of the generated HTML
   * $separator   - the separator between lines
   * $index       - the current index, passed by reference
   * $type        - the type of line
   */
  private static function getCellContent(
      $diff, $indentation, $separator, &$index, $type){

    // initialise the HTML
    $html = '';

    // loop over the matching lines, adding them to the HTML; the comparison
    // is deliberately loose so that it agrees with the switch in toTable
    $count = count($diff);
    while ($index < $count && $diff[$index][1] == $type){
      $html .=
          '<span>'
          . htmlspecialchars($diff[$index][0])
          . '</span>'
          . $separator;
      $index ++;
    }

    // return the HTML
    return $html;

  }

}
?>
    (1-1/1)