util/rundiff

   1 #! /usr/bin/env perl
   2
   3 # Copyright (c) 2003 The Regents of The University of Michigan
   4 # All rights reserved.
   5 #
   6 # Redistribution and use in source and binary forms, with or without
   7 # modification, are permitted provided that the following conditions are
   8 # met: redistributions of source code must retain the above copyright
   9 # notice, this list of conditions and the following disclaimer;
  10 # redistributions in binary form must reproduce the above copyright
  11 # notice, this list of conditions and the following disclaimer in the
  12 # documentation and/or other materials provided with the distribution;
  13 # neither the name of the copyright holders nor the names of its
  14 # contributors may be used to endorse or promote products derived from
  15 # this software without specific prior written permission.
  16 #
  17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28
  29 # Diff two streams.
  30 #
  31 # Unlike regular diff, this script does not read in the entire input
  32 # before doing a diff, so it can be used on lengthy outputs piped from
  33 # other programs (e.g., M5 traces).  The best way to do this is to
  34 # take advantage of the power of Perl's open function, which will
  35 # automatically fork a subprocess if the last character in the
  36 # "filename" is a pipe (|).  Thus to compare the instruction traces
  37 # from two versions of m5 (m5a and m5b), you can do this:
  38 #
  39 # rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |'
  40 #
  41
  42 use strict;
  43
  44 use Getopt::Std;
  45
  46 #
  47 # Options:
  48 #  -c <n> : print n lines of context before & after changes
  49 #  -l <n> : use n lines of lookahead
  50 #  -x     : use "complex" diff from Algorithm::Diff (see below)
  51 #
  52 our ($opt_c, $opt_l, $opt_x);
  53 getopts('c:l:x');
  54
  55 #
  56 # For the highest-quality (minimal) diffs, we can use the
  57 # Algorithm::Diff package.  By default, a built-in, simple, and
  58 # generally quite adequate algorithm will be used.  If you have
  59 # Algorithm::Diff installed on your system, and don't mind having the
  60 # script go slower (like 3-4x slower, based on informal observation),
  61 # then specify '-x' on the command line to use it.
  62 my $use_complexdiff = defined($opt_x);
  63
  64 if ($use_complexdiff) {
  65     # Don't use 'use', as that's a compile-time option and will fail
  66     # on systems that don't have Algorithm::Diff installed even if
  67     # $use_complexdiff is false.  'require' is evaluated at runtime,
  68     # so it's OK.
  69     require Algorithm::Diff;
  70     import Algorithm::Diff qw(traverse_sequences);
  71 };
  72
  73 my $lookahead_lines = $opt_l || 200;
  74
  75 # in theory you could have different amounts of context before and
  76 # after a diff, but until someone needs that there's only one arg to
  77 # set both.
  78 my $precontext_lines = $opt_c || 3;
  79 my $postcontext_lines = $precontext_lines;
  80
  81 my $file1 = $ARGV[0];
  82 my $file2 = $ARGV[1];
  83
  84 die "Need two args." if (!(defined($file1) && defined($file2)));
  85
  86 my ($fh1, $fh2);
  87 open($fh1, $file1) or die "Can't open $file1";
  88 open($fh2, $file2) or die "Can't open $file2";
  89
  90 # print files to output so we know which is which
  91 print "-$file1\n";
  92 print "+$file2\n";
  93
  94 # buffer of matching lines for pre-diff context
  95 my @precontext = ();
  96 # number of post-diff matching lines remaining to print
  97 my $postcontext = 0;
  98
  99 # lookahead buffers for $file1 and $file2 respectively
 100 my @lines1 = ();
 101 my @lines2 = ();
 102
 103 # Next line number available to print from each file.  Generally this
 104 # corresponds to the oldest line in @precontext, or the oldest line in
 105 # @lines1 and @lines2 if @precontext is empty.
 106 my $lineno1 = 1;
 107 my $lineno2 = 1;
 108
 109 # Fill a lookahead buffer to $lookahead_lines lines (or until EOF).
 110 sub fill
 111 {
 112     my ($fh, $array) = @_;
 113
 114     while (@$array < $lookahead_lines) {
 115         my $line = <$fh>;
 116         last if (!defined($line));
 117         push @$array, $line;
 118     }
 119 }
 120
 121 # Print and delete n lines from front of given array with given prefix.
 122 sub printlines
 123 {
 124     my ($array, $n, $prefix) = @_;
 125
 126     while ($n--) {
 127         my $line = shift @$array;
 128         last if (!defined($line));
 129         print $prefix, $line;
 130     }
 131 }
 132
 133 # Print a difference region where n1 lines of file1 were replaced by
 134 # n2 lines of file2 (where either n1 or n2 could be zero).
 135 sub printdiff
 136 {
 137     my ($n1, $n2)= @_;
 138
 139     # If the precontext buffer is full or we're at the beginning of a
 140     # file, then this is a new diff region, so we should print a
 141     # header indicating the current line numbers.  If we're past the
 142     # beginning and the precontext buffer isn't full, then whatever
 143     # we're about to print is contiguous with the end of the last
 144     # region we printed, so we just concatenate them on the output.
 145     if (@precontext == $precontext_lines || ($lineno1 == 0 && $lineno2 == 0)) {
 146         print "@@ -$lineno1 +$lineno2 @@\n";
 147     }
 148
 149     # Print and clear the precontext buffer.
 150     if (@precontext) {
 151         print ' ', join(' ', @precontext);
 152         $lineno1 += scalar(@precontext);
 153         $lineno2 += scalar(@precontext);
 154         @precontext = ();
 155     }
 156
 157     # Print the differing lines.
 158     printlines(\@lines1, $n1, '-');
 159     printlines(\@lines2, $n2, '+');
 160     $lineno1 += $n1;
 161     $lineno2 += $n2;
 162
 163     # Set $postcontext to print the next $postcontext_lines matching lines.
 164     $postcontext = $postcontext_lines;
 165 }
 166
 167
 168 ########################
 169 #
 170 # Complex diff algorithm
 171 #
 172 ########################
 173
 174 {
 175     my $match_found;
 176     my $discard_lines1;
 177     my $discard_lines2;
 178
 179     sub match { $match_found = 1; }
 180     sub discard1 { $discard_lines1++ unless $match_found; }
 181     sub discard2 { $discard_lines2++ unless $match_found; }
 182
 183     sub complex_diff
 184     {
 185         $match_found = 0;
 186         $discard_lines1 = 0;
 187         $discard_lines2 = 0;
 188
 189         # See Diff.pm.  Note that even though this call generates a
 190         # complete diff of both lookahead buffers, all we use it for
 191         # is to figure out how many lines to discard off the front of
 192         # each buffer to resync the streams.
 193         traverse_sequences( \@lines1, \@lines2,
 194                             { MATCH => \&match,
 195                               DISCARD_A => \&discard1,
 196                               DISCARD_B => \&discard2 });
 197
 198         if (!$match_found) {
 199             printdiff(scalar(@lines1), scalar(@lines2));
 200             die "Lost sync!";
 201         }
 202
 203         # Since we shouldn't get here unless the first lines of the
 204         # buffers are different, then we must discard some lines off
 205         # at least one of the buffers.
 206         die if ($discard_lines1 == 0 && $discard_lines2 == 0);
 207
 208         printdiff($discard_lines1, $discard_lines2);
 209     }
 210 }
 211
 212 #######################
 213 #
 214 # Simple diff algorithm
 215 #
 216 #######################
 217
 218 # Check for a pair of matching lines; if found, generate appropriate
 219 # diff output.
 220 sub checkmatch
 221 {
 222     my ($n1, $n2) = @_;
 223
 224     # Check if two adjacent lines match, to reduce false resyncs
 225     # (particularly on unrelated blank lines).  This generates
 226     # larger-than-necessary diffs when a single line really should be
 227     # treated as common; if that bugs you, use Algorithm::Diff.
 228     if ($lines1[$n1] eq $lines2[$n2] && $lines1[$n1+1] eq $lines2[$n2+1]) {
 229         printdiff($n1, $n2);
 230         return 1;
 231     }
 232
 233     return 0;
 234 }
 235
 236 sub simple_diff
 237 {
 238     # Look for differences of $cnt lines to resync,
 239     # increasing $cnt from 1 to $lookahead_lines until we find
 240     # something.
 241     for (my $cnt = 1; $cnt < $lookahead_lines-1; ++$cnt) {
 242         # Check for n lines in one file being replaced by
 243         # n lines in the other.
 244         return if checkmatch($cnt, $cnt);
 245         # Find differences where n lines in one file were
 246         # replaced by m lines in the other.  We let m = $cnt
 247         # and iterate for n = 0 to $cnt-1.
 248         for (my $n = 0; $n < $cnt; ++$n) {
 249             return if checkmatch($n, $cnt);
 250             return if checkmatch($cnt, $n);
 251         }
 252     }
 253
 254     printdiff(scalar(@lines1), scalar(@lines2));
 255     die "Lost sync!";
 256 }
 257
 258 # Set the pointer to the appropriate diff function.
 259 #
 260 # Note that in either case the function determines how many lines to
 261 # discard from the front of each lookahead buffer to resync the
 262 # streams, then prints the appropriate diff output and discards them.
 263 # After the function returns, it should always be the case that
 264 # $lines1[0] eq $lines2[0].
 265 my $find_diff = $use_complexdiff ? \&complex_diff : \&simple_diff;
 266
 267 # The main loop.
 268 while (1) {
 269     # keep lookahead buffers topped up
 270     fill($fh1, \@lines1);
 271     fill($fh2, \@lines2);
 272
 273     # peek at first line in each buffer
 274     my $l1 = $lines1[0];
 275     my $l2 = $lines2[0];
 276
 277     if (!defined($l1) && !defined($l2)) {
 278         # reached EOF on both streams: exit
 279         exit(1);
 280     }
 281
 282     if ($l1 eq $l2) {
 283         # matching lines: delete from lookahead buffer
 284         shift @lines1;
 285         shift @lines2;
 286         # figure out what to do with this line
 287         if ($postcontext > 0) {
 288             # we're in the post-context of a diff: print it
 289             $postcontext--;
 290             print ' ', $l1;
 291             $lineno1++;
 292             $lineno2++;
 293         }
 294         else {
 295             # we're in the middle of a matching region... save this
 296             # line for precontext in case we run into a difference.
 297             push @precontext, $l1;
 298             # don't let precontext buffer get bigger than needed
 299             while (@precontext > $precontext_lines) {
 300                 shift @precontext;
 301                 $lineno1++;
 302                 $lineno2++;
 303             }
 304         }
 305     }
 306     else {
 307         # Mismatch.  Deal with it.
 308         &$find_diff();
 309     }
 310 }