util/rundiff

   1 #!/usr/bin/perl
   2
   3 # Copyright (c) 2001 Nathan L. Binkert
   4 # All rights reserved.
   5 #
   6 # Permission to redistribute, use, copy, and modify this software
   7 # without fee is hereby granted, provided that the following
   8 # conditions are met:
   9 #
  10 # 1. This entire notice is included in all source code copies of any
  11 #    software which is or includes a copy or modification of this
  12 #    software.
  13 # 2. The name of the author may not be used to endorse or promote
  14 #    products derived from this software without specific prior
  15 #    written permission.
  16 #
  17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  18 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  19 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  21 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  23 # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  25 # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  26 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28 #
  29
  30 use Algorithm::Diff qw(diff);
  31 use vars qw ($opt_C $opt_c $opt_u $opt_U);
  32
  33 $opt_u = "";
  34 $opt_c = undef;
  35
  36 $diffsize = 2000;
  37 # After we've read up to a certain point in each file, the number of items
  38 # we've read from each file will differ by $FLD (could be 0)
  39 my $File_Length_Difference = 0;
  40 my $Context_Lines = 9;
  41
  42 $progname = $0;
  43 if (scalar(@ARGV) != 2) {
  44   usage();
  45 }
  46
  47 my ($filename1, $filename2);
  48 ($filename1, $start1) = parse_filearg($ARGV[0]);
  49 ($filename2, $start2) = parse_filearg($ARGV[1]);
  50
  51 if ($filename1 eq "-" && $filename2 eq "-") {
  52   die "Only one of the inputs may be standard in\n";
  53 }
  54
  55 my ($file1, $file2);
  56 if ($filename1 eq "-") {
  57   $file1 = STDIN;
  58 } else {
  59   open(FILE1, $filename1) || die "can't open $file1: $!\n";
  60   $file1 = FILE1;
  61 }
  62
  63 if ($filename2 eq "-") {
  64   $file2 = STDIN;
  65 } else {
  66   open(FILE2, $filename2) || die "can't open $file2: $!\n";
  67   $file2 = FILE2;
  68 }
  69
  70 my $file_offset1 = ffw($file1, $start1);
  71 my $file_offset2 = ffw($file2, $start2);
  72
  73 $skip_first = 0;
  74 my (@buf1, @buf2, @printbuf1, @printbuf2);
  75
  76 $Compare_Ahead = 0;
  77
  78 while (!eof($file1) && !eof($file2)) {
  79   my $line1 = <$file1>; chomp $line1;
  80   my $line2 = <$file2>; chomp $line2;
  81   my $printline1 = $line1;
  82   my $printline2 = $line2;
  83
  84   push @buf1, $line1;
  85   push @buf2, $line2;
  86   push @printbuf1, $printline1;
  87   push @printbuf2, $printline2;
  88
  89 #  while ($Compare_Ahead < $Context_Lines) {
  90 #    $line1 = @buf1[$Compare_Ahead];
  91 #    $line2 = @buf2[$Compare_Ahead];
  92 #    $line2 =~ s/ *--.*$//;
  93 #    if ($line1 ne $line2) { last; }
  94 #    ++$Compare_Ahead;
  95 #  }
  96
  97   $line1 = @buf1[$Compare_Ahead];
  98   $line2 = @buf2[$Compare_Ahead];
  99   $line2 =~ s/ *--.*$//;
 100
 101   if ($line1 ne $line2) {
 102     while (!eof($file1) && scalar(@buf1) < $diffsize) {
 103       $line = <$file1>; chomp $line;
 104       my $printline = $line;
 105
 106       push @printbuf1, $printline;
 107       push @buf1, $line;
 108     }
 109
 110     while (!eof($file2) && scalar(@buf2) < $diffsize) {
 111       $line = <$file2>; chomp $line;
 112       my $printline = $line;
 113 #      $line =~ s/ *--.*$//;
 114
 115       push @printbuf2, $printline;
 116       push @buf2, $line;
 117     }
 118
 119     my $diffs = diff(\@buf1, \@buf2);
 120
 121     next unless @$diffs;
 122
 123     my @hunklist;
 124     my ($hunk,$oldhunk);
 125     # Loop over hunks. If a hunk overlaps with the last hunk, join them.
 126     # Otherwise, print out the old one.
 127     foreach my $piece (@$diffs) {
 128       $hunk = new Hunk ($piece, $Context_Lines, scalar(@buf1));
 129       next unless $oldhunk;
 130
 131       if ($hunk->does_overlap($oldhunk)) {
 132         $hunk->prepend_hunk($oldhunk);
 133       } else {
 134         push @hunklist, $oldhunk;
 135       }
 136     } continue {
 137       $oldhunk = $hunk;
 138     }
 139
 140     my $change = 0;
 141     while (scalar(@hunklist) && !$change) {
 142       $hunk = pop @hunklist;
 143       $change = $hunk->{"change"};
 144     }
 145     push @hunklist, $hunk;
 146     $last_start1 = $hunk->{"start1"};
 147     $last_start2 = $hunk->{"start2"};
 148     $last_end1 = $hunk->{"end1"};
 149     $last_end2 = $hunk->{"end2"};
 150
 151     while (scalar(@hunklist)) {
 152       $hunk = shift @hunklist;
 153 #      $hunk->output_diff(\@buf1, \@buf2);
 154       $hunk->output_diff(\@printbuf1, \@printbuf2);
 155     }
 156
 157     $last_end1 -=  $Context_Lines - 1;
 158     $last_end2 -=  $Context_Lines - 1;
 159     $file_offset1 += $last_end1;
 160     $file_offset2 += $last_end2;
 161     @printbuf1 = @printbuf1[$last_end1..$#printbuf1];
 162     @printbuf2 = @printbuf2[$last_end2..$#printbuf2];
 163     @buf1 = @buf1[$last_end1..$#buf1];
 164     @buf2 = @buf2[$last_end2..$#buf2];
 165     while (scalar(@buf1) > $Context_Lines &&
 166            scalar(@buf2) > $Context_Lines) {
 167       $foo1 = @buf1[$Context_Lines];
 168       $foo2 = @buf2[$Context_Lines];
 169       if (scalar($foo1) != scalar($foo2) || $foo1 ne $foo2) { last; }
 170       $foo1 = shift @printbuf1;
 171       $foo2 = shift @printbuf2;
 172       $foo1 = shift @buf1;
 173       $foo2 = shift @buf2;
 174       ++$file_offset1;
 175       ++$file_offset2;
 176     }
 177   } else {
 178     ++$file_offset1;
 179     ++$file_offset2;
 180     $foo1 = shift @printbuf1;
 181     $foo2 = shift @printbuf2;
 182     $foo1 = shift @buf1;
 183     $foo2 = shift @buf2;
 184   }
 185 }
 186
 187 close $file1;
 188 close $file2;
 189
 190 sub ffw() {
 191   if (scalar(@_) != 2) { die "improper usage of ffw\n"; }
 192
 193   my $FILE = $_[0];
 194   my $start = $_[1];
 195   my $count = 0;
 196
 197   while ($start-- > 0 && !eof($FILE)) {
 198     <$FILE>;
 199     $count++;
 200   }
 201
 202   if ($start > 0) {die "File too short for ffw amount\n"; }
 203   return $count;
 204 }
 205
 206 sub parse_filearg() {
 207   $start = 0;
 208   split /:/, @_[0];
 209   if (scalar(@_) > 2) { usage(); }
 210
 211   $file = $_[0];
 212   if (scalar(@_) > 1) { $start = $_[1]; }
 213
 214   return ($file, $start);
 215 }
 216
 217 sub usage() {
 218   printf "usage: $progname <file1>[:start] <file2>[:start]\n";
 219   exit 1;
 220 }
 221
 222
 223 # Package Hunk. A Hunk is a group of Blocks which overlap because of the
 224 # context surrounding each block. (So if we're not using context, every
 225 # hunk will contain one block.)
 226 {
 227 package Hunk;
 228
 229 sub new {
 230 # Arg1 is output from &LCS::diff (which corresponds to one Block)
 231 # Arg2 is the number of items (lines, e.g.,) of context around each block
 232 #
 233 # This subroutine changes $File_Length_Difference
 234 #
 235 # Fields in a Hunk:
 236 # blocks      - a list of Block objects
 237 # start       - index in file 1 where first block of the hunk starts
 238 # end         - index in file 1 where last block of the hunk ends
 239 #
 240 # Variables:
 241 # before_diff - how much longer file 2 is than file 1 due to all hunks
 242 #               until but NOT including this one
 243 # after_diff  - difference due to all hunks including this one
 244     my ($class, $piece, $context_items, $maxlen) = @_;
 245
 246     my $block = new Block ($piece); # this modifies $FLD!
 247
 248     my $before_diff = $File_Length_Difference; # BEFORE this hunk
 249     my $after_diff = $before_diff + $block->{"length_diff"};
 250     $File_Length_Difference += $block->{"length_diff"};
 251
 252     # @remove_array and @insert_array hold the items to insert and remove
 253     # Save the start & beginning of each array. If the array doesn't exist
 254     # though (e.g., we're only adding items in this block), then figure
 255     # out the line number based on the line number of the other file and
 256     # the current difference in file lenghts
 257     my @remove_array = $block->remove;
 258     my @insert_array = $block->insert;
 259     my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2, $change);
 260     $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
 261     $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
 262     $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
 263     $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
 264
 265     $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
 266     $end1   = $a2 == -1 ? $b2 - $after_diff  : $a2;
 267     $start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
 268     $end2   = $b2 == -1 ? $a2 + $after_diff  : $b2;
 269     $change = scalar(@remove_array) && scalar(@insert_array);
 270
 271     # At first, a hunk will have just one Block in it
 272     my $hunk = {
 273                 "start1" => $start1,
 274                 "start2" => $start2,
 275                 "end1" => $end1,
 276                 "end2" => $end2,
 277                 "maxlen" => $maxlen,
 278                 "change" => $change,
 279                 "blocks" => [$block],
 280               };
 281     bless $hunk, $class;
 282
 283     $hunk->flag_context($context_items);
 284
 285     return $hunk;
 286 }
 287
 288 # Change the "start" and "end" fields to note that context should be added
 289 # to this hunk
 290 sub flag_context {
 291     my ($hunk, $context_items) = @_;
 292     return unless $context_items; # no context
 293
 294     # add context before
 295     my $start1 = $hunk->{"start1"};
 296     my $num_added = $context_items > $start1 ? $start1 : $context_items;
 297     $hunk->{"start1"} -= $num_added;
 298     $hunk->{"start2"} -= $num_added;
 299
 300     # context after
 301     my $end1 = $hunk->{"end1"};
 302     $num_added = ($end1+$context_items > $hunk->{"maxlen"}) ?
 303                   $hunk->{"maxlen"} - $end1 :
 304                   $context_items;
 305     $hunk->{"end1"} += $num_added;
 306     $hunk->{"end2"} += $num_added;
 307 }
 308
 309 # Is there an overlap between hunk arg0 and old hunk arg1?
 310 # Note: if end of old hunk is one less than beginning of second, they overlap
 311 sub does_overlap {
 312     my ($hunk, $oldhunk) = @_;
 313     return "" unless $oldhunk; # first time through, $oldhunk is empty
 314
 315     # Do I actually need to test both?
 316     return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
 317             $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
 318 }
 319
 320 # Prepend hunk arg1 to hunk arg0
 321 # Note that arg1 isn't updated! Only arg0 is.
 322 sub prepend_hunk {
 323     my ($hunk, $oldhunk) = @_;
 324
 325     $hunk->{"start1"} = $oldhunk->{"start1"};
 326     $hunk->{"start2"} = $oldhunk->{"start2"};
 327
 328     unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
 329 }
 330
 331
 332 # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
 333 sub output_diff {
 334     if    (defined $main::opt_u) {&output_unified_diff(@_)}
 335     elsif (defined $main::opt_c) {&output_context_diff(@_)}
 336     else {die "unknown diff"}
 337 }
 338
 339 sub output_unified_diff {
 340     my ($hunk, $fileref1, $fileref2) = @_;
 341     my @blocklist;
 342
 343     # Calculate item number range.
 344     my $range1 = $hunk->unified_range(1, $file_offset1);
 345     my $range2 = $hunk->unified_range(2, $file_offset2);
 346     print "@@ -$range1 +$range2 @@\n";
 347
 348     # Outlist starts containing the hunk of file 1.
 349     # Removing an item just means putting a '-' in front of it.
 350     # Inserting an item requires getting it from file2 and splicing it in.
 351     #    We splice in $num_added items. Remove blocks use $num_added because
 352     # splicing changed the length of outlist.
 353     #    We remove $num_removed items. Insert blocks use $num_removed because
 354     # their item numbers---corresponding to positions in file *2*--- don't take
 355     # removed items into account.
 356     my $low = $hunk->{"start1"};
 357     my $hi = $hunk->{"end1"};
 358     my ($num_added, $num_removed) = (0,0);
 359     my @outlist = @$fileref1[$low..$hi];
 360     map {s/^/ /} @outlist; # assume it's just context
 361
 362     foreach my $block (@{$hunk->{"blocks"}}) {
 363         foreach my $item ($block->remove) {
 364             my $op = $item->{"sign"}; # -
 365             my $offset = $item->{"item_no"} - $low + $num_added;
 366             $outlist[$offset] =~ s/^ /$op/;
 367             $num_removed++;
 368         }
 369         foreach my $item ($block->insert) {
 370             my $op = $item->{"sign"}; # +
 371             my $i = $item->{"item_no"};
 372             my $offset = $i - $hunk->{"start2"} + $num_removed;
 373             splice(@outlist,$offset,0,"$op$$fileref2[$i]");
 374             $num_added++;
 375         }
 376     }
 377
 378     map {s/$/\n/} @outlist; # add \n's
 379     print @outlist;
 380
 381 }
 382
 383 sub output_context_diff {
 384     my ($hunk, $fileref1, $fileref2) = @_;
 385     my @blocklist;
 386
 387     print "***************\n";
 388     # Calculate item number range.
 389     my $range1 = $hunk->context_range(1, $file_offset1);
 390     my $range2 = $hunk->context_range(2, $file_offset2);
 391
 392     # Print out file 1 part for each block in context diff format if there are
 393     # any blocks that remove items
 394     print "*** $range1 ****\n";
 395     my $low = $hunk->{"start1"};
 396     my $hi  = $hunk->{"end1"};
 397     if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
 398         my @outlist = @$fileref1[$low..$hi];
 399         map {s/^/  /} @outlist; # assume it's just context
 400         foreach my $block (@blocklist) {
 401             my $op = $block->op; # - or !
 402             foreach my $item ($block->remove) {
 403                 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
 404             }
 405         }
 406         map {s/$/\n/} @outlist; # add \n's
 407         print @outlist;
 408     }
 409
 410     print "--- $range2 ----\n";
 411     $low = $hunk->{"start2"};
 412     $hi  = $hunk->{"end2"};
 413     if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
 414         my @outlist = @$fileref2[$low..$hi];
 415         map {s/^/  /} @outlist; # assume it's just context
 416         foreach my $block (@blocklist) {
 417             my $op = $block->op; # + or !
 418             foreach my $item ($block->insert) {
 419                 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
 420             }
 421         }
 422         map {s/$/\n/} @outlist; # add \n's
 423         print @outlist;
 424     }
 425 }
 426
 427 sub context_range {
 428 # Generate a range of item numbers to print. Only print 1 number if the range
 429 # has only one item in it. Otherwise, it's 'start,end'
 430     my ($hunk, $flag, $offset) = @_;
 431     my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
 432
 433     # index from 1, not zero
 434     $start += $offset + 1;
 435     $end += $offset + 1;
 436     my $range = ($start < $end) ? "$start,$end" : $end;
 437     return $range;
 438 }
 439
 440 sub unified_range {
 441 # Generate a range of item numbers to print for unified diff
 442 # Print number where block starts, followed by number of lines in the block
 443 # (don't print number of lines if it's 1)
 444     my ($hunk, $flag, $offset) = @_;
 445     my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
 446
 447     # index from 1, not zero
 448     $start += $offset + 1;
 449     $end += $offset + 1;
 450     my $length = $end - $start + 1;
 451     my $first = $length < 2 ? $end : $start; # strange, but correct...
 452     my $range = $length== 1 ? $first : "$first,$length";
 453     return $range;
 454 }
 455 } # end Package Hunk
 456
 457 # Package Block. A block is an operation removing, adding, or changing
 458 # a group of items. Basically, this is just a list of changes, where each
 459 # change adds or deletes a single item.
 460 # (Change could be a separate class, but it didn't seem worth it)
 461 {
 462 package Block;
 463 sub new {
 464 # Input is a chunk from &Algorithm::LCS::diff
 465 # Fields in a block:
 466 # length_diff - how much longer file 2 is than file 1 due to this block
 467 # Each change has:
 468 # sign        - '+' for insert, '-' for remove
 469 # item_no     - number of the item in the file (e.g., line number)
 470 # We don't bother storing the text of the item
 471 #
 472     my ($class,$chunk) = @_;
 473     my @changes = ();
 474
 475 # This just turns each change into a hash.
 476     foreach my $item (@$chunk) {
 477         my ($sign, $item_no, $text) = @$item;
 478         my $hashref = {"sign" => $sign, "item_no" => $item_no};
 479         push @changes, $hashref;
 480     }
 481
 482     my $block = { "changes" => \@changes };
 483     bless $block, $class;
 484
 485     $block->{"length_diff"} = $block->insert - $block->remove;
 486     return $block;
 487 }
 488
 489
 490 # LOW LEVEL FUNCTIONS
 491 sub op {
 492 # what kind of block is this?
 493     my $block = shift;
 494     my $insert = $block->insert;
 495     my $remove = $block->remove;
 496
 497     $remove && $insert and return '!';
 498     $remove and return '-';
 499     $insert and return '+';
 500     warn "unknown block type";
 501     return '^'; # context block
 502 }
 503
 504 # Returns a list of the changes in this block that remove items
 505 # (or the number of removals if called in scalar context)
 506 sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
 507
 508 # Returns a list of the changes in this block that insert items
 509 sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
 510
 511 } # end of package Block