Import changeset
[gem5.git] / util / rundiff
1 #!/usr/bin/perl
2
3 # Copyright (c) 2001 Nathan L. Binkert
4 # All rights reserved.
5 #
6 # Permission to redistribute, use, copy, and modify this software
7 # without fee is hereby granted, provided that the following
8 # conditions are met:
9 #
10 # 1. This entire notice is included in all source code copies of any
11 # software which is or includes a copy or modification of this
12 # software.
13 # 2. The name of the author may not be used to endorse or promote
14 # products derived from this software without specific prior
15 # written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29
30 use Algorithm::Diff qw(diff);
31 use vars qw ($opt_C $opt_c $opt_u $opt_U);
32
# Output format switches read by Hunk::output_diff: a defined $opt_u selects
# unified output; otherwise a defined $opt_c selects context output.  With
# these settings the unified format is always used.
$opt_u = "";
$opt_c = undef;

# Upper bound on the number of lines buffered from each file once the two
# inputs stop matching.
$diffsize = 2000;

# After we've read up to a certain point in each file, the number of items
# we've read from each file will differ by $File_Length_Difference (could
# be 0).  It is a file-scoped lexical because Hunk::new reads and updates it.
my $File_Length_Difference = 0;
my $Context_Lines = 9;

$progname = $0;
if (scalar(@ARGV) != 2) {
    usage();
}

# Each argument has the form <file>[:start]; "-" names standard input.
my ($filename1, $filename2);
($filename1, $start1) = parse_filearg($ARGV[0]);
($filename2, $start2) = parse_filearg($ARGV[1]);

if ($filename1 eq "-" && $filename2 eq "-") {
    die "Only one of the inputs may be standard in\n";
}

# Open both inputs.  Three-argument open treats the name literally (no mode
# or pipe characters are interpreted), and the error message now reports the
# file name rather than the not-yet-assigned handle variable.
my ($file1, $file2);
if ($filename1 eq "-") {
    $file1 = \*STDIN;
} else {
    open($file1, '<', $filename1) or die "can't open $filename1: $!\n";
}

if ($filename2 eq "-") {
    $file2 = \*STDIN;
} else {
    open($file2, '<', $filename2) or die "can't open $filename2: $!\n";
}

# Skip the requested number of leading lines in each input.  The offsets are
# file-scoped lexicals because the Hunk output routines read them when
# printing line ranges.
my $file_offset1 = ffw($file1, $start1);
my $file_offset2 = ffw($file2, $start2);
72
$skip_first = 0;
my (@buf1, @buf2, @printbuf1, @printbuf2);

# Index of the buffered line that is compared on every pass.  The look-ahead
# scan that used to advance it is disabled, so it stays at 0.
$Compare_Ahead = 0;

# Walk both inputs in lock step.  While the lines agree they are consumed one
# at a time; on a mismatch a window of up to $diffsize lines per file is
# diffed, the hunks up to the last "change" hunk are printed, and the buffers
# are trimmed so the comparison resumes just after that hunk.
while (!eof($file1) && !eof($file2)) {
    my $line1 = <$file1>; chomp $line1;
    my $line2 = <$file2>; chomp $line2;

    # @buf* feed the diff, @printbuf* feed the output routines.
    push @buf1, $line1;
    push @buf2, $line2;
    push @printbuf1, $line1;
    push @printbuf2, $line2;

    # Compare the head of the buffers, ignoring a trailing " -- ..." part
    # on the second file's line.
    $line1 = $buf1[$Compare_Ahead];
    $line2 = $buf2[$Compare_Ahead];
    $line2 =~ s/ *--.*$//;

    if ($line1 eq $line2) {
        # Lines agree: drop one line from every buffer and move on.
        ++$file_offset1;
        ++$file_offset2;
        shift @printbuf1;
        shift @printbuf2;
        shift @buf1;
        shift @buf2;
        next;
    }

    # Mismatch: top both buffers up to the window size (or end of file).
    while (!eof($file1) && scalar(@buf1) < $diffsize) {
        my $line = <$file1>; chomp $line;
        push @printbuf1, $line;
        push @buf1, $line;
    }

    while (!eof($file2) && scalar(@buf2) < $diffsize) {
        my $line = <$file2>; chomp $line;
        push @printbuf2, $line;
        push @buf2, $line;
    }

    # Both buffers start at aligned positions, so the running length
    # difference that Hunk::new maintains must restart from zero for every
    # diff run; otherwise blocks counted in an earlier window leak into
    # the positions computed for this one.
    $File_Length_Difference = 0;
    my $diffs = diff(\@buf1, \@buf2);

    next unless @$diffs;

    my @hunklist;
    my ($hunk, $oldhunk);
    # Loop over hunks. If a hunk overlaps with the last hunk, join them.
    # Otherwise, queue the old one.  The final hunk is deliberately not
    # queued here; it is only used below when nothing else was queued.
    foreach my $piece (@$diffs) {
        $hunk = Hunk->new($piece, $Context_Lines, scalar(@buf1));
        next unless $oldhunk;

        if ($hunk->does_overlap($oldhunk)) {
            $hunk->prepend_hunk($oldhunk);
        } else {
            push @hunklist, $oldhunk;
        }
    } continue {
        $oldhunk = $hunk;
    }

    # Discard queued hunks from the tail until one flagged as a "change"
    # (both removals and insertions) is found, then put that one back.
    my $change = 0;
    while (scalar(@hunklist) && !$change) {
        $hunk = pop @hunklist;
        $change = $hunk->{"change"};
    }
    push @hunklist, $hunk;
    my $last_end1 = $hunk->{"end1"};
    my $last_end2 = $hunk->{"end2"};

    foreach my $ready (@hunklist) {
        $ready->output_diff(\@printbuf1, \@printbuf2);
    }

    # Resume right after the last printed hunk, keeping part of its
    # trailing context in the buffers.
    $last_end1 -= $Context_Lines - 1;
    $last_end2 -= $Context_Lines - 1;
    $file_offset1 += $last_end1;
    $file_offset2 += $last_end2;
    @printbuf1 = @printbuf1[$last_end1..$#printbuf1];
    @printbuf2 = @printbuf2[$last_end2..$#printbuf2];
    @buf1 = @buf1[$last_end1..$#buf1];
    @buf2 = @buf2[$last_end2..$#buf2];

    # Skip ahead over lines that match again, always leaving $Context_Lines
    # lines of leading context in the buffers.  Lines are text, so they are
    # compared as strings only.
    while (scalar(@buf1) > $Context_Lines &&
           scalar(@buf2) > $Context_Lines) {
        last if $buf1[$Context_Lines] ne $buf2[$Context_Lines];
        shift @printbuf1;
        shift @printbuf2;
        shift @buf1;
        shift @buf2;
        ++$file_offset1;
        ++$file_offset2;
    }
}

close $file1;
close $file2;
189
# Fast-forward a file handle by discarding its first $start lines.
#
# Arguments: the file handle and the number of lines to skip.
# Returns:   the number of lines actually skipped.
# Dies if called with the wrong number of arguments, or if the file ends
# before $start lines could be skipped.
sub ffw {
    if (scalar(@_) != 2) { die "improper usage of ffw\n"; }

    my ($FILE, $start) = @_;
    my $count = 0;

    while ($count < $start && !eof($FILE)) {
        scalar(<$FILE>);    # read and discard exactly one line
        $count++;
    }

    # Compare against the lines actually consumed so that a file that is
    # short by even a single line is reported.
    if ($count < $start) { die "File too short for ffw amount\n"; }
    return $count;
}
205
# Split a command-line argument of the form <file>[:start].
#
# Returns the list (file, start); start defaults to 0 when it is not given.
# Calls usage() (which exits) when the argument has more than two
# colon-separated fields.
sub parse_filearg {
    my ($arg) = @_;

    # Capture the fields explicitly: split no longer stores its result in
    # @_ when called in void context on current perls.
    my @fields = split /:/, $arg;
    if (scalar(@fields) > 2) { usage(); }

    my $file  = @fields ? $fields[0] : $arg;
    my $start = scalar(@fields) > 1 ? $fields[1] : 0;

    return ($file, $start);
}
216
# Print the command-line synopsis and terminate with exit status 1.
# Uses print rather than printf so that a '%' in the program name is not
# interpreted as a format directive.
sub usage {
    print "usage: $progname <file1>[:start] <file2>[:start]\n";
    exit 1;
}
221
222
223 # Package Hunk. A Hunk is a group of Blocks which overlap because of the
224 # context surrounding each block. (So if we're not using context, every
225 # hunk will contain one block.)
226 {
227 package Hunk;
228
sub new {
    # Build a Hunk holding a single Block.
    #
    # Arguments:
    #   $piece         - one chunk of diff output (becomes one Block)
    #   $context_items - number of context items to flag around the block
    #   $maxlen        - stored in the "maxlen" field; flag_context uses it
    #                    to limit the trailing context
    #
    # Side effect: adds the block's length difference to
    # $File_Length_Difference.
    #
    # Fields in a Hunk:
    #   start1/end1 - index range of the hunk in file 1
    #   start2/end2 - index range of the hunk in file 2
    #   maxlen      - see above
    #   change      - true when the block both removes and inserts items
    #   blocks      - a list of Block objects
    my ($class, $piece, $context_items, $maxlen) = @_;

    my $block = Block->new($piece);

    # How much longer file 2 is than file 1 before and after this block.
    my $delta       = $block->{"length_diff"};
    my $before_diff = $File_Length_Difference;
    my $after_diff  = $before_diff + $delta;
    $File_Length_Difference += $delta;

    # First and last item numbers on each side, or -1 when the block has no
    # change of that kind.
    my @removed  = $block->remove;
    my @inserted = $block->insert;
    my ($rm_first, $rm_last)   = (-1, -1);
    my ($ins_first, $ins_last) = (-1, -1);
    if (@removed) {
        $rm_first = $removed[0]->{"item_no"};
        $rm_last  = $removed[-1]->{"item_no"};
    }
    if (@inserted) {
        $ins_first = $inserted[0]->{"item_no"};
        $ins_last  = $inserted[-1]->{"item_no"};
    }

    # When one side is missing, derive its position from the other side and
    # the current difference in file lengths.
    my $start1 = $rm_first  == -1 ? $ins_first - $before_diff : $rm_first;
    my $end1   = $rm_last   == -1 ? $ins_last  - $after_diff  : $rm_last;
    my $start2 = $ins_first == -1 ? $rm_first  + $before_diff : $ins_first;
    my $end2   = $ins_last  == -1 ? $rm_last   + $after_diff  : $ins_last;
    my $change = scalar(@removed) && scalar(@inserted);

    # At first, a hunk has just one Block in it.
    my $self = bless {
        "start1" => $start1,
        "start2" => $start2,
        "end1"   => $end1,
        "end2"   => $end2,
        "maxlen" => $maxlen,
        "change" => $change,
        "blocks" => [$block],
    }, $class;

    $self->flag_context($context_items);

    return $self;
}
287
# Widen the "start" and "end" fields so that up to $context_items items of
# context are included before and after the hunk.
#
# The leading context is limited by the beginning of file 1.  The trailing
# context is limited by the last valid index of file 1: "maxlen" holds the
# number of items, so that index is maxlen - 1.  Both files' ranges are
# widened by the same amount.
sub flag_context {
    my ($hunk, $context_items) = @_;
    return unless $context_items; # no context

    # add context before
    my $start1 = $hunk->{"start1"};
    my $num_added = $context_items > $start1 ? $start1 : $context_items;
    $hunk->{"start1"} -= $num_added;
    $hunk->{"start2"} -= $num_added;

    # context after
    my $end1 = $hunk->{"end1"};
    my $last_index = $hunk->{"maxlen"} - 1;
    $num_added = ($end1 + $context_items > $last_index) ?
                  $last_index - $end1 :
                  $context_items;
    $num_added = 0 if $num_added < 0;   # never shrink the hunk
    $hunk->{"end1"} += $num_added;
    $hunk->{"end2"} += $num_added;
}
308
# Report whether this hunk overlaps the hunk that precedes it.
# Hunks that are directly adjacent (the previous one ends one item before
# this one starts) count as overlapping.  Returns "" when no previous hunk
# is given.
sub does_overlap {
    my ($self, $previous) = @_;
    return "" unless $previous;     # first time through there is no old hunk

    # Distance from the previous hunk's end to this hunk's start, per file.
    my $gap1 = $self->{"start1"} - $previous->{"end1"};
    my $gap2 = $self->{"start2"} - $previous->{"end2"};

    return ($gap1 <= 1 || $gap2 <= 1);
}
319
# Merge an earlier hunk into this one: this hunk takes over the earlier
# hunk's start positions and its blocks are placed in front of this hunk's
# own blocks.  The earlier hunk itself is left unmodified.
sub prepend_hunk {
    my ($self, $earlier) = @_;

    @{$self}{"start1", "start2"} = @{$earlier}{"start1", "start2"};

    unshift @{ $self->{"blocks"} }, @{ $earlier->{"blocks"} };
}
330
331
# DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...

# Print this hunk in the format selected by the main program's option
# variables: unified when $main::opt_u is defined, otherwise context when
# $main::opt_c is defined.  Dies when neither is defined.
sub output_diff {
    return output_unified_diff(@_) if defined $main::opt_u;
    return output_context_diff(@_) if defined $main::opt_c;
    die "unknown diff";
}
338
sub output_unified_diff {
    # Print one hunk in unified diff format.
    #
    # Arguments: the hunk, and references to the line arrays of file 1 and
    # file 2.  The "@@" header ranges are shifted by $file_offset1 and
    # $file_offset2.
    my ($hunk, $fileref1, $fileref2) = @_;

    # Header with the item number ranges of both files.
    my $range1 = $hunk->unified_range(1, $file_offset1);
    my $range2 = $hunk->unified_range(2, $file_offset2);
    print "@@ -$range1 +$range2 @@\n";

    # The output list starts out as the file 1 part of the hunk, with every
    # line marked as context.  Removing an item just means putting a '-' in
    # front of it.  Inserting an item means fetching it from file 2 and
    # splicing it in.
    #
    # Removals are shifted by the number of lines spliced in so far, because
    # splicing changes the length of the list.  Insertions are shifted by
    # the number of removals so far, because their item numbers refer to
    # positions in file 2, which do not account for removed lines.
    my $first = $hunk->{"start1"};
    my $last  = $hunk->{"end1"};
    my @outlist = @{$fileref1}[$first .. $last];
    s/^/ / for @outlist;

    my $spliced_in = 0;
    my $marked_out = 0;
    for my $block (@{ $hunk->{"blocks"} }) {
        for my $item ($block->remove) {
            my $sign = $item->{"sign"};      # -
            my $pos  = $item->{"item_no"} - $first + $spliced_in;
            $outlist[$pos] =~ s/^ /$sign/;
            $marked_out++;
        }
        for my $item ($block->insert) {
            my $sign    = $item->{"sign"};   # +
            my $line_no = $item->{"item_no"};
            my $pos     = $line_no - $hunk->{"start2"} + $marked_out;
            splice(@outlist, $pos, 0, $sign . $fileref2->[$line_no]);
            $spliced_in++;
        }
    }

    s/$/\n/ for @outlist;                    # add \n's
    print @outlist;
}
382
sub output_context_diff {
    # Print one hunk in context diff format.
    #
    # Arguments: the hunk, and references to the line arrays of file 1 and
    # file 2.  The printed ranges are shifted by $file_offset1 and
    # $file_offset2.
    my ($hunk, $fileref1, $fileref2) = @_;

    print "***************\n";
    # Item number ranges for both halves of the hunk.
    my $range1 = $hunk->context_range(1, $file_offset1);
    my $range2 = $hunk->context_range(2, $file_offset2);

    # File 1 half: the lines are printed only when at least one block
    # removes items.
    print "*** $range1 ****\n";
    my @removing = grep {$_->remove} @{ $hunk->{"blocks"} };
    if (@removing) {
        my $low = $hunk->{"start1"};
        my @outlist = @{$fileref1}[$low .. $hunk->{"end1"}];
        s/^/ / for @outlist;                 # assume it's just context
        for my $block (@removing) {
            my $op = $block->op;             # - or !
            for my $item ($block->remove) {
                $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
            }
        }
        s/$/\n/ for @outlist;                # add \n's
        print @outlist;
    }

    # File 2 half: the lines are printed only when at least one block
    # inserts items.
    print "--- $range2 ----\n";
    my @inserting = grep {$_->insert} @{ $hunk->{"blocks"} };
    if (@inserting) {
        my $low = $hunk->{"start2"};
        my @outlist = @{$fileref2}[$low .. $hunk->{"end2"}];
        s/^/ / for @outlist;                 # assume it's just context
        for my $block (@inserting) {
            my $op = $block->op;             # + or !
            for my $item ($block->insert) {
                $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
            }
        }
        s/$/\n/ for @outlist;                # add \n's
        print @outlist;
    }
}
426
sub context_range {
    # Build the item number range shown in a context diff header.
    #
    # Arguments: the hunk, the file selector (1 or 2, picks the
    # "start"/"end" fields to use) and an offset added to both ends.
    # Returns "start,end", or just the end number when start is not
    # smaller than end.
    my ($hunk, $flag, $offset) = @_;

    # Item numbers are stored from zero but printed from one.
    my $shift = $offset + 1;
    my $start = $hunk->{"start$flag"} + $shift;
    my $end   = $hunk->{"end$flag"}   + $shift;

    return $start < $end ? "$start,$end" : $end;
}
439
sub unified_range {
    # Build the item number range shown in a unified diff header: the
    # number where the hunk starts followed by the number of lines in it.
    # The line count is left out when it is exactly 1.
    #
    # Arguments: the hunk, the file selector (1 or 2, picks the
    # "start"/"end" fields to use) and an offset added to both ends.
    my ($hunk, $flag, $offset) = @_;

    # Item numbers are stored from zero but printed from one.
    my $start = $hunk->{"start$flag"} + $offset + 1;
    my $end   = $hunk->{"end$flag"}   + $offset + 1;
    my $length = $end - $start + 1;

    return $end if $length == 1;

    # For an empty range the end number is printed instead of the start.
    my $first = $length < 2 ? $end : $start;
    return "$first,$length";
}
455 } # end Package Hunk
456
# Package Block. A block is an operation removing, adding, or changing
# a group of items. Basically, this is just a list of changes, where each
# change adds or deletes a single item.
# (Change could be a separate class, but it didn't seem worth it)
{
    package Block;

    # Build a Block from one chunk of diff output: a reference to a list of
    # [sign, item_no, text] entries.
    #
    # Fields in a block:
    #   changes     - list of hashes, one per change, each with
    #                   sign    - '+' for insert, '-' for remove
    #                   item_no - number of the item in the file
    #                 (the text of the item is not stored)
    #   length_diff - number of insertions minus number of removals
    sub new {
        my ($class, $chunk) = @_;

        # Turn each change into a small hash, dropping the item text.
        my @changes =
            map { +{ "sign" => $_->[0], "item_no" => $_->[1] } } @$chunk;

        my $self = bless { "changes" => \@changes }, $class;

        $self->{"length_diff"} = $self->insert - $self->remove;
        return $self;
    }


    # LOW LEVEL FUNCTIONS

    # Classify the block: '!' when it both removes and inserts, '-' when it
    # only removes, '+' when it only inserts.  A block with no changes
    # triggers a warning and yields '^'.
    sub op {
        my $self = shift;
        my $inserts = $self->insert;
        my $removes = $self->remove;

        return '!' if $removes && $inserts;
        return '-' if $removes;
        return '+' if $inserts;

        warn "unknown block type";
        return '^'; # context block
    }

    # Returns a list of the changes in this block that remove items
    # (or the number of removals if called in scalar context)
    sub remove {
        my $self = shift;
        return grep { $_->{"sign"} eq '-' } @{ $self->{"changes"} };
    }

    # Returns a list of the changes in this block that insert items
    # (or the number of insertions if called in scalar context)
    sub insert {
        my $self = shift;
        return grep { $_->{"sign"} eq '+' } @{ $self->{"changes"} };
    }

} # end of package Block
511 } # end of package Block