3 # Copyright (c) 2001 Nathan L. Binkert
6 # Permission to redistribute, use, copy, and modify this software
7 # without fee is hereby granted, provided that the following
10 # 1. This entire notice is included in all source code copies of any
11 # software which is or includes a copy or modification of this
13 # 2. The name of the author may not be used to endorse or promote
14 # products derived from this software without specific prior
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 use Algorithm
::Diff
qw(diff);
31 use vars qw
($opt_C $opt_c $opt_u $opt_U);
37 # After we've read up to a certain point in each file, the number of items
38 # we've read from each file will differ by $FLD (could be 0)
# Running difference in item counts between the two inputs; it is updated by
# the Hunk constructor further down the file.
39 my $File_Length_Difference = 0;
# Amount of context passed to each Hunk and used below when trimming the
# line buffers after a batch of hunks has been printed.
40 my $Context_Lines = 9;
# Exactly two command-line arguments (one per input) are required.
43 if (scalar(@ARGV) != 2) {
# Each argument is turned into a (file name, start value) pair by
# parse_filearg.
# NOTE(review): $start1/$start2 are not part of this `my` list -- confirm
# where they are declared.
47 my ($filename1, $filename2);
48 ($filename1, $start1) = parse_filearg
($ARGV[0]);
49 ($filename2, $start2) = parse_filearg
($ARGV[1]);
# "-" stands for standard input (see the error text), so at most one of the
# two inputs may use it.
51 if ($filename1 eq "-" && $filename2 eq "-") {
52 die "Only one of the inputs may be standard in\n";
# Input 1: "-" is special-cased; any other name is opened on the bareword
# handle FILE1.
# NOTE(review): the die message interpolates $file1, while the name handed to
# open is $filename1 -- the message probably should report $filename1; confirm.
# NOTE(review): this is a two-argument open, so characters in the name can
# select the open mode; the three-argument form avoids that.
56 if ($filename1 eq "-") {
59 open(FILE1
, $filename1) || die "can't open $file1: $!\n";
# Input 2: same handling as input 1, using FILE2.
# NOTE(review): same $file2 vs. $filename2 question as above.
63 if ($filename2 eq "-") {
66 open(FILE2
, $filename2) || die "can't open $file2: $!\n";
# ffw (defined later in the file) is called with each handle and its start
# value; whatever it returns is kept as that input's running offset.
70 my $file_offset1 = ffw
($file1, $start1);
71 my $file_offset2 = ffw
($file2, $start2);
# @buf1/@buf2 are the lines given to diff; @printbuf1/@printbuf2 are the
# lines given to output_diff.
74 my (@buf1, @buf2, @printbuf1, @printbuf2);
# Main loop: one line is read from each input per pass, until either input
# reaches end of file.
78 while (!eof($file1) && !eof($file2)) {
79 my $line1 = <$file1>; chomp $line1;
80 my $line2 = <$file2>; chomp $line2;
81 my $printline1 = $line1;
82 my $printline2 = $line2;
86 push @printbuf1, $printline1;
87 push @printbuf2, $printline2;
89 # while ($Compare_Ahead < $Context_Lines) {
90 # $line1 = @buf1[$Compare_Ahead];
91 # $line2 = @buf2[$Compare_Ahead];
92 # $line2 =~ s/ *--.*$//;
93 # if ($line1 ne $line2) { last; }
# Compare the buffered entries at index $Compare_Ahead.
# NOTE(review): @buf1[...] / @buf2[...] are one-element array slices;
# $buf1[...] / $buf2[...] is the usual scalar element form. Also confirm
# where $Compare_Ahead is set.
97 $line1 = @buf1[$Compare_Ahead];
98 $line2 = @buf2[$Compare_Ahead];
# Before comparing, drop from line 2 an optional run of spaces followed by
# "--" and everything after it.
99 $line2 =~ s/ *--.*$//;
# On a mismatch, read ahead from each input until its compare buffer holds
# $diffsize lines or the input ends.
101 if ($line1 ne $line2) {
102 while (!eof($file1) && scalar(@buf1) < $diffsize) {
103 $line = <$file1>; chomp $line;
104 my $printline = $line;
106 push @printbuf1, $printline;
110 while (!eof($file2) && scalar(@buf2) < $diffsize) {
111 $line = <$file2>; chomp $line;
112 my $printline = $line;
113 # $line =~ s/ *--.*$//;
115 push @printbuf2, $printline;
# diff is imported from Algorithm::Diff at the top of the file; it is given
# references to the two compare buffers.
119 my $diffs = diff
(\
@buf1, \
@buf2);
125 # Loop over hunks. If a hunk overlaps with the last hunk, join them.
126 # Otherwise, print out the old one.
127 foreach my $piece (@
$diffs) {
# The Hunk constructor receives the diff piece, the context size and the
# current length of @buf1.
# NOTE(review): `new Hunk (...)` is indirect-object syntax; `Hunk->new(...)`
# is the unambiguous spelling.
128 $hunk = new Hunk
($piece, $Context_Lines, scalar(@buf1));
129 next unless $oldhunk;
131 if ($hunk->does_overlap($oldhunk)) {
132 $hunk->prepend_hunk($oldhunk);
134 push @hunklist, $oldhunk;
# Pop hunks from the end of @hunklist until one with a true "change" field
# has been seen or the list is empty; the bounds of the hunk that is pushed
# back are remembered in $last_start*/$last_end*.
141 while (scalar(@hunklist) && !$change) {
142 $hunk = pop @hunklist;
143 $change = $hunk->{"change"};
145 push @hunklist, $hunk;
146 $last_start1 = $hunk->{"start1"};
147 $last_start2 = $hunk->{"start2"};
148 $last_end1 = $hunk->{"end1"};
149 $last_end2 = $hunk->{"end2"};
# Print the queued hunks in order, from the print buffers (the commented-out
# call used the compare buffers instead).
151 while (scalar(@hunklist)) {
152 $hunk = shift @hunklist;
153 # $hunk->output_diff(\@buf1, \@buf2);
154 $hunk->output_diff(\
@printbuf1, \
@printbuf2);
# Back the remembered end positions off by $Context_Lines - 1, advance the
# file offsets by the result, and drop everything before that position from
# all four buffers.
157 $last_end1 -= $Context_Lines - 1;
158 $last_end2 -= $Context_Lines - 1;
159 $file_offset1 += $last_end1;
160 $file_offset2 += $last_end2;
161 @printbuf1 = @printbuf1[$last_end1..$#printbuf1];
162 @printbuf2 = @printbuf2[$last_end2..$#printbuf2];
163 @buf1 = @buf1[$last_end1..$#buf1];
164 @buf2 = @buf2[$last_end2..$#buf2];
# While both compare buffers hold more than $Context_Lines entries, compare
# the entries at index $Context_Lines and stop at the first pair that differs.
165 while (scalar(@buf1) > $Context_Lines &&
166 scalar(@buf2) > $Context_Lines) {
167 $foo1 = @buf1[$Context_Lines];
168 $foo2 = @buf2[$Context_Lines];
# NOTE(review): scalar($foo1) != scalar($foo2) compares the two line values
# numerically; the `ne` test beside it already compares them as strings --
# confirm the numeric test is intended.
169 if (scalar($foo1) != scalar($foo2) || $foo1 ne $foo2) { last; }
170 $foo1 = shift @printbuf1;
171 $foo2 = shift @printbuf2;
180 $foo1 = shift @printbuf1;
181 $foo2 = shift @printbuf2;
191 if (scalar(@_) != 2) { die "improper usage of ffw\n"; }
197 while ($start-- > 0 && !eof($FILE)) {
202 if ($start > 0) {die "File too short for ffw amount\n"; }
# parse_filearg: produce the (file, start) pair for one command-line argument.
# The usage text describes each argument as <file>[:start].
# Returns the two-element list ($file, $start).
# NOTE(review): the sub is declared with an empty prototype `()`, yet its body
# inspects @_ and the callers pass one argument. Perl prototypes change how
# calls are parsed rather than validating arguments -- confirm the prototype
# is intended.
206 sub parse_filearg
() {
# More than two elements in @_ is treated as a usage error.
209 if (scalar(@_) > 2) { usage
(); }
# A second element, when present, supplies the start value.
212 if (scalar(@_) > 1) { $start = $_[1]; }
214 return ($file, $start);
218 printf "usage: $progname <file1>[:start] <file2>[:start]\n";
223 # Package Hunk. A Hunk is a group of Blocks which overlap because of the
224 # context surrounding each block. (So if we're not using context, every
225 # hunk will contain one block.)
230 # Arg1 is one piece of the output from Algorithm::Diff::diff (which corresponds to one Block)
231 # Arg2 is the number of items (lines, e.g.,) of context around each block
233 # This subroutine changes $File_Length_Difference
236 # blocks - a list of Block objects
237 # start - index in file 1 where first block of the hunk starts
238 # end - index in file 1 where last block of the hunk ends
241 # before_diff - how much longer file 2 is than file 1 due to all hunks
242 # until but NOT including this one
243 # after_diff - difference due to all hunks including this one
244 my ($class, $piece, $context_items, $maxlen) = @_;
246 my $block = new Block
($piece); # this modifies $FLD!
248 my $before_diff = $File_Length_Difference; # BEFORE this hunk
249 my $after_diff = $before_diff + $block->{"length_diff"};
250 $File_Length_Difference += $block->{"length_diff"};
252 # @remove_array and @insert_array hold the items to remove and insert.
253 # Save the first & last item number of each array. If the array is empty
254 # though (e.g., we're only adding items in this block), then figure
255 # out the line number based on the line number of the other file and
256 # the current difference in file lengths
257 my @remove_array = $block->remove;
258 my @insert_array = $block->insert;
259 my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2, $change);
260 $a1 = @remove_array ?
$remove_array[0 ]->{"item_no"} : -1;
261 $a2 = @remove_array ?
$remove_array[-1]->{"item_no"} : -1;
262 $b1 = @insert_array ?
$insert_array[0 ]->{"item_no"} : -1;
263 $b2 = @insert_array ?
$insert_array[-1]->{"item_no"} : -1;
265 $start1 = $a1 == -1 ?
$b1 - $before_diff : $a1;
266 $end1 = $a2 == -1 ?
$b2 - $after_diff : $a2;
267 $start2 = $b1 == -1 ?
$a1 + $before_diff : $b1;
268 $end2 = $b2 == -1 ?
$a2 + $after_diff : $b2;
269 $change = scalar(@remove_array) && scalar(@insert_array);
271 # At first, a hunk will have just one Block in it
279 "blocks" => [$block],
283 $hunk->flag_context($context_items);
288 # Change the "start" and "end" fields to note that context should be added
291 my ($hunk, $context_items) = @_;
292 return unless $context_items; # no context
295 my $start1 = $hunk->{"start1"};
296 my $num_added = $context_items > $start1 ?
$start1 : $context_items;
297 $hunk->{"start1"} -= $num_added;
298 $hunk->{"start2"} -= $num_added;
301 my $end1 = $hunk->{"end1"};
302 $num_added = ($end1+$context_items > $hunk->{"maxlen"}) ?
303 $hunk->{"maxlen"} - $end1 :
305 $hunk->{"end1"} += $num_added;
306 $hunk->{"end2"} += $num_added;
309 # Is there an overlap between hunk arg0 and old hunk arg1?
310 # Note: if end of old hunk is one less than beginning of second, they overlap
312 my ($hunk, $oldhunk) = @_;
313 return "" unless $oldhunk; # first time through, $oldhunk is empty
315 # Do I actually need to test both?
316 return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
317 $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
320 # Prepend hunk arg1 to hunk arg0
321 # Note that arg1 isn't updated! Only arg0 is.
323 my ($hunk, $oldhunk) = @_;
325 $hunk->{"start1"} = $oldhunk->{"start1"};
326 $hunk->{"start2"} = $oldhunk->{"start2"};
328 unshift (@
{$hunk->{"blocks"}}, @
{$oldhunk->{"blocks"}});
332 # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
334 if (defined $main::opt_u
) {&output_unified_diff
(@_)}
335 elsif (defined $main::opt_c
) {&output_context_diff
(@_)}
336 else {die "unknown diff"}
# output_unified_diff: print one hunk in unified-diff form.
#   $hunk     - the Hunk to print; its "start1", "end1", "start2" and
#               "blocks" fields are read here
#   $fileref1 - reference to the array of items from file 1
#   $fileref2 - reference to the array of items from file 2
# $file_offset1 / $file_offset2 are not parameters; presumably they are the
# file-level offsets maintained by the main script -- confirm they are
# visible from this package.
339 sub output_unified_diff
{
340 my ($hunk, $fileref1, $fileref2) = @_;
343 # Calculate item number range.
344 my $range1 = $hunk->unified_range(1, $file_offset1);
345 my $range2 = $hunk->unified_range(2, $file_offset2);
# Hunk header line.
346 print "@@ -$range1 +$range2 @@\n";
348 # Outlist starts containing the hunk of file 1.
349 # Removing an item just means putting a '-' in front of it.
350 # Inserting an item requires getting it from file2 and splicing it in.
351 # We splice in $num_added items. Remove blocks use $num_added because
352 # splicing changed the length of outlist.
353 # We remove $num_removed items. Insert blocks use $num_removed because
354 # their item numbers---corresponding to positions in file *2*--- don't take
355 # removed items into account.
356 my $low = $hunk->{"start1"};
357 my $hi = $hunk->{"end1"};
358 my ($num_added, $num_removed) = (0,0);
# Start from the file-1 items covered by the hunk, each given the context
# prefix.
359 my @outlist = @
$fileref1[$low..$hi];
# NOTE(review): map is used here only for its side effect on @outlist; a
# for loop is the usual form for that.
360 map {s/^/ /} @outlist; # assume it's just context
362 foreach my $block (@
{$hunk->{"blocks"}}) {
# Removed items are already in @outlist: their leading context prefix is
# overwritten with the item's sign.
363 foreach my $item ($block->remove) {
364 my $op = $item->{"sign"}; # -
365 my $offset = $item->{"item_no"} - $low + $num_added;
366 $outlist[$offset] =~ s/^ /$op/;
# Inserted items are fetched from file 2 and spliced into @outlist at the
# position computed from their file-2 item number.
369 foreach my $item ($block->insert) {
370 my $op = $item->{"sign"}; # +
371 my $i = $item->{"item_no"};
372 my $offset = $i - $hunk->{"start2"} + $num_removed;
373 splice(@outlist,$offset,0,"$op$$fileref2[$i]");
378 map {s/$/\n/} @outlist; # add \n's
# output_context_diff: print one hunk in context-diff form: a separator line,
# then a file-1 section headed "*** range ****" and a file-2 section headed
# "--- range ----".
#   $hunk     - the Hunk to print; its "start1", "end1", "start2", "end2"
#               and "blocks" fields are read here
#   $fileref1 - reference to the array of items from file 1
#   $fileref2 - reference to the array of items from file 2
# $file_offset1 / $file_offset2 are not parameters; presumably they are the
# file-level offsets maintained by the main script -- confirm they are
# visible from this package.
383 sub output_context_diff
{
384 my ($hunk, $fileref1, $fileref2) = @_;
387 print "***************\n";
388 # Calculate item number range.
389 my $range1 = $hunk->context_range(1, $file_offset1);
390 my $range2 = $hunk->context_range(2, $file_offset2);
392 # Print out file 1 part for each block in context diff format if there are
393 # any blocks that remove items
394 print "*** $range1 ****\n";
395 my $low = $hunk->{"start1"};
396 my $hi = $hunk->{"end1"};
# grep keeps the blocks for which ->remove is true, i.e. those with at least
# one removal.
# NOTE(review): @blocklist is not declared with `my` here -- confirm where
# it is declared.
397 if (@blocklist = grep {$_->remove} @
{$hunk->{"blocks"}}) {
398 my @outlist = @
$fileref1[$low..$hi];
399 map {s/^/ /} @outlist; # assume it's just context
400 foreach my $block (@blocklist) {
# One marker per block (from ->op) is applied to every removed item in it.
401 my $op = $block->op; # - or !
402 foreach my $item ($block->remove) {
403 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
406 map {s/$/\n/} @outlist; # add \n's
# File-2 section: same procedure, using the file-2 bounds and the blocks
# that insert items.
410 print "--- $range2 ----\n";
411 $low = $hunk->{"start2"};
412 $hi = $hunk->{"end2"};
413 if (@blocklist = grep {$_->insert} @
{$hunk->{"blocks"}}) {
414 my @outlist = @
$fileref2[$low..$hi];
415 map {s/^/ /} @outlist; # assume it's just context
416 foreach my $block (@blocklist) {
417 my $op = $block->op; # + or !
418 foreach my $item ($block->insert) {
419 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
422 map {s/$/\n/} @outlist; # add \n's
428 # Generate a range of item numbers to print. Only print 1 number if the range
429 # has only one item in it. Otherwise, it's 'start,end'
430 my ($hunk, $flag, $offset) = @_;
431 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
433 # index from 1, not zero
434 $start += $offset + 1;
436 my $range = ($start < $end) ?
"$start,$end" : $end;
441 # Generate a range of item numbers to print for unified diff
442 # Print number where block starts, followed by number of lines in the block
443 # (don't print number of lines if it's 1)
444 my ($hunk, $flag, $offset) = @_;
445 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
447 # index from 1, not zero
448 $start += $offset + 1;
450 my $length = $end - $start + 1;
451 my $first = $length < 2 ?
$end : $start; # strange, but correct...
452 my $range = $length== 1 ?
$first : "$first,$length";
457 # Package Block. A block is an operation removing, adding, or changing
458 # a group of items. Basically, this is just a list of changes, where each
459 # change adds or deletes a single item.
460 # (Change could be a separate class, but it didn't seem worth it)
464 # Input is a chunk from Algorithm::Diff::diff
466 # length_diff - how much longer file 2 is than file 1 due to this block
468 # sign - '+' for insert, '-' for remove
469 # item_no - number of the item in the file (e.g., line number)
470 # We don't bother storing the text of the item
472 my ($class,$chunk) = @_;
475 # This just turns each change into a hash.
476 foreach my $item (@
$chunk) {
477 my ($sign, $item_no, $text) = @
$item;
478 my $hashref = {"sign" => $sign, "item_no" => $item_no};
479 push @changes, $hashref;
482 my $block = { "changes" => \
@changes };
483 bless $block, $class;
485 $block->{"length_diff"} = $block->insert - $block->remove;
490 # LOW LEVEL FUNCTIONS
492 # what kind of block is this?
494 my $insert = $block->insert;
495 my $remove = $block->remove;
497 $remove && $insert and return '!';
498 $remove and return '-';
499 $insert and return '+';
500 warn "unknown block type";
501 return '^'; # context block
# remove: pick out the changes in this block whose sign is '-'.
# List context yields the matching change hashes; scalar context yields
# how many of them there are.
sub remove {
    my ($self) = @_;
    return grep { $_->{"sign"} eq '-' } @{ $self->{"changes"} };
}
# insert: pick out the changes in this block whose sign is '+'.
# List context yields the matching change hashes; scalar context yields
# how many of them there are.
sub insert {
    my ($self) = @_;
    return grep { $_->{"sign"} eq '+' } @{ $self->{"changes"} };
}
511 } # end of package Block