Makefile.am: Add HashSet.java and java/lang/ref classes.
[gcc.git] / fastjar / jargrep.c
1 /*
2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 1999 Bryan Burns
4 Copyright (C) 2000 Cory Hollingsworth
5
6 Parts of this program are based on Bryan Burns' work with fastjar
7 Copyright (C) 1999.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 */
23
24 /* $Id: jargrep.c,v 1.1 2000/12/09 03:08:23 apbianco Exp $
25
26 $Log: jargrep.c,v $
27 Revision 1.1 2000/12/09 03:08:23 apbianco
28 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
29
30 * fastjar: Imported.
31
32 Revision 1.8 2000/09/13 14:02:02 cory
33 Reformatted some of the code to more closely match the layout of the original
34 fastjar utility.
35
36 Revision 1.7 2000/09/12 22:29:36 cory
37 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
38 will test some other platforms later.
39
40
41 */
42
#include "config.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "jargrep.h"
#include "jartool.h"
#include "pushback.h"
#include "zipfile.h"
56
/* Usage message; the single %s is filled in with the program name. */
char *Usage = "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n";

/* Argument of the option most recently parsed by getopt(). */
extern char *optarg;
60
61 /*
62 Function name: opt_valid
63 arg: options Bitfield flag that contains the command line options of grepjar.
64 purpose: To guard agains the occurance of certain incompatible flags being used
65 together.
66 returns: TRUE if options are valid, FALSE otherwise.
67 */
68
69 int opt_valid(int options) {
70 int retflag;
71
72 if((options & JG_PRINT_COUNT) &&
73 (options & (JG_PRINT_BYTEOFFSET | JG_PRINT_LINE_NUMBER)))
74 {
75 retflag = FALSE;
76 }
77 else retflag = TRUE;
78
79 return retflag;
80 }
81
82 /*
83 Function name: create_regexp
84 args: regstr String containing the uncompiled regular expression. This may be the
85 expression as is passed in through argv.
86 options This is the flag containing the commandline options that have been
87 parsed by getopt.
88 purpose: Handle the exception handling involved with setting upt a new regular
89 expression.
90 returns: Newly allocated compile regular expression ready to be used in an regexec call.
91 */
92
93 regex_t *create_regexp(char *regstr, int options) {
94 regex_t *exp;
95 int exp_flags = 0;
96 int errcode;
97 int msgsize;
98 char *errmsg;
99
100 if(exp = (regex_t *) malloc(sizeof(regex_t)))
101 {
102 if(errcode = regcomp(exp, regstr, (options & JG_IGNORE_CASE) ? REG_ICASE : 0)) {
103 fprintf(stderr, "regcomp of regex failed,\n");
104 if(errmsg = (char *) malloc(msgsize = regerror(errcode, exp, NULL, 0) + 1)) {
105 regerror(errcode, exp, errmsg, msgsize);
106 fprintf(stderr, "Error: %s\n", errmsg);
107 free(exp);
108 free(errmsg);
109 exit(1);
110 }
111 else {
112 fprintf(stderr, "Malloc of errmsg failed.\n");
113 fprintf(stderr, "Error: %s\n", strerror(errno));
114 free(exp);
115 exit(1);
116 }
117 }
118 }
119 else {
120 fprintf(stderr, "Malloc of regex failed,\n");
121 fprintf(stderr, "Error: %s\n", strerror(errno));
122 exit(1);
123 }
124
125 return exp;
126 }
127
128 /*
129 Function name: check_sig
130 args: scratch Pointer to array of bytes containing signature.
131 pbf Pointer to push back handle for jar file.
132 purpose: Verify that checksum is correct.
133 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
134 we have read beyound the embedded file list and can exit knowing we have read all the
135 relevent information. 2 means we still haven't reached embdedded file list and need to
136 do some more reading.
137 */
138 int check_sig(ub1 *scratch, pb_file *pbfp) {
139 ub4 signature;
140 int retflag = 0;
141
142 signature = UNPACK_UB4(scratch, 0);
143
144 #ifdef DEBUG
145 printf("signature is %x\n", signature);
146 #endif
147 if(signature == 0x08074b50){
148 #ifdef DEBUG
149 printf("skipping data descriptor\n");
150 #endif
151 pb_read(pbfp, scratch, 12);
152 retflag = 2;
153 } else if(signature == 0x02014b50){
154 #ifdef DEBUG
155 printf("Central header reached.. we're all done!\n");
156 #endif
157 retflag = 1;
158 }else if(signature != 0x04034b50){
159 printf("Ick! %#x\n", signature);
160 retflag = 1;
161 }
162
163 return retflag;
164 }
165
166 /*
167 Function name: decd_siz
168 args csize Pointer to embedded file's compressed size.
169 usize Pointer to embedded file's uncmpressed size.
170 fnlen Pointer to embedded file's file name length.
171 elfen Pointer to length of extra fields in jar file.
172 flags Pointer to bitmapped flags.
173 method Pointer to indicator of storage method of embedded file.
174 file_header Pointer to string containing the above values to be unbacked.
175 Purpose: Unpack the series of values from file_header.
176 */
177
178 void decd_siz(ub4 *csize, ub4 *usize, ub2 *fnlen, ub2 *eflen, ub2 *flags, ub2 *method, ub1 *file_header) {
179 *csize = UNPACK_UB4(file_header, LOC_CSIZE);
180 #ifdef DEBUG
181 printf("Compressed size is %u\n", *csize);
182 #endif
183
184 *usize = UNPACK_UB4(file_header, LOC_USIZE);
185 #ifdef DEBUG
186 printf("Uncompressed size is %u\n", *usize);
187 #endif
188
189 *fnlen = UNPACK_UB2(file_header, LOC_FNLEN);
190 #ifdef DEBUG
191 printf("Filename length is %hu\n", *fnlen);
192 #endif
193
194 *eflen = UNPACK_UB2(file_header, LOC_EFLEN);
195 #ifdef DEBUG
196 printf("Extra field length is %hu\n", *eflen);
197 #endif
198
199 *flags = UNPACK_UB2(file_header, LOC_EXTRA);
200 #ifdef DEBUG
201 printf("Flags are %#hx\n", *flags);
202 #endif
203
204 *method = UNPACK_UB2(file_header, LOC_COMP);
205 #ifdef DEBUG
206 printf("Compression method is %#hx\n", *method);
207 #endif
208
209 }
210
211 /*
212 Function name: new_filename
213 args: pbf Pointer to push back file handle. Used for reading input file.
214 len Length of file name to be read.
215 purpose: Read in the embedded file name from jar file.
216 returns: Pointer to newly allocated string containing file name.
217 */
218
219 char *new_filename(pb_file *pbf, ub4 len) {
220 char *filename;
221
222 if(!(filename = (char *) malloc(len + 1))) {
223 fprintf(stderr, "Malloc failed of filename\n");
224 fprintf(stderr, "Error: %s\n", strerror(errno));
225 }
226 pb_read(pbf, filename, len);
227 filename[len] = '\0';
228
229 #ifdef DEBUG
230 printf("filename is %s\n", filename);
231 #endif
232
233 return filename;
234 }
235
236 /*
237 Funtion name: read_string
238 args: pbf Pointer to push back file handle. Used for reading input file.
239 size Size of embedded file in bytes.
240 purpose: Create a string containing the contents of the embedded noncompressed file.
241 returns: Pointer to newly allocated string containing embedded file contents.
242 */
243
244 char *read_string(pb_file *pbf, int size) {
245 char *page;
246
247 if(page = (char *) malloc(size + 1)) {
248 pb_read(pbf, page, size);
249 page[size] = '\0';
250 }
251 else {
252 fprintf(stderr, "Malloc of page buffer failed.\n");
253 fprintf(stderr, "Error: %s\n", strerror(errno));
254 exit(1);
255 }
256
257 return page;
258 }
259
/* A line ends at any control character other than TAB (this includes NUL). */
static int jg_ends_line(char c) {
    return c != '\t' && iscntrl((unsigned char) c);
}

/*
Function name: extract_line
args:    stream  String containing the full contents of a file which is to be
                 substringed in order to provide the line representing our
                 grep output.
         begin   Index into stream where the regular expression match starts.
         end     Index into stream just past the end of the match.
         b       Receives the index in stream of the first character of the
                 returned line.  Used for the -b option and for detecting
                 several matches on one line.
purpose: Create a string that can be printed by jargrep from the long string
         stream.  The line runs from the character after the previous line
         break to the character before the next one; TAB never counts as a
         line break in either direction.
returns: Pointer to newly allocated string containing the matched line; the
         caller frees it.  Exits with status 1 if allocation fails.
*/

char *extract_line(char *stream, regoff_t begin, regoff_t end, int *b) {
    int e;
    int length;
    char *retstr;

    /* Walk back to the line break before the match, then step past it. */
    for(*b = begin; *b >= 0 && !jg_ends_line(stream[*b]); (*b)--);
    (*b)++;

    /* Walk forward to the line break (or terminating NUL) after the match. */
    for(e = end; !jg_ends_line(stream[e]); e++);

    /* An empty match sitting on a line break would give a negative length. */
    length = e - *b;
    if(length < 0) length = 0;

    retstr = malloc((size_t) length + 1);
    if(!retstr) {
        fprintf(stderr, "Malloc failed of output string.\n");
        fprintf(stderr, "Error: %s\n", strerror(errno));
        exit(1);
    }
    memcpy(retstr, &(stream[*b]), (size_t) length);
    retstr[length] = '\0';

    return retstr;
}
295
/* Characters that make up a word: letters, digits and underscore. */
static int jg_is_word_char(char c) {
    return c == '_' || isalnum((unsigned char) c);
}

/*
Function name: chk_wrd
args:    exp  Pointer to compiled POSIX style regular expression of search
              target.
         str  String known to contain at least one match of exp.
purpose: Verify that some occurrence of the regular expression in str stands
         as a whole word, i.e. it is neither preceded nor followed by a
         letter, digit or underscore.
returns: TRUE (non-zero) if a whole word occurrence exists, FALSE (zero) if
         every occurrence is a substring of a larger word.
*/

int chk_wrd(regex_t *exp, char *str) {
    int wrd_fnd = 0;
    char *cursor = str;
    regmatch_t match;

    while(!wrd_fnd && !regexec(exp, cursor, 1, &match, 0)) {
        char *first = cursor + match.rm_so;
        char *past = cursor + match.rm_eo;
        int frnt_ok;
        int bck_ok;

        /* cursor always points into str, so first[-1] is valid unless the
           match sits at the very start of str. */
        frnt_ok = (first == str) || !jg_is_word_char(first[-1]);
        /* The terminating NUL is not a word character, so end of string
           passes this test as well. */
        bck_ok = !jg_is_word_char(*past);
        wrd_fnd = frnt_ok && bck_ok;

        if(match.rm_eo == 0) {
            /* Empty match at the cursor: step over one character so the
               loop always makes progress, and stop at end of string. */
            if(*cursor == '\0') break;
            cursor++;
        }
        else cursor = past;
    }

    return wrd_fnd;
}
332
333 /*
334 Function name: prnt_mtchs
335 args: exp Pointer to compiled POSIX style regular expression of search target.
336 filename String containing the name of the embedded file which matches have
337 been found in.
338 stream String containing the processed contents of the embedded jar file
339 represended with filename.
340 pmatch Array of regmatch_t matches into stream.
341 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
342 not set on command line.
343 num Number of matches in pmatch array.
344 lines Number of lines in file. Not set if -n is not set on command line.
345 options Bitwise flag containing flags set to represent the command line
346 options.
347 purpose: Control output of jargrep. Output is controlled by which options have been
348 set at the command line.
349 */
350
351 void prnt_mtchs(regex_t *exp, char *filename, char *stream, regmatch_t *pmatch, regmatch_t *nl_offset, int num, int lines, int options) {
352 int i;
353 int j = 0;
354 int ln_cnt;
355 int begin;
356 int o_begin;
357 char *str;
358
359 o_begin = -1;
360 ln_cnt = 0;
361 for(i = 0; i < num; i++) {
362 str = extract_line(stream, pmatch[i].rm_so, pmatch[i].rm_eo, &begin);
363 if(begin > o_begin) {
364 if(!(options & JG_WORD_EXPRESSIONS) || chk_wrd(exp, str)) {
365 ln_cnt++;
366 if(!(options & JG_PRINT_COUNT)) {
367 printf("%s:", filename);
368 if(options & JG_PRINT_LINE_NUMBER) {
369 for(; j < lines && nl_offset[j].rm_so < begin; j++);
370 printf("%d:", j + 1);
371 }
372 if(options & JG_PRINT_BYTEOFFSET) printf("%d:", begin);
373 printf("%s\n", str);
374 }
375 }
376 }
377 o_begin = begin;
378 free(str);
379 }
380 if(options & JG_PRINT_COUNT) printf("%s:%d\n", filename, ln_cnt);
381 }
382
383 /*
384 Function name: check_crc
385 args: pbf Pointer to pushback file pointer for jar file.
386 stream String containing the non modified contents fo the extraced file entry.
387 usize Size of file in bytes.
388 purpose: Verify the CRC matches that as what is stored in the jar file.
389 */
390
391 void check_crc(pb_file *pbf, char *stream, ub4 usize) {
392 ub4 crc;
393 ub4 lcrc;
394 ub1 scratch[16];
395
396 crc = crc32(crc, NULL, 0);
397 crc = crc32(crc, stream, usize);
398 if(pb_read(pbf, scratch, 16) != 16) {
399 perror("read");
400 exit(1);
401 }
402 if(UNPACK_UB4(scratch, 0) != 0x08074b50) {
403 fprintf(stderr, "Error! Missing data descriptor!\n");
404 exit(1);
405 }
406 lcrc = UNPACK_UB4(scratch, 4);
407 if(crc != lcrc){
408 fprintf(stderr, "Error! CRCs do not match! Got %x, expected %x\n",
409 crc, lcrc);
410 exit(1);
411 }
412 }
413
/*
Function name: mk_ascii
args:    stream  Buffer that contains the contents of the extracted file
                 entry.  Modified in place.
         usize   Number of bytes of stream to process.
purpose: Make certain that the contents of the file are ASCII, not binary.
         This permits grepping of binary files as well: every byte that is a
         control character (including NUL) or is 128 or above is replaced
         with '\n'.  TAB is left untouched.
*/

void mk_ascii(char *stream, int usize) {
    int i;

    for(i = 0; i < usize; i++) {
        /* ctype functions need an unsigned char value; plain char may be
           negative for bytes of 128 and above. */
        unsigned char ch = (unsigned char) stream[i];

        if(ch != '\t' && (ch >= 128 || iscntrl(ch)))
            stream[i] = '\n';
    }
}
430
/*
Function name: fnd_match
args:    exp         Pointer to compiled POSIX style regular expression of
                     search target.
         str_stream  String that contains the contents of the extracted file
                     entry.
         i           Receives the number of matches found.
purpose: Search str_stream for occurrences of the regular expression exp and
         create an array of matches.  Matches do not overlap; each search
         resumes where the previous match ended.
returns: Pointer to newly allocated array of regmatch_t whose offsets are
         relative to the start of str_stream; the caller frees it.  NULL is
         returned (and *i is 0) when there are no matches.  Exits with status
         1 if the array cannot be grown.
*/

regmatch_t *fnd_match(regex_t *exp, char *str_stream, int *i) {
    regmatch_t match;
    regmatch_t *match_array = NULL;
    regmatch_t *tmp;
    regoff_t start = 0;
    int eflags = 0;

    *i = 0;
    while(!regexec(exp, &(str_stream[start]), 1, &match, eflags)) {
        tmp = realloc(match_array, sizeof *match_array * ((size_t) *i + 1));
        if(!tmp) {
            fprintf(stderr, "Realloc of match_array failed.\n");
            fprintf(stderr, "Error: %s\n", strerror(errno));
            exit(1);
        }
        match_array = tmp;

        /* regexec reports offsets relative to where the search started. */
        match.rm_so += start;
        match.rm_eo += start;
        match_array[*i] = match;
        (*i)++;

        start = match.rm_eo;
        if(match.rm_eo == match.rm_so) {
            /* Empty match: step over one character so the search always
               advances, and stop once the end of the string is reached. */
            if(str_stream[start] == '\0') break;
            start++;
        }

        /* Later searches begin in the middle of the buffer, which must not
           be mistaken for the beginning of the text by a '^' anchor. */
        eflags = REG_NOTBOL;
    }

    return match_array;
}
471
472 /*
473 Function name: cont_grep
474 args: exp Pointer to compiled POSIX style regular expression of search target.
475 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
476 argument is NULL unless the -n option is used on the command line.
477 fd File descriptor of the jar file being grepped.
478 pbf Pointer to pushback file style file stream. This is for use with
479 the pushback.c file io funtions.
480 options Bitwise flag containing flags set to represent the command line options.
481 purpose: This function handles single entries in an open jar file. The header is
482 read and then the embeded file is extracted and grepped.
483 returns: FALSE upon failure, TRUE otherwise.
484 */
485
486 int cont_grep(regex_t *exp, regex_t *nl_exp, int fd, char *jarfile, pb_file *pbf, int options) {
487 int retflag = TRUE;
488 int i;
489 int j;
490 ub4 csize;
491 ub4 usize;
492 ub2 fnlen;
493 ub2 eflen;
494 ub2 flags;
495 ub2 method;
496 ub1 file_header[30];
497 char *filename;
498 char *str_stream;
499 regmatch_t *match_array;
500 regmatch_t *nl_offsets;
501
502 if(pb_read(pbf, (file_header + 4), 26) != 26) {
503 perror("read");
504 retflag = FALSE;
505 }
506 else {
507 decd_siz(&csize, &usize, &fnlen, &eflen, &flags, &method, file_header);
508 filename = new_filename(pbf, fnlen);
509 lseek(fd, eflen, SEEK_CUR);
510 if(filename[fnlen - 1] != '/') {
511 str_stream = (method == 8 || (flags & 0x0008)) ?
512 (char *) inflate_string(pbf, &csize, &usize) :
513 read_string(pbf, csize);
514 if(flags & 0x008) check_crc(pbf, str_stream, usize);
515 mk_ascii(str_stream, usize);
516 match_array = fnd_match(exp, str_stream, &i);
517 if((options & JG_PRINT_LINE_NUMBER) && i)
518 nl_offsets = fnd_match(nl_exp, str_stream, &j);
519 prnt_mtchs(exp, filename, str_stream, match_array, nl_offsets, i, j, options);
520 if(match_array) free(match_array);
521 free(str_stream);
522 }
523 free(filename);
524 retflag = TRUE;
525 }
526
527 return retflag;
528 }
529
530 /*
531 Funtion name: jargrep
532 args: exp Pointer to compiled POSIX style regular expression of search target.
533 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
534 if -n option is present at command line.
535 jarfile Filename of jar file to be searched.
536 options Bitwise flag containing flags set to represent the command line options.
537 purpose: Open jar file. Check signatures. When right signature is found go to deeper
538 grep routine.
539 */
540
541 void jargrep(regex_t *exp, regex_t *nl_exp, char *jarfile, int options) {
542 int fd;
543 int floop = TRUE;
544 pb_file pbf;
545 ub1 scratch[16];
546
547 if((fd = open(jarfile, O_RDONLY)) == -1) {
548 if(!(options & JG_SUPRESS_ERROR))
549 fprintf(stderr, "Error reading file '%s': %s\n", jarfile, strerror(errno));
550 }
551 else {
552 pb_init(&pbf, fd);
553
554 do {
555 if(pb_read(&pbf, scratch, 4) != 4) {
556 perror("read");
557 floop = FALSE;
558 }
559 else {
560 switch (check_sig(scratch, &pbf)) {
561 case 0:
562 floop = cont_grep(exp, nl_exp, fd, jarfile, &pbf, options);
563 break;
564 case 1:
565 floop = FALSE;
566 break;
567 case 2:
568 /* fall through continue */
569 ;
570 }
571 }
572 } while(floop);
573 }
574 }
575
576 /*
577 Funtion Name: main
578 args: argc number of in coming args.
579 argv array of strings.
580 purpose: Entry point of the program. Parse command line arguments and set options.
581 Set up regular expressions. Call grep routines for each file as input.
582 returns: 1 on error 0 on success.
583 */
584
585 int main(int argc, char **argv) {
586 int c;
587 int retval = 0;
588 int fileindex;
589 int options = 0;
590 regex_t *regexp;
591 regex_t *nl_exp = NULL;
592 char *regexpstr = NULL;
593
594 while((c = getopt(argc, argv, "bce:insVw")) != -1) {
595 switch(c) {
596 case 'b':
597 options |= JG_PRINT_BYTEOFFSET;
598 break;
599 case 'c':
600 options |= JG_PRINT_COUNT;
601 break;
602 case 'e':
603 if(!(regexpstr = (char *) malloc(strlen(optarg) + 1))) {
604 fprintf(stderr, "Malloc failure.\n");
605 fprintf(stderr, "Error: %s\n", strerror(errno));
606 exit(1);
607 }
608 strcpy(regexpstr, optarg);
609 break;
610 case 'i':
611 options |= JG_IGNORE_CASE;
612 break;
613 case 'n':
614 options |= JG_PRINT_LINE_NUMBER;
615 break;
616 case 's':
617 options |= JG_SUPRESS_ERROR;
618 break;
619 case 'v':
620 options |= JG_INVERT;
621 break;
622 case 'V':
623 printf("%s\n", GVERSION);
624 exit(0);
625 case 'w':
626 options |= JG_WORD_EXPRESSIONS;
627 break;
628 default:
629 fprintf(stderr, "Unknown option -%c\n", c);
630 fprintf(stderr, Usage, argv[0]);
631 exit(1);
632 }
633 }
634 if(!regexpstr){
635 if(((argc - optind) >= 2)) {
636 regexpstr = argv[optind];
637 fileindex = optind + 1;
638 }
639 else {
640 fprintf(stderr, "Invalid arguments.\n");
641 fprintf(stderr, Usage, argv[0]);
642 exit(1);
643 }
644 }
645 else if((argc - optind) == 1) {
646 fileindex = optind;
647 }
648 else {
649 fprintf(stderr, "Invalid arguments.\n");
650 fprintf(stderr, Usage, argv[0]);
651 exit(1);
652 }
653
654 if(opt_valid(options)) {
655 regexp = create_regexp(regexpstr, options);
656 if(options & JG_PRINT_LINE_NUMBER) nl_exp = create_regexp("\n", 0);
657 init_inflation();
658 for(; fileindex < argc; fileindex++)
659 jargrep(regexp, nl_exp, argv[fileindex], options);
660 regfree(regexp);
661 if(options & JG_PRINT_LINE_NUMBER) regfree(nl_exp);
662 }
663 else {
664 retval = 1;
665 fprintf(stderr, "Error: Invalid combination of options.\n");
666 }
667
668 return retval;
669 }