2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 1999 Bryan Burns
4 Copyright (C) 2000 Cory Hollingsworth
6 Parts of this program are based on Bryan Burns' work with fastjar
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 /* $Id: jargrep.c,v 1.1 2000/12/09 03:08:23 apbianco Exp $
27 Revision 1.1 2000/12/09 03:08:23 apbianco
28 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
32 Revision 1.8 2000/09/13 14:02:02 cory
33 Reformatted some of the code to more closely match the layout of the original
36 Revision 1.7 2000/09/12 22:29:36 cory
37 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
38 will test some other platforms later.
49 #include <sys/types.h>
57 char *Usage
= { "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n" };
62 Function name: opt_valid
63 arg: options Bitfield flag that contains the command line options of grepjar.
64 purpose: To guard against the occurrence of certain incompatible flags being used
66 returns: TRUE if options are valid, FALSE otherwise.
69 int opt_valid(int options
) {
72 if((options
& JG_PRINT_COUNT
) &&
73 (options
& (JG_PRINT_BYTEOFFSET
| JG_PRINT_LINE_NUMBER
)))
83 Function name: create_regexp
84 args: regstr String containing the uncompiled regular expression. This may be the
85 expression as is passed in through argv.
86 options This is the flag containing the commandline options that have been
88 purpose: Handle the exception handling involved with setting up a new regular
90 returns: Newly allocated compiled regular expression ready to be used in a regexec call.
93 regex_t
*create_regexp(char *regstr
, int options
) {
100 if(exp
= (regex_t
*) malloc(sizeof(regex_t
)))
102 if(errcode
= regcomp(exp
, regstr
, (options
& JG_IGNORE_CASE
) ? REG_ICASE
: 0)) {
103 fprintf(stderr
, "regcomp of regex failed,\n");
104 if(errmsg
= (char *) malloc(msgsize
= regerror(errcode
, exp
, NULL
, 0) + 1)) {
105 regerror(errcode
, exp
, errmsg
, msgsize
);
106 fprintf(stderr
, "Error: %s\n", errmsg
);
112 fprintf(stderr
, "Malloc of errmsg failed.\n");
113 fprintf(stderr
, "Error: %s\n", strerror(errno
));
120 fprintf(stderr
, "Malloc of regex failed,\n");
121 fprintf(stderr
, "Error: %s\n", strerror(errno
));
129 Function name: check_sig
130 args: scratch Pointer to array of bytes containing signature.
131 pbfp Pointer to push back handle for jar file.
132 purpose: Verify that the signature is correct.
133 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
134 we have read beyond the embedded file list and can exit knowing we have read all the
135 relevant information. 2 means we still haven't reached the embedded file list and need to
136 do some more reading.
138 int check_sig(ub1
*scratch
, pb_file
*pbfp
) {
142 signature
= UNPACK_UB4(scratch
, 0);
145 printf("signature is %x\n", signature
);
147 if(signature
== 0x08074b50){
149 printf("skipping data descriptor\n");
151 pb_read(pbfp
, scratch
, 12);
153 } else if(signature
== 0x02014b50){
155 printf("Central header reached.. we're all done!\n");
158 }else if(signature
!= 0x04034b50){
159 printf("Ick! %#x\n", signature
);
167 Function name: decd_siz
168 args: csize Pointer to embedded file's compressed size.
169 usize Pointer to embedded file's uncompressed size.
170 fnlen Pointer to embedded file's file name length.
171 eflen Pointer to length of extra fields in jar file.
172 flags Pointer to bitmapped flags.
173 method Pointer to indicator of storage method of embedded file.
174 file_header Pointer to string containing the above values to be unpacked.
175 Purpose: Unpack the series of values from file_header.
178 void decd_siz(ub4
*csize
, ub4
*usize
, ub2
*fnlen
, ub2
*eflen
, ub2
*flags
, ub2
*method
, ub1
*file_header
) {
179 *csize
= UNPACK_UB4(file_header
, LOC_CSIZE
);
181 printf("Compressed size is %u\n", *csize
);
184 *usize
= UNPACK_UB4(file_header
, LOC_USIZE
);
186 printf("Uncompressed size is %u\n", *usize
);
189 *fnlen
= UNPACK_UB2(file_header
, LOC_FNLEN
);
191 printf("Filename length is %hu\n", *fnlen
);
194 *eflen
= UNPACK_UB2(file_header
, LOC_EFLEN
);
196 printf("Extra field length is %hu\n", *eflen
);
199 *flags
= UNPACK_UB2(file_header
, LOC_EXTRA
);
201 printf("Flags are %#hx\n", *flags
);
204 *method
= UNPACK_UB2(file_header
, LOC_COMP
);
206 printf("Compression method is %#hx\n", *method
);
212 Function name: new_filename
213 args: pbf Pointer to push back file handle. Used for reading input file.
214 len Length of file name to be read.
215 purpose: Read in the embedded file name from jar file.
216 returns: Pointer to newly allocated string containing file name.
219 char *new_filename(pb_file
*pbf
, ub4 len
) {
222 if(!(filename
= (char *) malloc(len
+ 1))) {
223 fprintf(stderr
, "Malloc failed of filename\n");
224 fprintf(stderr
, "Error: %s\n", strerror(errno
));
226 pb_read(pbf
, filename
, len
);
227 filename
[len
] = '\0';
230 printf("filename is %s\n", filename
);
237 Function name: read_string
238 args: pbf Pointer to push back file handle. Used for reading input file.
239 size Size of embedded file in bytes.
240 purpose: Create a string containing the contents of the embedded noncompressed file.
241 returns: Pointer to newly allocated string containing embedded file contents.
244 char *read_string(pb_file
*pbf
, int size
) {
247 if(page
= (char *) malloc(size
+ 1)) {
248 pb_read(pbf
, page
, size
);
252 fprintf(stderr
, "Malloc of page buffer failed.\n");
253 fprintf(stderr
, "Error: %s\n", strerror(errno
));
261 Function name: extract_line
262 args: stream String containing the full contents of a file which is to be substringed
263 in order to provide line representing our grep output.
264 begin Index into stream at which the regular expression match begins.
265 end Index into stream at which the regular expression match ends.
266 b Pointer to the index of what will be the beginning of the line when
267 string is returned. Used for -b option.
268 purpose: Create a string that can be printed by jargrep from the long string stream.
269 The matching line that is printed out by jargrep is generated by this function.
270 returns: Pointer to newly allocated string containing matched expression.
273 char *extract_line(char *stream
, regoff_t begin
, regoff_t end
, int *b
) {
278 for(*b
= begin
; *b
>= 0 && !iscntrl(stream
[*b
]); (*b
)--);
280 for(e
= end
; stream
[e
] == '\t' || !iscntrl(stream
[e
]); e
++);
282 if(retstr
= (char *) malloc(length
+ 1)) {
283 sprintf(retstr
, "%d:", *b
);
284 strncpy(retstr
, &(stream
[*b
]), length
);
285 retstr
[length
] = '\0';
288 fprintf(stderr
, "Malloc failed of output string.\n");
289 fprintf(stderr
, "Error: %s\n", strerror(errno
));
297 Function name: chk_wrd
298 args: exp Pointer to compiled POSIX style regular expression of search target.
299 str String known to contain at least one match of exp.
300 purpose: Verify that the occurrence of the regular expression in str occurs as a whole
301 word and not a substring of another word.
302 returns: TRUE if it is a word, FALSE if it is a substring.
305 int chk_wrd(regex_t
*exp
, char *str
) {
314 frnt_ok
= bck_ok
= FALSE
;
315 while(!wrd_fnd
&& !(regflag
= regexec(exp
, str2
, 1, &match
, 0))) {
316 if(!match
.rm_so
&& (str2
== str
)) frnt_ok
= TRUE
;
317 else if(!isalnum(str2
[match
.rm_so
- 1]) && str2
[match
.rm_so
- 1] != '_')
319 else frnt_ok
= FALSE
;
321 if(str2
[match
.rm_eo
] == '\0') bck_ok
= TRUE
;
322 else if(!isalnum(str2
[match
.rm_eo
]) && str2
[match
.rm_eo
] != '_')
326 wrd_fnd
= frnt_ok
&& bck_ok
;
327 str2
= &(str2
[match
.rm_eo
]);
334 Function name: prnt_mtchs
335 args: exp Pointer to compiled POSIX style regular expression of search target.
336 filename String containing the name of the embedded file which matches have
338 stream String containing the processed contents of the embedded jar file
339 represented by filename.
340 pmatch Array of regmatch_t matches into stream.
341 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
342 not set on command line.
343 num Number of matches in pmatch array.
344 lines Number of lines in file. Not set if -n is not set on command line.
345 options Bitwise flag containing flags set to represent the command line
347 purpose: Control output of jargrep. Output is controlled by which options have been
348 set at the command line.
351 void prnt_mtchs(regex_t
*exp
, char *filename
, char *stream
, regmatch_t
*pmatch
, regmatch_t
*nl_offset
, int num
, int lines
, int options
) {
361 for(i
= 0; i
< num
; i
++) {
362 str
= extract_line(stream
, pmatch
[i
].rm_so
, pmatch
[i
].rm_eo
, &begin
);
363 if(begin
> o_begin
) {
364 if(!(options
& JG_WORD_EXPRESSIONS
) || chk_wrd(exp
, str
)) {
366 if(!(options
& JG_PRINT_COUNT
)) {
367 printf("%s:", filename
);
368 if(options
& JG_PRINT_LINE_NUMBER
) {
369 for(; j
< lines
&& nl_offset
[j
].rm_so
< begin
; j
++);
370 printf("%d:", j
+ 1);
372 if(options
& JG_PRINT_BYTEOFFSET
) printf("%d:", begin
);
380 if(options
& JG_PRINT_COUNT
) printf("%s:%d\n", filename
, ln_cnt
);
384 Function name: check_crc
385 args: pbf Pointer to pushback file pointer for jar file.
386 stream String containing the unmodified contents of the extracted file entry.
387 usize Size of file in bytes.
388 purpose: Verify that the CRC matches the one stored in the jar file.
391 void check_crc(pb_file
*pbf
, char *stream
, ub4 usize
) {
396 crc
= crc32(crc
, NULL
, 0);
397 crc
= crc32(crc
, stream
, usize
);
398 if(pb_read(pbf
, scratch
, 16) != 16) {
402 if(UNPACK_UB4(scratch
, 0) != 0x08074b50) {
403 fprintf(stderr
, "Error! Missing data descriptor!\n");
406 lcrc
= UNPACK_UB4(scratch
, 4);
408 fprintf(stderr
, "Error! CRCs do not match! Got %x, expected %x\n",
415 Function name: mk_ascii
416 args: stream String that contains the contents of the extracted file entry.
418 purpose: Make certain that the contents of the file are ASCII, not binary. This
419 permits grepping of binary files as well by converting non ASCII and control characters
423 void mk_ascii(char *stream
, int usize
) {
426 for(i
= 0; i
< usize
; i
++)
427 if(stream
[i
] != '\t' && (iscntrl(stream
[i
]) || (unsigned char) stream
[i
] >= 128))
432 Function name: fnd_match
433 args: exp Pointer to compiled POSIX style regular expression of search target.
434 str_stream String that contains the contents of the extracted file entry.
435 i Pointer to counter and index of matches.
436 purpose: Search str_stream for occurrences of the regular expression exp and create
438 returns: Pointer to newly allocated array of regmatch_t which gives indexes to start
439 and end of matches. NULL is returned upon no matches found.
442 regmatch_t
*fnd_match(regex_t
*exp
, char *str_stream
, int *i
) {
445 regmatch_t
*match_array
;
449 for(*i
= 0, regflag
= regexec(exp
, str_stream
, 1, &match
, 0); !regflag
;
450 regflag
= regexec(exp
, &(str_stream
[match
.rm_eo
]), 1, &match
, 0), (*i
)++)
452 if(tmp
= (regmatch_t
*)
453 realloc(match_array
, sizeof(regmatch_t
) * ((*i
) + 1)))
457 match
.rm_so
+= match_array
[(*i
) - 1].rm_eo
;
458 match
.rm_eo
+= match_array
[(*i
) - 1].rm_eo
;
460 match_array
[*i
] = match
;
463 fprintf(stderr
, "Realloc of match_array failed.\n");
464 fprintf(stderr
, "Error: %s\n", strerror(errno
));
473 Function name: cont_grep
474 args: exp Pointer to compiled POSIX style regular expression of search target.
475 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
476 argument is NULL unless the -n option is used on the command line.
477 fd File descriptor of the jar file being grepped.
478 pbf Pointer to pushback file style file stream. This is for use with
479 the pushback.c file io functions.
480 options Bitwise flag containing flags set to represent the command line options.
481 purpose: This function handles single entries in an open jar file. The header is
482 read and then the embedded file is extracted and grepped.
483 returns: FALSE upon failure, TRUE otherwise.
486 int cont_grep(regex_t
*exp
, regex_t
*nl_exp
, int fd
, char *jarfile
, pb_file
*pbf
, int options
) {
499 regmatch_t
*match_array
;
500 regmatch_t
*nl_offsets
;
502 if(pb_read(pbf
, (file_header
+ 4), 26) != 26) {
507 decd_siz(&csize
, &usize
, &fnlen
, &eflen
, &flags
, &method
, file_header
);
508 filename
= new_filename(pbf
, fnlen
);
509 lseek(fd
, eflen
, SEEK_CUR
);
510 if(filename
[fnlen
- 1] != '/') {
511 str_stream
= (method
== 8 || (flags
& 0x0008)) ?
512 (char *) inflate_string(pbf
, &csize
, &usize
) :
513 read_string(pbf
, csize
);
514 if(flags
& 0x008) check_crc(pbf
, str_stream
, usize
);
515 mk_ascii(str_stream
, usize
);
516 match_array
= fnd_match(exp
, str_stream
, &i
);
517 if((options
& JG_PRINT_LINE_NUMBER
) && i
)
518 nl_offsets
= fnd_match(nl_exp
, str_stream
, &j
);
519 prnt_mtchs(exp
, filename
, str_stream
, match_array
, nl_offsets
, i
, j
, options
);
520 if(match_array
) free(match_array
);
531 Function name: jargrep
532 args: exp Pointer to compiled POSIX style regular expression of search target.
533 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
534 if -n option is present at command line.
535 jarfile Filename of jar file to be searched.
536 options Bitwise flag containing flags set to represent the command line options.
537 purpose: Open jar file. Check signatures. When right signature is found go to deeper
541 void jargrep(regex_t
*exp
, regex_t
*nl_exp
, char *jarfile
, int options
) {
547 if((fd
= open(jarfile
, O_RDONLY
)) == -1) {
548 if(!(options
& JG_SUPRESS_ERROR
))
549 fprintf(stderr
, "Error reading file '%s': %s\n", jarfile
, strerror(errno
));
555 if(pb_read(&pbf
, scratch
, 4) != 4) {
560 switch (check_sig(scratch
, &pbf
)) {
562 floop
= cont_grep(exp
, nl_exp
, fd
, jarfile
, &pbf
, options
);
568 /* fall through continue */
578 args: argc number of incoming args.
579 argv array of strings.
580 purpose: Entry point of the program. Parse command line arguments and set options.
581 Set up regular expressions. Call grep routines for each file as input.
582 returns: 1 on error 0 on success.
585 int main(int argc
, char **argv
) {
591 regex_t
*nl_exp
= NULL
;
592 char *regexpstr
= NULL
;
594 while((c
= getopt(argc
, argv
, "bce:insVw")) != -1) {
597 options
|= JG_PRINT_BYTEOFFSET
;
600 options
|= JG_PRINT_COUNT
;
603 if(!(regexpstr
= (char *) malloc(strlen(optarg
) + 1))) {
604 fprintf(stderr
, "Malloc failure.\n");
605 fprintf(stderr
, "Error: %s\n", strerror(errno
));
608 strcpy(regexpstr
, optarg
);
611 options
|= JG_IGNORE_CASE
;
614 options
|= JG_PRINT_LINE_NUMBER
;
617 options
|= JG_SUPRESS_ERROR
;
620 options
|= JG_INVERT
;
623 printf("%s\n", GVERSION
);
626 options
|= JG_WORD_EXPRESSIONS
;
629 fprintf(stderr
, "Unknown option -%c\n", c
);
630 fprintf(stderr
, Usage
, argv
[0]);
635 if(((argc
- optind
) >= 2)) {
636 regexpstr
= argv
[optind
];
637 fileindex
= optind
+ 1;
640 fprintf(stderr
, "Invalid arguments.\n");
641 fprintf(stderr
, Usage
, argv
[0]);
645 else if((argc
- optind
) == 1) {
649 fprintf(stderr
, "Invalid arguments.\n");
650 fprintf(stderr
, Usage
, argv
[0]);
654 if(opt_valid(options
)) {
655 regexp
= create_regexp(regexpstr
, options
);
656 if(options
& JG_PRINT_LINE_NUMBER
) nl_exp
= create_regexp("\n", 0);
658 for(; fileindex
< argc
; fileindex
++)
659 jargrep(regexp
, nl_exp
, argv
[fileindex
], options
);
661 if(options
& JG_PRINT_LINE_NUMBER
) regfree(nl_exp
);
665 fprintf(stderr
, "Error: Invalid combination of options.\n");