1 | #!/usr/bin/perl |
---|
2 | # ======================================================================== # |
---|
3 | # # |
---|
4 | # File : grepx.pl # |
---|
5 | # Purpose : Replacement for grep (used from emacs) # |
---|
6 | # # |
---|
7 | # (C) November 2005 by Ralf Westram # |
---|
8 | # # |
---|
9 | # Permission to use, copy, modify, distribute and sell this software # |
---|
10 | # and its documentation for any purpose is hereby granted without fee, # |
---|
11 | # provided that the above copyright notice appear in all copies and # |
---|
12 | # that both that copyright notice and this permission notice appear # |
---|
13 | # in supporting documentation. # |
---|
14 | # # |
---|
15 | # Ralf Westram makes no representations about the suitability of this # |
---|
16 | # software for any purpose. It is provided "as is" without express or # |
---|
17 | # implied warranty. # |
---|
18 | # # |
---|
19 | # ======================================================================== # |
---|
20 | # |
---|
21 | # Improvements compared with grep: |
---|
22 | # |
---|
23 | # * prints line column information |
---|
24 | # * knows about groups of files belonging together (e.g. *.cxx *.hxx) |
---|
25 | # * knows about special file locations (e.g. emacs lisp code, /usr/include, ...) |
---|
26 | # * able to search complete CVS/SVN trees |
---|
27 | # * some ARB specific specials |
---|
28 | # |
---|
29 | # -------------------------------------------------------------------------------- |
---|
30 | |
---|
31 | use strict; |
---|
32 | use warnings; |
---|
33 | use Cwd; |
---|
34 | |
---|
35 | # -------------------------------------------------------------------------------- |
---|
36 | |
---|
37 | my $tabsize = 4; # specify your emacs tabsize here (used to correct column position) |
---|
38 | |
---|
39 | # -------------------------------------------------------------------------------- |
---|
40 | # group definitions (you may want to change here): |
---|
41 | # |
---|
42 | # Each element in groups defines a cluster of files. |
---|
43 | # One cluster consists of: |
---|
44 | # |
---|
45 | # [0] = ref to array of header extensions |
---|
46 | # [1] = ref to array of normal extensions |
---|
47 | # [2] = ref to array of add. directories to search for |
---|
48 | # [3] = ref to array of add. extensions to search |
---|
49 | # |
---|
50 | # If extension given is member of [0] (or [1] if not -h given) of a cluster, |
---|
51 | # then the cluster gets activated (we call this an AC). Extensions in [3] do |
---|
52 | # NOT activate clusters! |
---|
53 | # |
---|
54 | # If -h is given, only extensions from [0] of all ACs are searched |
---|
55 | # otherwise those from [1] and [3] are added. ([3] is todo!) |
---|
56 | # |
---|
57 | # If -g is given the add. directories from [2] of all ACs are searched as well. |
---|
58 | |
---|
59 | |
---|
60 | my @groups = ( |
---|
61 | # C/C++ |
---|
62 | [ |
---|
63 | [ '.hxx', '.hpp', '.hh', '.h' ], # header files |
---|
64 | [ '.cxx', '.cpp', '.cc', '.c' ], # code files |
---|
65 | [ |
---|
66 | '/usr/include', |
---|
67 | '/usr/include/X11', |
---|
68 | '/usr/include/g++', |
---|
69 | '/usr/include/sys', |
---|
70 | ], # additional header directories (used with -g) |
---|
71 | [ '.aisc', '.pa' ], |
---|
72 | ], |
---|
73 | # ARB code generation |
---|
74 | [ |
---|
75 | [ ], |
---|
76 | [ '.aisc', '.pa' ], |
---|
77 | [ ], |
---|
78 | [ '.cxx', '.cpp', '.cc', '.c', '.hxx', '.hpp', '.hh', '.h' ], |
---|
79 | ], |
---|
80 | # perl |
---|
81 | [ |
---|
82 | [ '.pm' ], # header files |
---|
83 | [ '.pl', '.cgi' ], # code files |
---|
84 | [ '/usr/lib/perl5' ], # additional header directories (used with -g) |
---|
85 | ], |
---|
86 | # java |
---|
87 | [ |
---|
88 | [ ], # java sucks headers |
---|
89 | [ '.java' ], |
---|
90 | ], |
---|
91 | # xml development |
---|
92 | [ |
---|
93 | [ '.dtd' ], |
---|
94 | [ '.xml', '.xsl' ], |
---|
95 | ], |
---|
96 | # lisp |
---|
97 | [ |
---|
98 | [ ], |
---|
99 | [ '.el' ], |
---|
100 | [ |
---|
101 | '/usr/share/emacs/site-lisp', |
---|
102 | '/usr/share/xemacs', |
---|
103 | ], |
---|
104 | ], |
---|
105 | # shell-scripts etc. |
---|
106 | [ |
---|
107 | [ ], |
---|
108 | [ '.sh', '.cmd', '.bat' ], |
---|
109 | ], |
---|
110 | # text files |
---|
111 | [ |
---|
112 | [ ], |
---|
113 | [ '.txt', '.readme' ], |
---|
114 | ], |
---|
115 | # html etc |
---|
116 | [ |
---|
117 | [ ], |
---|
118 | [ '.html', '.htm' ], |
---|
119 | ], |
---|
120 | # hamster scripts |
---|
121 | [ |
---|
122 | [ '.hsm' ], |
---|
123 | [ '.hsc' ], |
---|
124 | [ ], # no add. directories |
---|
125 | [ '.ini' ], # search add. but don't add cluster if included |
---|
126 | ], |
---|
127 | # Euphoria |
---|
128 | [ |
---|
129 | [ '.e' ], |
---|
130 | [ '.exw' ], |
---|
131 | ], |
---|
132 | # ARB specifics |
---|
133 | [ # anything where aci/srt commands occur |
---|
134 | [ ], |
---|
135 | [ '.menu', '.source', '.hlp', '.eft', '.ift', '.mask', '.sellst' ], |
---|
136 | [ ], |
---|
137 | [ '.c', '.cxx' ], |
---|
138 | ], |
---|
139 | ); |
---|
140 | |
---|
141 | # files always searched (not by 'same' and 'header' search) |
---|
142 | my @normally_searches = ( 'makefile' ); |
---|
143 | |
---|
144 | # files always searched by global search |
---|
145 | my @global_always_searches = ( ); |
---|
146 | |
---|
147 | # -------------------------------------------------------------------------------- |
---|
148 | |
---|
149 | my $global = 0; |
---|
150 | my $headers_only = 0; |
---|
151 | my $same_ext_only = 0; |
---|
152 | my $ignore_case = 0; |
---|
153 | my $recurse_subdirs = 0; |
---|
154 | my $one_hit_per_line = 0; |
---|
155 | my $verbose = 0; |
---|
156 | my $matchFiles = 1; |
---|
157 | my $arbSpecials = 0; |
---|
158 | my $maxhits = undef; # undef means unlimited |
---|
159 | my $searchNonCVS = 0; |
---|
160 | |
---|
161 | my $extension = undef; |
---|
162 | my $use_as_wildcard = 0; |
---|
163 | my $regexpr = undef; |
---|
164 | |
---|
165 | my $calldir = cwd(); |
---|
166 | my $startdir = undef; |
---|
167 | |
---|
168 | # -------------------------------------------------------------------------------- |
---|
169 | |
---|
170 | my $GSM_NONE = 0; |
---|
171 | my $GSM_CVS = 1; # scan a CVS/SVN tree |
---|
172 | my $GSM_PARENT = 2; # do a simple parent scan |
---|
173 | |
---|
174 | my $global_scan_mode = $GSM_NONE; |
---|
175 | |
---|
176 | # -------------------------------------------------------------------------------- |
---|
177 | |
---|
178 | sub shall_skip_file($) { |
---|
179 | my ($file) = @_; |
---|
180 | die "arbSpecials not 1" if ($arbSpecials!=1); |
---|
181 | if ($file =~ /PERL2ARB\//o) { |
---|
182 | my $rest = $'; |
---|
183 | if ($rest eq 'ARB.c' or $rest eq 'proto.h') { return 1; } |
---|
184 | } |
---|
185 | elsif ($file =~ /lib\/help\//o) { |
---|
186 | return 1; |
---|
187 | } |
---|
188 | return 0; |
---|
189 | } |
---|
190 | |
---|
191 | # -------------------------------------------------------------------------------- |
---|
192 | |
---|
193 | my @ignores = (); # directory local excludes (reg.expressions) |
---|
194 | my $ignoreCount = 0; # overall ignore count |
---|
195 | |
---|
196 | sub forget_grepxignore() { @ignores = (); } |
---|
197 | |
---|
198 | sub load_grepxignore($) { |
---|
199 | my ($grepxignore) = @_; |
---|
200 | |
---|
201 | @ignores = (); |
---|
202 | open(IGNORE,'<'.$grepxignore) || die "can't open '$grepxignore' (Reason: $!)"; |
---|
203 | foreach (<IGNORE>) { |
---|
204 | chomp; |
---|
205 | push @ignores, qr/^$_$/; |
---|
206 | } |
---|
207 | close(IGNORE); |
---|
208 | } |
---|
209 | |
---|
210 | sub is_ignored($) { |
---|
211 | my ($name) = @_; |
---|
212 | foreach (@ignores) { |
---|
213 | if ($name =~ $_) { |
---|
214 | $verbose==0 || print "Ignoring '$name' (by $_)\n"; |
---|
215 | $ignoreCount++; |
---|
216 | return 1; |
---|
217 | } |
---|
218 | } |
---|
219 | return 0; |
---|
220 | } |
---|
221 | |
---|
222 | # -------------------------------------------------------------------------------- |
---|
223 | |
---|
224 | my $reg_nameOnly = qr/\/([^\/]+)$/; |
---|
225 | my $reg_extension = qr/(\.[^\.]+)$/; |
---|
226 | # (\.[^\.]+) |
---|
227 | |
---|
228 | my ($IS_HEADER,$IS_NORMAL,$IS_OTHER,$IS_ADDITIONAL) = (4,3,2,1); |
---|
229 | |
---|
230 | my %wanted_extensions = (); |
---|
231 | my %wanted_files = (); # files that are always searched |
---|
232 | |
---|
233 | my @add_header_dirs = (); |
---|
234 | |
---|
235 | my $reg_is_cpp_std_dir = qr/^\/usr\/include\/g\+\+(\/|$)/; |
---|
236 | |
---|
237 | sub shall_search_file($$) { |
---|
238 | my ($file,$indir) = @_; |
---|
239 | |
---|
240 | if ($use_as_wildcard==0) { |
---|
241 | if ($file =~ $reg_nameOnly) { $file = $1; } # behind last / |
---|
242 | |
---|
243 | if ($file =~ /^\.?\#/ or $file =~ /~$/) { return 0; } # skip backup files etc. |
---|
244 | |
---|
245 | my $ext = ''; |
---|
246 | if ($file =~ $reg_extension) { $ext = $1; } |
---|
247 | |
---|
248 | if ($ext eq '') { |
---|
249 | if ($indir =~ $reg_is_cpp_std_dir) { |
---|
250 | # print "hack: considering $file in $indir\n"; |
---|
251 | $ext = '.h'; # special hack for new style C++ header (they suck an extension) |
---|
252 | } |
---|
253 | else { |
---|
254 | if (not $haveFile) { return 0; } |
---|
255 | my $full = $indir.'/'.$file; |
---|
256 | my $type = `file $full`; # detect filetype |
---|
257 | chomp $type; |
---|
258 | if ($type =~ /^[^:]+: (.*)/o) { |
---|
259 | $type = $1; |
---|
260 | if ($type =~ /shell.*script/o) { $ext = '.sh'; } |
---|
261 | elsif ($type =~ /perl.*script/o) { $ext = '.pl'; } |
---|
262 | elsif ($type =~ /ASCII.*text/o) { $ext = '.txt'; } |
---|
263 | elsif ($type =~ /ISO.*text/o) { $ext = '.txt'; } |
---|
264 | elsif ($type =~ /executable/o) { ; } |
---|
265 | elsif ($type =~ /symbolic.link.to/o) { ; } |
---|
266 | else { |
---|
267 | print "Unhandled file='$full'\n type='$type'\n"; |
---|
268 | } |
---|
269 | } |
---|
270 | } |
---|
271 | } |
---|
272 | |
---|
273 | $ext = lc($ext); |
---|
274 | if (exists $wanted_extensions{$ext}) { return NotIgnored($file,$wanted_extensions{$ext}); } |
---|
275 | |
---|
276 | $file = lc($file); |
---|
277 | if (exists $wanted_files{$file}) { return NotIgnored($file,$IS_OTHER); } |
---|
278 | } |
---|
279 | else { |
---|
280 | if ($file =~ /$extension/ig) { |
---|
281 | return NotIgnored($file,$IS_NORMAL); |
---|
282 | } |
---|
283 | } |
---|
284 | |
---|
285 | return 0; |
---|
286 | } |
---|
287 | |
---|
288 | sub memberOf($\@) { |
---|
289 | my ($ext, $extArray_r) = @_; |
---|
290 | foreach (@$extArray_r) { |
---|
291 | if ($ext eq $_) { return 1; } |
---|
292 | } |
---|
293 | return undef; |
---|
294 | } |
---|
295 | |
---|
296 | sub add_files(\@$) { |
---|
297 | my ($ext_array_r,$value) = @_; |
---|
298 | foreach (@$ext_array_r) { $wanted_extensions{$_} = $value; } |
---|
299 | } |
---|
300 | |
---|
301 | sub init_wanted() { |
---|
302 | %wanted_extensions = (); |
---|
303 | %wanted_files = (); |
---|
304 | |
---|
305 | if ($same_ext_only==0 and $headers_only==0) { |
---|
306 | foreach (@normally_searches) { $wanted_files{$_} = 1; } |
---|
307 | } |
---|
308 | if ($global==1) { |
---|
309 | foreach (@global_always_searches) { $wanted_files{$_} = 1; } |
---|
310 | } |
---|
311 | |
---|
312 | if ($same_ext_only) { |
---|
313 | $wanted_extensions{$extension} = $IS_NORMAL; |
---|
314 | } |
---|
315 | elsif ($extension eq '') { |
---|
316 | foreach my $group_r (@groups) { |
---|
317 | my $header_r = $$group_r[0]; |
---|
318 | my $nheader_r = $$group_r[1]; |
---|
319 | |
---|
320 | add_files(@$header_r, $IS_HEADER); |
---|
321 | if ($headers_only==0) { add_files(@$nheader_r, $IS_NORMAL); } |
---|
322 | } |
---|
323 | my $which = ''; |
---|
324 | if ($headers_only==1) { $which = 'header-'; } |
---|
325 | print "grepx: No extension given - searching all known ".$which."extensions.\n"; |
---|
326 | } |
---|
327 | else { |
---|
328 | my $found_class = 0; |
---|
329 | my $group_count = 0; |
---|
330 | eval { |
---|
331 | foreach my $group_r (@groups) { |
---|
332 | my $group_defs = scalar(@$group_r); |
---|
333 | if ($group_defs<2) { die "Not enough entries (min. 2 are expected)"; } |
---|
334 | |
---|
335 | my $header_r = $$group_r[0]; |
---|
336 | my $nheader_r = $$group_r[1]; |
---|
337 | |
---|
338 | if (memberOf($extension, @$header_r) or memberOf($extension, @$nheader_r)) { # is group active? |
---|
339 | $verbose==0 || print "'$extension' found in [@$header_r] or [@$nheader_r] - adding tables\n"; |
---|
340 | $found_class = 1; |
---|
341 | |
---|
342 | add_files(@$header_r, $IS_HEADER); |
---|
343 | if ($headers_only==0) { add_files(@$nheader_r, $IS_NORMAL); } |
---|
344 | |
---|
345 | # 3rd entry is array of directories for -h -g |
---|
346 | if ($group_defs>=3) { |
---|
347 | my $add_dir_r = $$group_r[2]; |
---|
348 | foreach my $adir (@$add_dir_r) { |
---|
349 | if (-d $adir) { push @add_header_dirs, $adir; } |
---|
350 | else { print "grepx: No such directory '$adir'\n"; } |
---|
351 | } |
---|
352 | |
---|
353 | if ($group_defs>=4) { |
---|
354 | my $add_extensions_r = $$group_r[3]; |
---|
355 | if ($verbose>0) { |
---|
356 | print "Adding add. extensions:"; |
---|
357 | foreach (@$add_extensions_r) { print " $_"; } |
---|
358 | print "\n"; |
---|
359 | } |
---|
360 | add_files(@$add_extensions_r, $IS_ADDITIONAL); |
---|
361 | } |
---|
362 | } |
---|
363 | } |
---|
364 | $group_count++; |
---|
365 | } |
---|
366 | }; |
---|
367 | if ($@) { die "Error parsing \@groups[$group_count]: $@"; } |
---|
368 | |
---|
369 | if ($found_class==0) { |
---|
370 | print "grepx: No class defined for '$extension' .. searching only '$extension' files\n"; |
---|
371 | $wanted_extensions{$extension} = $IS_NORMAL; |
---|
372 | } |
---|
373 | } |
---|
374 | |
---|
375 | if ($verbose==1) { |
---|
376 | print "grepx: Searching"; |
---|
377 | foreach (keys %wanted_extensions) { print " *$_"; } |
---|
378 | foreach (keys %wanted_files) { print " $_"; } |
---|
379 | print "\n"; |
---|
380 | } |
---|
381 | |
---|
382 | } |
---|
383 | |
---|
384 | # -------------------------------------------------------------------------------- |
---|
385 | |
---|
386 | sub print_usage() { |
---|
387 | print "Usage: grepx 'ext' 'regexpr'\n". |
---|
388 | "Options:\n". |
---|
389 | " -g -> search globally (smart detect what global means)\n". |
---|
390 | " -h -> search in header files only (depends on 'ext')\n". |
---|
391 | " -s -> search in same fileextension only (default is to search file group)\n". |
---|
392 | " -i -> ignore case\n". |
---|
393 | " -r -> recurse subdirs\n". |
---|
394 | " -o -> one hit per line (default is to report multiple hits)\n". |
---|
395 | " -v -> be verbose (for debugging)\n". |
---|
396 | " -n -> don't match filenames\n". |
---|
397 | " -A -> do ARB specials if \$ARBHOME is defined\n". |
---|
398 | " -m xxx -> report max. xxx hits\n". |
---|
399 | " -c -> search in non-CVS/SVN files as well (default is to search CVS/SVN controlled files only)". |
---|
400 | "\n". |
---|
401 | " 'ext' extension of file where grepx is called from\n". |
---|
402 | " 'regexpr' perl regular expression\n\n"; |
---|
403 | } |
---|
404 | |
---|
405 | # -------------------------------------------------------------------------------- |
---|
406 | |
---|
407 | sub parse_args() { |
---|
408 | my $args = scalar(@ARGV); |
---|
409 | my @non_opt_args = (); |
---|
410 | my $ap = 0; |
---|
411 | |
---|
412 | while ($ap<$args) { |
---|
413 | if ($ARGV[$ap] =~ /^-/) { |
---|
414 | my $option = $'; |
---|
415 | if ($option eq 'g') { $global = 1; } |
---|
416 | elsif ($option eq 'h') { $headers_only = 1; } |
---|
417 | elsif ($option eq 's') { $same_ext_only = 1; } |
---|
418 | elsif ($option eq 'i') { $ignore_case = 1; } |
---|
419 | elsif ($option eq 'r') { $recurse_subdirs = 1; } |
---|
420 | elsif ($option eq 'o') { $one_hit_per_line = 1; } |
---|
421 | elsif ($option eq 'v') { $verbose = 1; } |
---|
422 | elsif ($option eq 'n') { $matchFiles = 0; } |
---|
423 | elsif ($option eq 'A') { |
---|
424 | if (exists $ENV{'ARBHOME'}) { $arbSpecials = 1; } |
---|
425 | else { print "grepx: Ignoring -A (ARBHOME not set)"; } |
---|
426 | } |
---|
427 | elsif ($option eq 'm') { $maxhits = int($ARGV[++$ap]); } |
---|
428 | elsif ($option eq 'c') { $searchNonCVS = 1; } |
---|
429 | else { die "Unknown option '-$option'\n"; } |
---|
430 | } |
---|
431 | else { |
---|
432 | if ($ARGV[$ap] ne '/dev/null') { |
---|
433 | push @non_opt_args, $ARGV[$ap]; |
---|
434 | } |
---|
435 | } |
---|
436 | $ap++; |
---|
437 | } |
---|
438 | |
---|
439 | my $restargs = scalar(@non_opt_args); |
---|
440 | # print "\@non_opt_args=@non_opt_args\n"; |
---|
441 | if ($restargs!=2) { die "Expected exactly two normal arguments (non-switches), found $restargs\n"; } |
---|
442 | |
---|
443 | $extension = $non_opt_args[0]; |
---|
444 | $regexpr = $non_opt_args[1]; |
---|
445 | $verbose==0 || print "grepx: Using regular expression '$regexpr'\n"; |
---|
446 | |
---|
447 | if ($ignore_case==1) { $regexpr = qr/$regexpr/i; } |
---|
448 | else { $regexpr = qr/$regexpr/; } |
---|
449 | |
---|
450 | if ($headers_only==1 and $same_ext_only==1) { die "Options -s and -h may not be used together\n"; } |
---|
451 | } |
---|
452 | |
---|
453 | # -------------------------------------------------------------------------------- |
---|
454 | |
---|
455 | sub pos_correction($$) { |
---|
456 | my ($line,$pos) = @_; |
---|
457 | my $prematch = substr($line,0,$pos); |
---|
458 | $prematch =~ s/[^\t]//go; |
---|
459 | return length($prematch)*($tabsize-1); |
---|
460 | } |
---|
461 | |
---|
462 | my $lines_examined = 0; |
---|
463 | my $reg_startdir = undef; |
---|
464 | |
---|
465 | sub grepfile($$\$) { |
---|
466 | my ($file,$entering,$entering_shown_r) = @_; |
---|
467 | |
---|
468 | my $matches = 0; |
---|
469 | my $reported = 0; |
---|
470 | my $show = $file; |
---|
471 | |
---|
472 | if ($file =~ $reg_startdir) { $show = $'; } |
---|
473 | |
---|
474 | open(FILE,"<$file") || die "can't read file '$file' (Reason: $!)"; |
---|
475 | while (my $line = <FILE>) { |
---|
476 | if ($line =~ $regexpr) { |
---|
477 | if ((not defined $maxhits) or ($maxhits>0)) { |
---|
478 | my $rest = $'; |
---|
479 | my $hitlen = $+[0] - $-[0]; |
---|
480 | my $pos; |
---|
481 | |
---|
482 | $hitlen>0 || die "Non-positive hitlen (=$hitlen) [1]"; |
---|
483 | |
---|
484 | if ($#+ > 0) { # regexpr has subgroups -> point to start of first subgroup |
---|
485 | $pos = $-[$#+] + 1; # start of first subgroup |
---|
486 | } |
---|
487 | else { |
---|
488 | $pos = $-[0] + 1; # start of regexpr |
---|
489 | } |
---|
490 | |
---|
491 | if ($matches==0 and $arbSpecials==1) { |
---|
492 | if (shall_skip_file($file)==1) { |
---|
493 | print "grepx: Unlisted occurrence(s) in $file\n"; |
---|
494 | return (0,0); |
---|
495 | } |
---|
496 | } |
---|
497 | |
---|
498 | my $correct = pos_correction($line,$pos); |
---|
499 | $line =~ s/\r//o; |
---|
500 | $line =~ s/\n//o; |
---|
501 | chomp($line); |
---|
502 | $pos += $correct; |
---|
503 | $line =~ s/^([\s\t]+)//o; |
---|
504 | my $hits = 1; |
---|
505 | |
---|
506 | if ($one_hit_per_line==0) { |
---|
507 | if ($$entering_shown_r==0) { $$entering_shown_r=1; print $entering; } |
---|
508 | print "$show:$.:$pos: $line\n"; |
---|
509 | $rest =~ s/\r//o; |
---|
510 | $rest =~ s/\n//o; |
---|
511 | chomp($rest); |
---|
512 | |
---|
513 | while ($rest =~ $regexpr) { |
---|
514 | my $start_pos = $pos+$hitlen-1; |
---|
515 | |
---|
516 | $start_pos >= 0 || die "Negative start_pos(=$start_pos, pos=$pos, hitlen=$hitlen)"; |
---|
517 | |
---|
518 | $hitlen = $+[0] - $-[0]; |
---|
519 | $hitlen>0 || die "Non-positive hitlen (=$hitlen) [2]"; |
---|
520 | |
---|
521 | if ($#+ > 0) { |
---|
522 | $pos = $-[$#+] + 1; |
---|
523 | } |
---|
524 | else { |
---|
525 | $pos = $-[0] + 1; |
---|
526 | } |
---|
527 | $correct = pos_correction($rest,$pos); |
---|
528 | $pos += $start_pos+$correct; |
---|
529 | |
---|
530 | $pos >= 0 || die "Negative pos"; |
---|
531 | |
---|
532 | if ($$entering_shown_r==0) { $$entering_shown_r=1; print $entering; } |
---|
533 | print "$show:$.:$pos: [same] $line\n"; |
---|
534 | $hits++; |
---|
535 | $rest = $'; |
---|
536 | } |
---|
537 | } |
---|
538 | else { |
---|
539 | if ($$entering_shown_r==0) { $$entering_shown_r=1; print $entering; } |
---|
540 | print "$show:$.:$pos: $line\n"; |
---|
541 | } |
---|
542 | |
---|
543 | $reported += $hits; |
---|
544 | if (defined $maxhits) { $maxhits -= $hits; } |
---|
545 | } |
---|
546 | $matches++; |
---|
547 | } |
---|
548 | $lines_examined++; |
---|
549 | } |
---|
550 | close(FILE); |
---|
551 | return ($matches,$reported); |
---|
552 | } |
---|
553 | |
---|
554 | # -------------------------------------------------------------------------------- |
---|
555 | |
---|
556 | my $versionControl = '<unknown version control>'; |
---|
557 | |
---|
558 | sub CVS_controlled($) { |
---|
559 | my ($dir) = @_; |
---|
560 | my $SVN_entries = $dir.'/.svn/entries'; |
---|
561 | if (-f $SVN_entries) { |
---|
562 | $versionControl = 'subversion'; |
---|
563 | 1; |
---|
564 | } |
---|
565 | else { |
---|
566 | my $CVS_Repository = $dir.'/CVS/Repository'; |
---|
567 | if (-f $CVS_Repository) { |
---|
568 | $versionControl = 'CVS'; |
---|
569 | 1; |
---|
570 | } |
---|
571 | else { |
---|
572 | 0; |
---|
573 | } |
---|
574 | } |
---|
575 | } |
---|
576 | |
---|
577 | sub parent_directory($) { |
---|
578 | my ($dir) = @_; |
---|
579 | if ($dir =~ /\/[^\/]+$/) { |
---|
580 | return $`; |
---|
581 | } |
---|
582 | return undef; |
---|
583 | } |
---|
584 | |
---|
585 | # -------------------------------------------------------------------------------- |
---|
586 | |
---|
587 | sub collect_files($\%$$); |
---|
588 | sub collect_files($\%$$) { |
---|
589 | my ($dir,$files_r,$is_additional_directory,$follow_file_links) = @_; |
---|
590 | |
---|
591 | my @files = (); |
---|
592 | my @subdirs = (); |
---|
593 | |
---|
594 | opendir(DIR, $dir) || die "can't read directory '$dir' (Reason: $!)"; |
---|
595 | foreach (readdir(DIR)) { |
---|
596 | if ($_ ne '.' and $_ ne '..') { |
---|
597 | my $full = $dir.'/'.$_; |
---|
598 | if (-l $full and ($follow_file_links==0 or -d $full)) { $verbose==0 || print "Skipping $full (symbolic link)\b"; } |
---|
599 | elsif (-f $full) { push @files, $full; } |
---|
600 | elsif (-d $full) { push @subdirs, $full; } |
---|
601 | else { $verbose==0 || print "Skipping $full (not a file or directory)\n"; } |
---|
602 | } |
---|
603 | } |
---|
604 | closedir(DIR); |
---|
605 | |
---|
606 | my $grepxignore = $dir.'/.grepxignore'; |
---|
607 | if (-f $grepxignore) { load_grepxignore($grepxignore); } |
---|
608 | else { forget_grepxignore(); } |
---|
609 | |
---|
610 | foreach (@files) { |
---|
611 | my $shall = shall_search_file($_,$dir); |
---|
612 | if ($shall) { |
---|
613 | $verbose==0 || print "Searching $_\n"; |
---|
614 | # $matches += grepfile($_); |
---|
615 | # $searched++; |
---|
616 | $$files_r{$_} = $shall; |
---|
617 | } |
---|
618 | else { |
---|
619 | $verbose==0 || print "Skipping '$_' (unwanted)\n"; |
---|
620 | } |
---|
621 | } |
---|
622 | |
---|
623 | if ($recurse_subdirs==1) { |
---|
624 | my @descent_into = (); |
---|
625 | |
---|
626 | foreach (@subdirs) { |
---|
627 | my $descent = 1; |
---|
628 | my $reason = 'not specified'; |
---|
629 | if ($global_scan_mode==$GSM_CVS and not $is_additional_directory and not CVS_controlled($_)) { |
---|
630 | if ($arbSpecials==1 and $_ =~ /\/GEN[CH]$/) { |
---|
631 | $verbose==0 || print "Descending non-$versionControl dir '$_' (caused by ARB mode)\n"; |
---|
632 | } |
---|
633 | else { |
---|
634 | $descent = 0; |
---|
635 | $reason = 'not version-controlled'; |
---|
636 | } |
---|
637 | } |
---|
638 | |
---|
639 | if ($descent==1) { |
---|
640 | $descent = NotIgnored($_,1); |
---|
641 | if ($descent==0) { $reason = 'Excluded by .grepxignore'; } |
---|
642 | } |
---|
643 | |
---|
644 | if ($descent==1) { |
---|
645 | push @descent_into, $_; |
---|
646 | } |
---|
647 | else { |
---|
648 | $verbose==0 || print "Skipping subdirectory '$_' ($reason)\n"; |
---|
649 | } |
---|
650 | } |
---|
651 | |
---|
652 | foreach (@descent_into) { |
---|
653 | collect_files($_, %$files_r, $is_additional_directory,$follow_file_links); |
---|
654 | } |
---|
655 | } |
---|
656 | } |
---|
657 | |
---|
658 | sub grep_collected_files(\%$) { |
---|
659 | my ($files_r,$entering) = @_; |
---|
660 | |
---|
661 | my $entering_shown = 0; |
---|
662 | |
---|
663 | my %depth = map { |
---|
664 | my $d = $_; |
---|
665 | $d =~ s/[^\/\\]//ig; |
---|
666 | $_ => length($d); |
---|
667 | } keys %$files_r; |
---|
668 | |
---|
669 | my @files = sort { |
---|
670 | my $cmp = $$files_r{$b} <=> $$files_r{$a}; # file importance |
---|
671 | if ($cmp==0) { |
---|
672 | $cmp = $depth{$a} <=> $depth{$b}; # depth in directory tree |
---|
673 | if ($cmp==0) { |
---|
674 | $cmp = $a cmp $b; # alphabetically |
---|
675 | } |
---|
676 | } |
---|
677 | return $cmp; |
---|
678 | } keys %$files_r; |
---|
679 | |
---|
680 | my $searched = scalar(@files); |
---|
681 | my $matches = 0; |
---|
682 | my $reported = 0; |
---|
683 | |
---|
684 | if ($matchFiles==1) { |
---|
685 | my @matching_files = (); # files were regexp matches filename |
---|
686 | my $reg_name = qr/\/([^\/]+)$/; |
---|
687 | |
---|
688 | foreach (@files) { |
---|
689 | if ($_ =~ $reg_name) { # match the name part |
---|
690 | if ($1 =~ $regexpr) { push @matching_files, $_; } |
---|
691 | } |
---|
692 | else { die "can't parse name from '$_'"; } |
---|
693 | } |
---|
694 | my $matching_files = scalar(@matching_files); |
---|
695 | if ($matching_files>0) { |
---|
696 | print "grepx: Some filenames match your expression:\n"; |
---|
697 | foreach (@matching_files) { |
---|
698 | my $show = $_; |
---|
699 | if ($_ =~ $reg_startdir) { $show = $'; } |
---|
700 | if ($entering_shown==0) { $entering_shown=1; print $entering; } |
---|
701 | print "$show:0: <filename matched>\n"; |
---|
702 | } |
---|
703 | } |
---|
704 | } |
---|
705 | |
---|
706 | # print "grepx: Searching $searched files..\n"; |
---|
707 | foreach (@files) { |
---|
708 | $verbose==0 || print "searching '$_' (depth=$depth{$_}, importance=$$files_r{$_})\n"; |
---|
709 | my ($m,$r) = grepfile($_,$entering,$entering_shown); |
---|
710 | $matches += $m; |
---|
711 | $reported += $r; |
---|
712 | } |
---|
713 | |
---|
714 | |
---|
715 | return ($searched,$matches,$reported); |
---|
716 | } |
---|
717 | |
---|
718 | sub perform_grep($$$) { |
---|
719 | my ($startdir, $is_additional_directory, $follow_file_links) = @_; |
---|
720 | my %files = (); # key=file, value=file-importance |
---|
721 | collect_files($startdir,%files,$is_additional_directory,$follow_file_links); |
---|
722 | |
---|
723 | my $max_importance = -1; |
---|
724 | foreach (values %files) { |
---|
725 | if ($_ > $max_importance) { $max_importance = $_; } |
---|
726 | } |
---|
727 | |
---|
728 | if ($max_importance<=$IS_OTHER) { |
---|
729 | print "grepx: Only found files with importance==$max_importance (aborting)\n"; |
---|
730 | %files = (); |
---|
731 | } |
---|
732 | |
---|
733 | my ($searched,$matches,$reported) = (0,0,0); |
---|
734 | if (scalar(%files)) { |
---|
735 | my $entering = "grepx: Entering directory `$startdir'\n"; |
---|
736 | ($searched,$matches,$reported) = grep_collected_files(%files,$entering); |
---|
737 | if ($reported>0) { print "grepx: Leaving directory `$startdir'\n"; } |
---|
738 | } |
---|
739 | return ($searched,$matches,$reported); |
---|
740 | } |
---|
741 | |
---|
742 | sub grep_add_directories() { |
---|
743 | my ($searched,$matches,$reported) = (0,0,0); |
---|
744 | foreach (@add_header_dirs) { |
---|
745 | my ($s,$m,$r) = perform_grep($_,1,0); |
---|
746 | ($searched,$matches,$reported) = ($searched+$s,$matches+$m,$reported+$r); |
---|
747 | } |
---|
748 | return ($searched,$matches,$reported); |
---|
749 | } |
---|
750 | |
---|
751 | # -------------------------------------------------------------------------------- |
---|
752 | |
---|
753 | |
---|
754 | sub detect_wanted_startdir($) { |
---|
755 | my ($calldir) = @_; |
---|
756 | if ($global==1) { |
---|
757 | my $know_whats_global = 0; |
---|
758 | |
---|
759 | if (CVS_controlled($calldir)) { |
---|
760 | my $updir = parent_directory($calldir); |
---|
761 | while (defined $updir and -d $updir and CVS_controlled($updir)) { |
---|
762 | $calldir = $updir; |
---|
763 | $updir = parent_directory($updir); |
---|
764 | } |
---|
765 | print "grepx: Starting global search from root of $versionControl controlled directory-tree\n"; |
---|
766 | $global_scan_mode = $GSM_CVS; |
---|
767 | $know_whats_global = 1; |
---|
768 | } |
---|
769 | |
---|
770 | if ($know_whats_global==0) { |
---|
771 | print "grepx: Don't know what 'global search' means here.. using parent directory\n"; |
---|
772 | $global_scan_mode = $GSM_PARENT; |
---|
773 | my $updir = parent_directory($calldir); |
---|
774 | if (defined $updir and -d $updir) { $calldir = $updir; } |
---|
775 | } |
---|
776 | } |
---|
777 | return $calldir; |
---|
778 | } |
---|
779 | |
---|
780 | sub megagiga($) { |
---|
781 | my ($val) = @_; |
---|
782 | if ($val<1024) { return "$val "; } |
---|
783 | |
---|
784 | my $pot = 0; |
---|
785 | while ($val>=1024) { |
---|
786 | $val = int($val/1024+0.5); |
---|
787 | $pot++; |
---|
788 | } |
---|
789 | return "$val ".substr("kMGTP", $pot-1, 1); |
---|
790 | } |
---|
791 | |
---|
792 | # -------------------------------------------------------------------------------- |
---|
793 | |
---|
794 | eval { |
---|
795 | my $start_time = time; |
---|
796 | parse_args(); |
---|
797 | |
---|
798 | $startdir = detect_wanted_startdir($calldir); |
---|
799 | $reg_startdir = quotemeta($startdir.'/'); |
---|
800 | $reg_startdir = qr/^$reg_startdir/; |
---|
801 | |
---|
802 | init_wanted(); |
---|
803 | |
---|
804 | my ($searched,$matches,$reported) = perform_grep($startdir,0,0); |
---|
805 | if ($matches==0) { |
---|
806 | print "grepx: No results - retry with links..\n"; |
---|
807 | ($searched,$matches,$reported) = perform_grep($startdir,0,1); # retry following links |
---|
808 | } |
---|
809 | |
---|
810 | if ($global==1 and scalar(@add_header_dirs)>0) { |
---|
811 | if ($reported==$matches) { |
---|
812 | print "grepx: ------------------------------ Searching in add. directories:\n"; |
---|
813 | my ($s,$m,$r) = grep_add_directories(); |
---|
814 | ($searched,$matches,$reported) = ($searched+$s,$matches+$m,$reported+$r); |
---|
815 | } |
---|
816 | else { |
---|
817 | print "grepx: Skipping search of add. directories - already got enough matches.\n"; |
---|
818 | } |
---|
819 | } |
---|
820 | |
---|
821 | if ($searched == 0) { |
---|
822 | print "grepx: No files matched.\n"; |
---|
823 | print "grepx: Retrying using '$extension' as wildcard.\n"; |
---|
824 | |
---|
825 | $use_as_wildcard = 1; |
---|
826 | ($searched,$matches,$reported) = perform_grep($startdir,0,0); |
---|
827 | if ($matches==0) { |
---|
828 | print "grepx: No results - retry with links..\n"; |
---|
829 | ($searched,$matches,$reported) = perform_grep($startdir,0,1); # retry following links |
---|
830 | } |
---|
831 | if ($searched == 0) { print "grepx: No files matched.\n"; } |
---|
832 | } |
---|
833 | |
---|
834 | if ($searched>0) { |
---|
835 | my $info = "Searched $searched files (".megagiga($lines_examined)."LOC). "; |
---|
836 | if ($matches>0) { |
---|
837 | if ($reported == $matches) { $info .= "Found $matches"; } |
---|
838 | else { $info .= "Reported $reported (of $matches found)"; } |
---|
839 | $info .= " matches in ".(time-$start_time)." seconds."; |
---|
840 | } |
---|
841 | else { $info .= "No matches were found."; } |
---|
842 | print "grepx: $info\n"; |
---|
843 | } |
---|
844 | |
---|
845 | if ($ignoreCount>0) { |
---|
846 | print "grepx: excluded by .grepxignore: $ignoreCount files/directories\n"; |
---|
847 | } |
---|
848 | }; |
---|
849 | if ($@) { |
---|
850 | print_usage(); |
---|
851 | die "Error: $@"; |
---|
852 | } |
---|
853 | |
---|
854 | # -------------------------------------------------------------------------------- |
---|