1 | #!/usr/bin/perl |
---|
2 | |
---|
3 | use strict; |
---|
4 | use warnings; |
---|
5 | |
---|
6 | my $debug_matching = 0; |
---|
7 | my $ignore_unknown = 0; |
---|
8 | |
---|
9 | # ------------------------------------------------------------ |
---|
10 | # skipped_directories and files inside are never examined: |
---|
11 | |
---|
12 | my @skipped_directories = ( |
---|
13 | qr/_GEN$/o, |
---|
14 | qr/_COM\/GEN[CH]$/o, |
---|
15 | qr/_COM\/O$/o, |
---|
16 | qr/\/.+\/bin$/o, |
---|
17 | qr/\/HELP_SOURCE\/Xml$/o, |
---|
18 | qr/\/PERL2ARB\/blib$/o, |
---|
19 | qr/^\.\/ARB_SOURCE_DOC/o, |
---|
20 | qr/^\.\/INCLUDE$/o, |
---|
21 | qr/^\.\/PERL5$/o, |
---|
22 | qr/^\.\/lib\/pts$/o, |
---|
23 | qr/^\.\/lib\/help$/o, |
---|
24 | qr/^\.\/lib\/help_html$/o, |
---|
25 | qr/^\.\/ARB_SOURCE_DOC/o, |
---|
26 | qr/^\.\/MAKEBIN$/o, |
---|
27 | qr/^\.\/LIBLINK$/o, |
---|
28 | qr/\/ignore\./o, |
---|
29 | ); |
---|
30 | |
---|
31 | # first used/skipped match wins (exception see @3 below) |
---|
32 | |
---|
33 | my %used_files = map { $_ => 1; } ( |
---|
34 | '!BRANCH_STATE', |
---|
35 | 'demo.arb', |
---|
36 | 'export2sub', |
---|
37 | 'Doxyfile', |
---|
38 | 'Makefile', |
---|
39 | 'Makefile.org', |
---|
40 | 'AUTHORS', |
---|
41 | 'COPYING', |
---|
42 | ); |
---|
43 | |
---|
44 | my %skipped_files = map { $_ => 1; } ( |
---|
45 | '.cvsignore', |
---|
46 | '.build.lst', |
---|
47 | '.depends', |
---|
48 | 'config.makefile', |
---|
49 | 'ChangeLog', |
---|
50 | 'ARB_GDEmenus', |
---|
51 | 'helpfiles.lst', |
---|
52 | 'nt_date.h', |
---|
53 | 'TAGS', |
---|
54 | ); |
---|
55 | |
---|
56 | my %used_extensions = map { $_ => 1; } ( |
---|
57 | 'c', 'cpp', 'cxx', |
---|
58 | 'h', 'hpp', 'hxx', |
---|
59 | 'f', |
---|
60 | 'pl', 'pm', 'PL', 'cgi', |
---|
61 | 'java', 'manifest', |
---|
62 | 'inc', |
---|
63 | 'sh', |
---|
64 | 'aisc', 'pa', |
---|
65 | 'template', 'default', |
---|
66 | 'script', |
---|
67 | 'txt', 'doc', 'ps', 'pdf', |
---|
68 | 'bitmap', |
---|
69 | 'source', 'menu', |
---|
70 | 'head', 'header', |
---|
71 | 'footer', |
---|
72 | 'dtd', 'xsl', |
---|
73 | 'makefile', |
---|
74 | ); |
---|
75 | |
---|
76 | my %skipped_extensions = map { $_ => 1; } ( |
---|
77 | 'o', |
---|
78 | 'so', |
---|
79 | 'a', |
---|
80 | 'genmenu', |
---|
81 | 'class', |
---|
82 | 'jar', |
---|
83 | 'stamp', |
---|
84 | 'list', |
---|
85 | 'log', |
---|
86 | 'swp', |
---|
87 | 'bak', |
---|
88 | 'old', |
---|
89 | 'last_gcc', |
---|
90 | 'yml', 'json', # perl2arb |
---|
91 | ); |
---|
92 | |
---|
93 | |
---|
94 | # used_when_matches, skipped_when_matches and used_when_matchesFull are only tested, |
---|
95 | # if above filters did not match: |
---|
96 | |
---|
97 | my @used_when_matches = ( |
---|
98 | qr/^arb_.*\.txt$/o, |
---|
99 | qr/license/io, |
---|
100 | qr/disclaimer/io, |
---|
101 | qr/readme$/io, |
---|
102 | qr/unused.*source.*\.tgz$/io, |
---|
103 | ); |
---|
104 | |
---|
105 | my @skipped_when_matches = ( |
---|
106 | qr/^arbsrc\.lst$/o, |
---|
107 | qr/^arbsrc\.lst\.tmp$/o, |
---|
108 | qr/^arbsrc.*\.tgz$/o, |
---|
109 | qr/\#.*\#$/o, |
---|
110 | qr/\.\#.*$/o, |
---|
111 | qr/.*~$/o, # backups |
---|
112 | ); |
---|
113 | |
---|
114 | my @used_when_matchesFull = ( |
---|
115 | qr/\/EISPACK\/rg\.html$/o, |
---|
116 | qr/\/CLUSTALW\/.*$/o, |
---|
117 | qr/\/HGL_SRC\/plot\.icon$/o, |
---|
118 | qr/\/PHYLIP\/doc\//o, |
---|
119 | qr/\/GDE\/.*\.html$/o, |
---|
120 | qr/GDE\/.*\/Makefile\.[^\/]+$/io, |
---|
121 | qr/\/GDEHELP\/GDE.*/o, |
---|
122 | qr/\/GDEHELP\/Makefile\.helpfiles/o, |
---|
123 | qr/\/GDEHELP\/DATA_FILES/o, |
---|
124 | qr/\/GDEHELP\/FASTA/o, |
---|
125 | qr/\/GDEHELP\/HELP_PLAIN/o, |
---|
126 | qr/\/GDEHELP\/HELP_WRITTEN/o, |
---|
127 | qr/\/HEADERLIBS\/.*COPYING$/o, |
---|
128 | qr/\/HEADERLIBS\/.*\.tgz$/o, |
---|
129 | qr/\/HELP_SOURCE\/.*\.gif$/o, |
---|
130 | qr/\/HELP_SOURCE\/oldhelp\/.*\.hlp$/o, |
---|
131 | qr/\/HELP_SOURCE\/oldhelp\/.*\.(ps|pdf)\.gz$/o, |
---|
132 | qr/\/TREEPUZZLE\/.*\.gif$/o, |
---|
133 | qr/\/PERL2ARB\/.*\.html$/o, |
---|
134 | qr/\/PERL2ARB\/typemap$/o, |
---|
135 | qr/\/PERL2ARB\/Makefile.main$/o, |
---|
136 | qr/\/PROBE_SERVER\/.*\.conf$/o, |
---|
137 | qr/\/READSEQ\/Formats$/o, |
---|
138 | qr/\/READSEQ\/.*\.help$/o, |
---|
139 | qr/\/SH\/[^\/\.]*$/o, |
---|
140 | qr/\/SOURCE_TOOLS\//o, |
---|
141 | qr/^\.\/etc\//o, |
---|
142 | qr/^\.\/lib\/arb_tcp_org\.dat$/o, |
---|
143 | qr/^\.\/lib\/config\.[^\.]+$/io, |
---|
144 | qr/^\.\/lib\/arb_default\/.*\.arb$/o, |
---|
145 | qr/^\.\/lib\/export\/.*\.eft$/o, |
---|
146 | qr/^\.\/lib\/import\/.*\.ift2?$/o, |
---|
147 | qr/^\.\/lib\/inputMasks\/.*\.mask$/o, |
---|
148 | qr/^\.\/lib\/macros\/.*\.amc$/o, |
---|
149 | qr/^\.\/lib\/nas\/names\.dat\.template$/o, |
---|
150 | qr/^\.\/lib\/pictures\/.*\.(fig|vfont)$/o, |
---|
151 | qr/^\.\/lib\/pixmaps\/.*\.xpm$/o, |
---|
152 | qr/^\.\/lib\/rna3d\/.*\.(pdb|data)$/o, |
---|
153 | qr/^\.\/lib\/rna3d\/images\/.*\.png$/o, |
---|
154 | qr/^\.\/lib\/sellists\/.*\.sellst$/o, |
---|
155 | qr/^\.\/lib\/protein_2nd_structure\/.*\.dat$/o, |
---|
156 | qr/^\.\/lib\/submit\//o, |
---|
157 | qr/^\.\/util\/arb_.*$/o, |
---|
158 | qr/^\.\/util\/config\..*$/o, |
---|
159 | ); |
---|
160 | |
---|
161 | # skipped_when_matchesFull and forced_when_matchesFull are always tested! (@3) |
---|
162 | |
---|
163 | my @skipped_when_matchesFull = ( |
---|
164 | qr/date\.xsl$/o, |
---|
165 | qr/\/genhelp\/.*\.hlp$/o, |
---|
166 | qr/^\.\/GDE\/CORE\/functions.h$/o, |
---|
167 | qr/^\.\/PERL2ARB\/.*\.h$/o, |
---|
168 | qr/^\.\/PERL2ARB\/ARB\.xs$/o, |
---|
169 | qr/^\.\/PERL2ARB\/ARB\.c$/o, |
---|
170 | qr/^\.\/PERL2ARB\/ARB\.bs$/o, |
---|
171 | qr/^\.\/PERL2ARB\/pm_to_blib$/o, |
---|
172 | qr/^\.\/PERL2ARB\/Makefile$/o, |
---|
173 | qr/^\.\/PERL2ARB\/Makefile.PL$/o, |
---|
174 | qr/^\.\/PERL2ARB\/perlmain.c$/o, |
---|
175 | qr/^\.\/TEMPLATES\/arb_build\.h$/o, |
---|
176 | qr/^\.\/lib\/ARB\.pm$/o, |
---|
177 | qr/^\.\/lib\/nas\/names.*\.dat$/o, |
---|
178 | qr/^\.\/lib\/arb_tcp\.dat$/o, |
---|
179 | qr/^\.\/arb.*\.tgz$/o, |
---|
180 | qr/^\.\/SOURCE_TOOLS\/valgrind2grep\.lst$/o, |
---|
181 | qr/\/lib\/addlibs\/(lib.*\.so\..*)$/o, |
---|
182 | ); |
---|
183 | |
---|
184 | my @forced_when_matchesFull = ( |
---|
185 | qr/\/PROBE_WEB\/SERVER\/.*\.jar$/o, |
---|
186 | ); |
---|
187 | |
---|
188 | # files that are even packed when generated and not in VC |
---|
189 | my @pack_fullGenerated = ( |
---|
190 | qr/\/TEMPLATES\/svn_revision\.h$/o, |
---|
191 | ); |
---|
192 | |
---|
193 | # ------------------------------------------------------------ |
---|
194 | # sanity checks |
---|
195 | |
---|
196 | foreach (keys %used_extensions) { |
---|
197 | if (exists $skipped_extensions{$_}) { die "'$_' in \$used_extensions and \$skipped_extensions"; } |
---|
198 | } |
---|
199 | foreach (keys %used_files) { |
---|
200 | if (exists $skipped_files{$_}) { die "'$_' in \$used_files and \$skipped_files"; } |
---|
201 | } |
---|
202 | |
---|
203 | # ------------------------------------------------------------ |
---|
204 | |
---|
205 | sub useDir($) { |
---|
206 | my ($dir) = @_; |
---|
207 | |
---|
208 | if ($dir =~ /.svn$/o) { return 0; } |
---|
209 | if ($dir =~ /CVS$/o) { return 0; } |
---|
210 | foreach (@skipped_directories) { |
---|
211 | if ($dir =~ $_) { return 0; } |
---|
212 | } |
---|
213 | return 1; |
---|
214 | } |
---|
215 | |
---|
216 | sub matchingExpr($\@) { |
---|
217 | # return 0 if no regexp matched, return index+1 otherwise |
---|
218 | my ($str,$regexp_arr_r) = @_; |
---|
219 | |
---|
220 | my $regexps = scalar(@$regexp_arr_r); |
---|
221 | for (my $r=0; $r<$regexps; $r++) { |
---|
222 | my $reg = $$regexp_arr_r[$r]; |
---|
223 | if ($str =~ $reg) { |
---|
224 | return $r+1; |
---|
225 | } |
---|
226 | } |
---|
227 | return 0; |
---|
228 | } |
---|
229 | |
---|
230 | sub useIfMatching($\@\$) { |
---|
231 | my ($str,$regexp_arr_r,$use_r) = @_; |
---|
232 | my $matches = matchingExpr($str,@$regexp_arr_r); |
---|
233 | if ($matches>0) { |
---|
234 | if ($debug_matching!=0) { print "'$str' matches '".$$regexp_arr_r[$matches-1]."' => use!\n"; } |
---|
235 | $$use_r = 1; |
---|
236 | } |
---|
237 | } |
---|
238 | sub dontUseIfMatching($\@\$) { |
---|
239 | my ($str,$regexp_arr_r,$use_r) = @_; |
---|
240 | my $matches = matchingExpr($str,@$regexp_arr_r); |
---|
241 | if ($matches>0) { |
---|
242 | if ($debug_matching!=0) { print "'$str' matches '".$$regexp_arr_r[$matches-1]."' => don't use!\n"; } |
---|
243 | $$use_r = 0; |
---|
244 | } |
---|
245 | } |
---|
246 | |
---|
247 | sub useFile($$) { |
---|
248 | my ($dir,$file) = @_; |
---|
249 | |
---|
250 | my $use = undef; |
---|
251 | if (exists $used_files{$file}) { $use = 1; } |
---|
252 | elsif (exists $skipped_files{$file}) { $use = 0; } |
---|
253 | |
---|
254 | my $hasExt = 0; |
---|
255 | if (not defined $use) { |
---|
256 | if ($file =~ /\.([^\.]+)$/o) { |
---|
257 | my $ext = $1; |
---|
258 | $hasExt = 1; |
---|
259 | if (exists $used_extensions{$ext}) { $use = 1; } |
---|
260 | elsif (exists $skipped_extensions{$ext}) { $use = 0; } |
---|
261 | } |
---|
262 | } |
---|
263 | |
---|
264 | if (not defined $use) { |
---|
265 | useIfMatching($file,@used_when_matches, $use); |
---|
266 | } |
---|
267 | |
---|
268 | if (not defined $use) { |
---|
269 | dontUseIfMatching($file,@skipped_when_matches, $use); |
---|
270 | } |
---|
271 | |
---|
272 | my $full; |
---|
273 | if (not defined $use) { |
---|
274 | $full = $dir.'/'.$file; |
---|
275 | |
---|
276 | useIfMatching($full,@used_when_matchesFull, $use); |
---|
277 | if (not defined $use) { |
---|
278 | if (-X $full and $hasExt==0) { $use = 0; } # exclude binaries by default (wrong for scripts) |
---|
279 | } |
---|
280 | } |
---|
281 | |
---|
282 | if (not defined $use or $use==1) { |
---|
283 | if (not defined $full) { $full = $dir.'/'.$file; } |
---|
284 | |
---|
285 | dontUseIfMatching($full,@skipped_when_matchesFull, $use); |
---|
286 | } |
---|
287 | if (not defined $use or $use==0) { |
---|
288 | if (not defined $full) { $full = $dir.'/'.$file; } |
---|
289 | useIfMatching($full,@forced_when_matchesFull, $use); |
---|
290 | } |
---|
291 | |
---|
292 | if (not defined $use) { |
---|
293 | if ($ignore_unknown==0) { |
---|
294 | die "Don't know whether to use or skip '$file' (in $dir)"; |
---|
295 | } |
---|
296 | $use = 1; |
---|
297 | } |
---|
298 | |
---|
299 | return $use; |
---|
300 | } |
---|
301 | |
---|
302 | # ------------------------------------------------------------ |
---|
303 | |
---|
304 | sub getSVNEntries($\%) { |
---|
305 | my ($dir,$SVN_r) = @_; |
---|
306 | |
---|
307 | my $svnentries = $dir.'/.svn/entries'; |
---|
308 | if (-f $svnentries) { |
---|
309 | open(SVN,'<'.$svnentries) || die "can't read '$svnentries' (Reason: $!)"; |
---|
310 | # print "reading $svnentries\n"; |
---|
311 | |
---|
312 | my $line; |
---|
313 | LINE: while (defined($line=<SVN>)) { |
---|
314 | if (length($line)==2 and ord($line)==12) { # entrymarker (^L) |
---|
315 | my $name=<SVN>; |
---|
316 | my $type=<SVN>; |
---|
317 | |
---|
318 | defined $name or last LINE; |
---|
319 | defined $type or die "Expected two or no lines after ^L"; |
---|
320 | |
---|
321 | chomp($name); |
---|
322 | chomp($type); |
---|
323 | |
---|
324 | if ($type eq 'file') { |
---|
325 | $$SVN_r{$name} = 1; |
---|
326 | } |
---|
327 | elsif ($type eq 'dir') { |
---|
328 | $$SVN_r{$name} = 2; |
---|
329 | } |
---|
330 | else { |
---|
331 | die "Unknown type '$type' for '$name' in $svnentries"; |
---|
332 | } |
---|
333 | # print "name='$name' type='$type'\n"; |
---|
334 | } |
---|
335 | } |
---|
336 | |
---|
337 | close(SVN); |
---|
338 | return 1; |
---|
339 | } |
---|
340 | print "No such file: '$svnentries'\n"; |
---|
341 | return 0; |
---|
342 | } |
---|
343 | |
---|
344 | sub getCVSEntries($\%) { |
---|
345 | my ($dir,$CVS_r) = @_; |
---|
346 | |
---|
347 | my $cvsentries = $dir.'/CVS/Entries'; |
---|
348 | if (-f $cvsentries) { |
---|
349 | open(CVS,'<'.$cvsentries) || die "can't read '$cvsentries' (Reason: $!)"; |
---|
350 | eval { |
---|
351 | foreach (<CVS>) { |
---|
352 | chomp; |
---|
353 | if (/^D\/([^\/]+)\//o) { # directory |
---|
354 | $$CVS_r{$1} = 2; |
---|
355 | } |
---|
356 | elsif (/^\/([^\/]+)\//o) { # file |
---|
357 | $$CVS_r{$1} = 1; |
---|
358 | } |
---|
359 | elsif (/^D$/o) { |
---|
360 | ; |
---|
361 | } |
---|
362 | else { |
---|
363 | die "can't parse line '$_'"; |
---|
364 | } |
---|
365 | } |
---|
366 | }; |
---|
367 | if ($@) { die "$@ while reading $cvsentries"; } |
---|
368 | close(CVS); |
---|
369 | return 1; |
---|
370 | } |
---|
371 | return 0; |
---|
372 | } |
---|
373 | |
---|
374 | my $VC = '<no VC>'; |
---|
375 | |
---|
376 | sub getVCEntries($\%) { |
---|
377 | my ($dir,$VC_r) = @_; |
---|
378 | |
---|
379 | my $res = 1; |
---|
380 | if (getSVNEntries($dir,%$VC_r)==0) { |
---|
381 | if (getCVSEntries($dir,%$VC_r)==0) { |
---|
382 | $VC = '<no VC>'; |
---|
383 | $res = 0; |
---|
384 | } |
---|
385 | else { |
---|
386 | $VC = 'CVS'; |
---|
387 | } |
---|
388 | } |
---|
389 | else { |
---|
390 | $VC = 'SVN'; |
---|
391 | } |
---|
392 | |
---|
393 | if (0) { |
---|
394 | print "$VC entries for $dir:\n"; |
---|
395 | foreach (sort keys %$VC_r) { |
---|
396 | print " ".$$VC_r{$_}.": $_\n"; |
---|
397 | } |
---|
398 | } |
---|
399 | |
---|
400 | return $res; |
---|
401 | } |
---|
402 | |
---|
403 | # ------------------------------------------------------------ |
---|
404 | |
---|
405 | sub expectVCmember($$\%) { |
---|
406 | my ($full,$item,$VC_r) = @_; |
---|
407 | if ((not defined $$VC_r{$item}) and ($ignore_unknown==0)) { |
---|
408 | if (not matchingExpr($full,@pack_fullGenerated)) { |
---|
409 | die "'$full' ($_) included, but not in $VC (seems to be generated)"; |
---|
410 | } |
---|
411 | } |
---|
412 | } |
---|
413 | |
---|
414 | my %unpackedCVSmember = map { $_ => 1; } ( |
---|
415 | '.cvsignore', |
---|
416 | 'ChangeLog', |
---|
417 | ); |
---|
418 | |
---|
419 | sub unexpectVCmember($$\%) { |
---|
420 | my ($full,$item,$VC_r) = @_; |
---|
421 | if (defined $$VC_r{$item}) { |
---|
422 | if (not exists $unpackedCVSmember{$item} and $ignore_unknown==0) { |
---|
423 | die "'$full' excluded, but in $VC"; |
---|
424 | } |
---|
425 | } |
---|
426 | } |
---|
427 | |
---|
428 | sub dumpFiles($); |
---|
429 | sub dumpFiles($) { |
---|
430 | my ($dir) = @_; |
---|
431 | |
---|
432 | my @subdirs; |
---|
433 | my @files; |
---|
434 | |
---|
435 | my %CVS; |
---|
436 | getVCEntries($dir,%CVS); |
---|
437 | |
---|
438 | opendir(DIR,$dir) || die "can't read directory '$dir' (Reason: $!)"; |
---|
439 | foreach (readdir(DIR)) { |
---|
440 | if ($_ ne '.' and $_ ne '..') { |
---|
441 | my $full = $dir.'/'.$_; |
---|
442 | if (not -l $full) { |
---|
443 | if (-d $full) { |
---|
444 | if (useDir($full)==1) { |
---|
445 | expectVCmember($full,$_,%CVS); |
---|
446 | push @subdirs, $full; |
---|
447 | } |
---|
448 | else { unexpectVCmember($full,$_,%CVS); } |
---|
449 | } |
---|
450 | elsif (-f $full) { |
---|
451 | if (useFile($dir,$_)==1) { |
---|
452 | expectVCmember($full,$_,%CVS); |
---|
453 | push @files, $full; |
---|
454 | } |
---|
455 | else { unexpectVCmember($full,$_,%CVS); } |
---|
456 | } |
---|
457 | else { die "Unknown: '$full'"; } |
---|
458 | } |
---|
459 | } |
---|
460 | } |
---|
461 | closedir(DIR); |
---|
462 | |
---|
463 | foreach (@files) { print $_."\n"; } |
---|
464 | foreach (@subdirs) { dumpFiles($_); } |
---|
465 | } |
---|
466 | |
---|
467 | my $args = scalar(@ARGV); |
---|
468 | if ($args==0) { |
---|
469 | dumpFiles('.'); |
---|
470 | } |
---|
471 | else { |
---|
472 | my $arg = $ARGV[0]; |
---|
473 | if ($arg eq 'ignore') { |
---|
474 | $ignore_unknown = 1; |
---|
475 | dumpFiles('.'); |
---|
476 | } |
---|
477 | elsif ($arg eq 'matching') { |
---|
478 | $debug_matching = 1; |
---|
479 | dumpFiles('.'); |
---|
480 | } |
---|
481 | else { |
---|
482 | die "Usage: arb_srclst.pl [ignore|matching]\n"; |
---|
483 | } |
---|
484 | } |
---|