1 | #!/usr/bin/perl |
---|
2 | |
---|
3 | use strict; |
---|
4 | use warnings; |
---|
5 | |
---|
6 | # ---------------------------------------- |
---|
7 | # hardcoded config for help-map |
---|
8 | |
---|
9 | # ignore these pages (they are linked too frequently) |
---|
10 | my %ignore = ( |
---|
11 | 'glossary' => 1, |
---|
12 | 'arb' => 1, |
---|
13 | ); |
---|
14 | |
---|
15 | my $start_from = 'pa_optimizer'; # start-page |
---|
16 | my $depth = 3; # show pages reachable by that many links (both link directions) |
---|
17 | |
---|
18 | # ---------------------------------------- |
---|
19 | |
---|
20 | |
---|
21 | sub read_xml($); |
---|
22 | sub read_xml($) { |
---|
23 | my ($xml_dir) = @_; |
---|
24 | |
---|
25 | my @xml = (); |
---|
26 | my @sub = (); |
---|
27 | |
---|
28 | opendir(DIR,$xml_dir) || die "Failed to read '$xml_dir' (Reason: $!)"; |
---|
29 | foreach (readdir(DIR)) { |
---|
30 | if ($_ ne '.' and $_ ne '..') { |
---|
31 | my $full = $xml_dir.'/'.$_; |
---|
32 | if (-d $full) { |
---|
33 | push @sub, $_; |
---|
34 | } |
---|
35 | elsif (/\.xml$/o) { |
---|
36 | push @xml, $_; |
---|
37 | } |
---|
38 | } |
---|
39 | } |
---|
40 | closedir(DIR); |
---|
41 | |
---|
42 | foreach my $sub (@sub) { |
---|
43 | my @subxml = read_xml($xml_dir.'/'.$sub); |
---|
44 | foreach (@subxml) { |
---|
45 | push @xml, $sub.'/'.$_; |
---|
46 | } |
---|
47 | } |
---|
48 | |
---|
49 | return @xml; |
---|
50 | } |
---|
51 | |
---|
52 | sub print_index(\@) { |
---|
53 | my ($xml_r) = @_; |
---|
54 | |
---|
55 | my $header=<<HEADER; |
---|
56 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> |
---|
57 | <!DOCTYPE PAGE SYSTEM 'arb_help.dtd' [ |
---|
58 | <!ENTITY nbsp " "> |
---|
59 | <!ENTITY acute "´"> |
---|
60 | <!ENTITY eacute "é"> |
---|
61 | <!ENTITY apostr "'"> |
---|
62 | <!ENTITY semi ";"> |
---|
63 | ]> |
---|
64 | <!-- This file has been generated by ../generate_index.pl --> |
---|
65 | <PAGE name="help_index" edit_warning="devel"> |
---|
66 | <TITLE>ARB help index</TITLE> |
---|
67 | <SECTION name="List of existing helpfiles"> |
---|
68 | <LIST> |
---|
69 | HEADER |
---|
70 | my $footer=<<FOOTER; |
---|
71 | </LIST> |
---|
72 | </SECTION> |
---|
73 | </PAGE> |
---|
74 | FOOTER |
---|
75 | |
---|
76 | print $header; |
---|
77 | foreach my $xml (@$xml_r) { |
---|
78 | my $hlp = $xml; |
---|
79 | $hlp =~ s/\.xml$/\.hlp/o; |
---|
80 | my $link = ' <T><LINK dest="'.$hlp.'" type="hlp" quoted="0"/></T>'; |
---|
81 | print $link."\n"; |
---|
82 | } |
---|
83 | print $footer; |
---|
84 | |
---|
85 | } |
---|
86 | |
---|
87 | sub find_indexed_xmls($$) { |
---|
88 | my ($index_name,$xml_dir) = @_; |
---|
89 | |
---|
90 | my @xml = read_xml($xml_dir); |
---|
91 | @xml = sort map { |
---|
92 | if ($_ eq $index_name) { ; } # dont index the index |
---|
93 | else { $_; } |
---|
94 | } @xml; |
---|
95 | return @xml; |
---|
96 | } |
---|
97 | |
---|
98 | my %link = (); # key='from>to' value=bitvalue(1=uplink,2=sublink) |
---|
99 | |
---|
100 | sub storeLink($$$) { |
---|
101 | my ($from,$to,$us) = @_; |
---|
102 | # print STDERR "storeLink from='$from' to='$to' us='$us'\n"; |
---|
103 | |
---|
104 | my $concat = $from.' '.$to; |
---|
105 | die "invalid char '>' in '$concat'" if $concat =~ />/o; |
---|
106 | |
---|
107 | my $bit = 0; |
---|
108 | if ($us eq 'UP') { $bit = 1; } |
---|
109 | elsif ($us eq 'SUB') { $bit = 2; } |
---|
110 | |
---|
111 | my $key = $from.'>'.$to; |
---|
112 | my $val = $link{$key}; |
---|
113 | if (not defined $val) { $val = 0; } |
---|
114 | |
---|
115 | $val = $val | $bit; |
---|
116 | $link{$key} = $val; |
---|
117 | } |
---|
118 | |
---|
119 | my %title_line = (); # key=xml-filename, value=lineno of <TITLE>.. |
---|
120 | my %source_file = (); # key=xml-filename, value=source/filename.hlp |
---|
121 | |
---|
122 | sub parse_titles($\@\%) { |
---|
123 | my ($xml_dir,$xml_r, $title_r) = @_; |
---|
124 | foreach my $name (@$xml_r) { |
---|
125 | my $xml = $xml_dir.'/'.$name; |
---|
126 | open(FILE,'<'.$xml) || die "can't read '$xml' (Reason: $!)"; |
---|
127 | |
---|
128 | my $namePlain = $name; |
---|
129 | if ($namePlain=~ /\.xml$/o) { $namePlain = $`; } |
---|
130 | |
---|
131 | my $line; |
---|
132 | LINE: while (defined($line=<FILE>)) { |
---|
133 | if ($line =~ /<TITLE>(.*)<\/TITLE>/o) { |
---|
134 | $$title_r{$name} = $1; |
---|
135 | $title_line{$name} = $.; |
---|
136 | last LINE; # TITLE occurs behind UP/SUB links -> done here |
---|
137 | } |
---|
138 | if ($line =~ /<(UP|SUB)\s+dest=/o) { |
---|
139 | my $us = $1; |
---|
140 | if ($line =~ /"(.*)"\s+type="(.*)"/o) { |
---|
141 | my ($dest,$type) = ($1,$2); |
---|
142 | if ($dest =~ /\.hlp$/o) { $dest = $`; } |
---|
143 | storeLink($namePlain,$dest,$us); |
---|
144 | } |
---|
145 | } |
---|
146 | if ($line =~ /<PAGE.*source=/o) { |
---|
147 | my $rest = $'; |
---|
148 | if ($rest =~ /\"([^\"]*)\"/o) { |
---|
149 | my $source = $1; |
---|
150 | $source_file{$name} = $source; |
---|
151 | } |
---|
152 | } |
---|
153 | } |
---|
154 | close(FILE); |
---|
155 | |
---|
156 | if (not defined $$title_r{$name}) { |
---|
157 | die "$xml:1: Failed to parse title\n "; |
---|
158 | } |
---|
159 | } |
---|
160 | } |
---|
161 | |
---|
162 | sub warn_duplicate_titles($\%) { |
---|
163 | my ($xml_dir,$title_r) = @_; |
---|
164 | my $hlpdir = $xml_dir; |
---|
165 | my %seen = (); |
---|
166 | foreach my $file (keys %$title_r) { |
---|
167 | my $title = $$title_r{$file}; |
---|
168 | if (defined $seen{$title}) { |
---|
169 | my $firstFile = $seen{$title}; |
---|
170 | my $thisLine = $title_line{$file}; |
---|
171 | my $firstLine = $title_line{$firstFile}; |
---|
172 | |
---|
173 | print STDERR "${xml_dir}/${file}:${thisLine}: Warning: duplicated title '$title' ..\n"; |
---|
174 | print STDERR "${xml_dir}/${firstFile}:${firstLine}: Warning: .. first seen here.\n"; |
---|
175 | |
---|
176 | my $src2 = $source_file{$file}; |
---|
177 | my $src1 = $source_file{$firstFile}; |
---|
178 | my $auto_del = undef; |
---|
179 | my $miss_src = undef; |
---|
180 | |
---|
181 | if (-f $src1) { |
---|
182 | if (not -f $src2) { |
---|
183 | # src2 is missing -> auto-delete $file |
---|
184 | $miss_src = $src2; |
---|
185 | $auto_del = "${xml_dir}/${file}"; |
---|
186 | } |
---|
187 | } |
---|
188 | elsif (-f $src2) { |
---|
189 | if (not -f $src1) { |
---|
190 | # src1 is missing -> auto-delete $firstFile |
---|
191 | $miss_src = $src1; |
---|
192 | $auto_del = "${xml_dir}/${firstFile}"; |
---|
193 | } |
---|
194 | } |
---|
195 | |
---|
196 | # print STDERR "src1: $src1\n"; |
---|
197 | # print STDERR "src2: $src2\n"; |
---|
198 | # print STDERR "auto_del: $auto_del\n"; |
---|
199 | # print STDERR "miss_src: $miss_src\n"; |
---|
200 | |
---|
201 | if (defined $miss_src) { |
---|
202 | die 'internal error' if not defined $auto_del; |
---|
203 | print STDERR "Note: auto-deleting '${auto_del}'\n"; |
---|
204 | print STDERR "Reason: source '${miss_src}' has disappeared\n"; |
---|
205 | if (not unlink($auto_del)) { |
---|
206 | print STDERR "Failed to auto-delete '$auto_del' (Reason: $!)\n"; |
---|
207 | } |
---|
208 | } |
---|
209 | } |
---|
210 | else { |
---|
211 | $seen{$title} = $file; |
---|
212 | } |
---|
213 | } |
---|
214 | } |
---|
215 | |
---|
216 | sub generate_index($$) { |
---|
217 | my ($index_name,$xml_dir) = @_; |
---|
218 | |
---|
219 | my @xml = find_indexed_xmls($index_name,$xml_dir); |
---|
220 | my %title = (); |
---|
221 | parse_titles($xml_dir,@xml,%title); |
---|
222 | |
---|
223 | warn_duplicate_titles($xml_dir,%title); |
---|
224 | |
---|
225 | @xml = sort { $title{$a} cmp $title{$b}; } @xml; |
---|
226 | |
---|
227 | print_index(@xml); |
---|
228 | } |
---|
229 | |
---|
230 | sub dot_label($) { |
---|
231 | my ($target) = @_; |
---|
232 | return '"'.$target.'"'; |
---|
233 | } |
---|
234 | |
---|
235 | sub generate_map($) { |
---|
236 | my ($map_name) = @_; |
---|
237 | |
---|
238 | # my $maxsize = 17; # inch |
---|
239 | # my $maxsize = 20; # inch |
---|
240 | # my $maxsize = 40; # inch |
---|
241 | my $maxsize = 80; # inch |
---|
242 | open(DOT,'>'.$map_name) || die "can't write '$map_name' (Reason: $!)"; |
---|
243 | |
---|
244 | print DOT "digraph ARBHELPDEP {\n"; |
---|
245 | # print DOT " rankdir=LR;\n"; |
---|
246 | print DOT " concentrate=true;\n"; |
---|
247 | print DOT " searchsize=1000;\n"; |
---|
248 | print DOT " Damping=2.0;\n"; |
---|
249 | print DOT " size=\"$maxsize,$maxsize\";\n"; |
---|
250 | # print DOT " orientation=portrait;\n"; |
---|
251 | print DOT "\n"; |
---|
252 | |
---|
253 | my %use = ( $start_from => 1 ); |
---|
254 | |
---|
255 | my $added = 1; |
---|
256 | while ($added==1) { |
---|
257 | $added = 0; |
---|
258 | foreach (keys %link) { |
---|
259 | die if (not $_ =~ />/o); |
---|
260 | my ($from,$to) = ($`,$'); |
---|
261 | |
---|
262 | die "helpfile '$to' links to itself" if ($to eq $from); |
---|
263 | |
---|
264 | if (exists $use{$from}) { |
---|
265 | my $next = $use{$from}+1; |
---|
266 | if (($next<=$depth) and ((not exists $use{$to}) or ($use{$to}>$next)) and (not $ignore{$to})) { |
---|
267 | $use{$to} = $next; |
---|
268 | # print STDERR "'$to' set to $next (triggered by '$from' with use=".$use{$from}.")\n"; |
---|
269 | $added = 1; |
---|
270 | } |
---|
271 | } |
---|
272 | if (exists $use{$to}) { |
---|
273 | my $next = $use{$to}+1; |
---|
274 | if (($next<=$depth) and ((not exists $use{$from}) or ($use{$from}>$next)) and (not $ignore{$from})) { |
---|
275 | $use{$from} = $next; |
---|
276 | # print STDERR "'$from' set to $next (triggered by '$to' with use=".$use{$to}.")\n"; |
---|
277 | $added = 1; |
---|
278 | } |
---|
279 | } |
---|
280 | } |
---|
281 | } |
---|
282 | |
---|
283 | foreach (keys %link) { |
---|
284 | die if (not $_ =~ />/o); |
---|
285 | my ($from,$to) = ($`,$'); |
---|
286 | if ((not exists $ignore{$from}) and (not exists $ignore{$to})) { |
---|
287 | if ((exists $use{$from}) and (exists $use{$to})) { |
---|
288 | ($from,$to) = (dot_label($from.'['.$use{$from}.']'),dot_label($to.'['.$use{$to}.']')); |
---|
289 | print DOT ' '.$from.' -> '.$to.';'."\n"; |
---|
290 | } |
---|
291 | } |
---|
292 | } |
---|
293 | print DOT "}\n"; |
---|
294 | close(DOT); |
---|
295 | } |
---|
296 | |
---|
297 | sub main() { |
---|
298 | my $args = scalar(@ARGV); |
---|
299 | if ($args != 3) { |
---|
300 | print "Usage: generate_index.pl XMLDIRECTORY NAME_OF_INDEX.xml MAP.dot\n"; |
---|
301 | print "Scans for xml-helpfiles in and below XMLDIRECTORY.\n"; |
---|
302 | print "Generates\n"; |
---|
303 | print "- list of all found helpfiles to STDOUT (assuming it is piped to NAME_OF_INDEX.xml)\n"; |
---|
304 | print "- a (partial) help-map in file MAP.dot\n"; |
---|
305 | die "Error: missing arguments"; |
---|
306 | } |
---|
307 | |
---|
308 | my $xml_dir = $ARGV[0]; |
---|
309 | my $index_name = $ARGV[1]; |
---|
310 | my $map_name = $ARGV[2]; |
---|
311 | |
---|
312 | if (not -d $xml_dir) { die "No such directory '$xml_dir'"; } |
---|
313 | |
---|
314 | generate_index($index_name,$xml_dir); |
---|
315 | generate_map($map_name); |
---|
316 | } |
---|
317 | main(); |
---|