| 1 | #!/usr/bin/perl |
|---|
| 2 | |
|---|
| 3 | use strict; |
|---|
| 4 | use warnings; |
|---|
| 5 | |
|---|
| 6 | # ---------------------------------------- |
|---|
| 7 | # hardcoded config for help-map |
|---|
| 8 | |
|---|
| 9 | # ignore these pages (they are linked too frequently) |
|---|
| 10 | my %ignore = ( |
|---|
| 11 | 'glossary' => 1, |
|---|
| 12 | 'arb' => 1, |
|---|
| 13 | ); |
|---|
| 14 | |
|---|
| 15 | my $start_from = 'pa_optimizer'; # start-page |
|---|
| 16 | my $depth = 3; # show pages reachable by that many links (both link directions) |
|---|
| 17 | |
|---|
| 18 | # ---------------------------------------- |
|---|
| 19 | |
|---|
| 20 | |
|---|
| 21 | sub read_xml($); |
|---|
| 22 | sub read_xml($) { |
|---|
| 23 | my ($xml_dir) = @_; |
|---|
| 24 | |
|---|
| 25 | my @xml = (); |
|---|
| 26 | my @sub = (); |
|---|
| 27 | |
|---|
| 28 | opendir(DIR,$xml_dir) || die "Failed to read '$xml_dir' (Reason: $!)"; |
|---|
| 29 | foreach (readdir(DIR)) { |
|---|
| 30 | if ($_ ne '.' and $_ ne '..') { |
|---|
| 31 | my $full = $xml_dir.'/'.$_; |
|---|
| 32 | if (-d $full) { |
|---|
| 33 | push @sub, $_; |
|---|
| 34 | } |
|---|
| 35 | elsif (/\.xml$/o) { |
|---|
| 36 | push @xml, $_; |
|---|
| 37 | } |
|---|
| 38 | } |
|---|
| 39 | } |
|---|
| 40 | closedir(DIR); |
|---|
| 41 | |
|---|
| 42 | foreach my $sub (@sub) { |
|---|
| 43 | my @subxml = read_xml($xml_dir.'/'.$sub); |
|---|
| 44 | foreach (@subxml) { |
|---|
| 45 | push @xml, $sub.'/'.$_; |
|---|
| 46 | } |
|---|
| 47 | } |
|---|
| 48 | |
|---|
| 49 | return @xml; |
|---|
| 50 | } |
|---|
| 51 | |
|---|
| 52 | sub print_index(\@) { |
|---|
| 53 | my ($xml_r) = @_; |
|---|
| 54 | |
|---|
| 55 | my $header=<<HEADER; |
|---|
| 56 | <?xml version="1.0" encoding="UTF-8" standalone="no"?> |
|---|
| 57 | <!DOCTYPE PAGE SYSTEM 'arb_help.dtd' [ |
|---|
| 58 | <!ENTITY nbsp " "> |
|---|
| 59 | <!ENTITY acute "´"> |
|---|
| 60 | <!ENTITY eacute "é"> |
|---|
| 61 | <!ENTITY apostr "'"> |
|---|
| 62 | <!ENTITY semi ";"> |
|---|
| 63 | ]> |
|---|
| 64 | <!-- This file has been generated by ../generate_index.pl --> |
|---|
| 65 | <PAGE name="help_index" edit_warning="devel" source="generate_index.pl"> |
|---|
| 66 | <TITLE>ARB help index</TITLE> |
|---|
| 67 | <SECTION name="List of existing helpfiles"> |
|---|
| 68 | <LIST> |
|---|
| 69 | <ENTRY> |
|---|
| 70 | HEADER |
|---|
| 71 | my $footer=<<FOOTER; |
|---|
| 72 | </ENTRY> |
|---|
| 73 | </LIST> |
|---|
| 74 | </SECTION> |
|---|
| 75 | </PAGE> |
|---|
| 76 | FOOTER |
|---|
| 77 | |
|---|
| 78 | print $header; |
|---|
| 79 | foreach my $xml (@$xml_r) { |
|---|
| 80 | my $hlp = $xml; |
|---|
| 81 | $hlp =~ s/\.xml$/\.hlp/o; |
|---|
| 82 | my $link = ' <T reflow="0"><LINK dest="'.$hlp.'" type="hlp" quoted="0"/></T>'; |
|---|
| 83 | print $link."\n"; |
|---|
| 84 | } |
|---|
| 85 | print $footer; |
|---|
| 86 | |
|---|
| 87 | } |
|---|
| 88 | |
|---|
| 89 | sub find_indexed_xmls($$) { |
|---|
| 90 | my ($index_name,$xml_dir) = @_; |
|---|
| 91 | |
|---|
| 92 | my @xml = read_xml($xml_dir); |
|---|
| 93 | @xml = sort map { |
|---|
| 94 | if ($_ eq $index_name) { ; } # dont index the index |
|---|
| 95 | else { $_; } |
|---|
| 96 | } @xml; |
|---|
| 97 | return @xml; |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | my %link = (); # key='from>to' value=bitvalue(1=uplink,2=sublink) |
|---|
| 101 | |
|---|
| 102 | sub storeLink($$$) { |
|---|
| 103 | my ($from,$to,$us) = @_; |
|---|
| 104 | # print STDERR "storeLink from='$from' to='$to' us='$us'\n"; |
|---|
| 105 | |
|---|
| 106 | my $concat = $from.' '.$to; |
|---|
| 107 | die "invalid char '>' in '$concat'" if $concat =~ />/o; |
|---|
| 108 | |
|---|
| 109 | my $bit = 0; |
|---|
| 110 | if ($us eq 'UP') { $bit = 1; } |
|---|
| 111 | elsif ($us eq 'SUB') { $bit = 2; } |
|---|
| 112 | |
|---|
| 113 | my $key = $from.'>'.$to; |
|---|
| 114 | my $val = $link{$key}; |
|---|
| 115 | if (not defined $val) { $val = 0; } |
|---|
| 116 | |
|---|
| 117 | $val = $val | $bit; |
|---|
| 118 | $link{$key} = $val; |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | my %title_line = (); # key=xml-filename, value=lineno of <TITLE>.. |
|---|
| 122 | my %source_file = (); # key=xml-filename, value=source/filename.hlp |
|---|
| 123 | |
|---|
| 124 | sub parse_titles($\@\%) { |
|---|
| 125 | my ($xml_dir,$xml_r, $title_r) = @_; |
|---|
| 126 | foreach my $name (@$xml_r) { |
|---|
| 127 | my $xml = $xml_dir.'/'.$name; |
|---|
| 128 | open(FILE,'<'.$xml) || die "can't read '$xml' (Reason: $!)"; |
|---|
| 129 | |
|---|
| 130 | my $namePlain = $name; |
|---|
| 131 | if ($namePlain=~ /\.xml$/o) { $namePlain = $`; } |
|---|
| 132 | |
|---|
| 133 | my $line; |
|---|
| 134 | LINE: while (defined($line=<FILE>)) { |
|---|
| 135 | if ($line =~ /<TITLE>(.*)<\/TITLE>/o) { |
|---|
| 136 | $$title_r{$name} = $1; |
|---|
| 137 | $title_line{$name} = $.; |
|---|
| 138 | last LINE; # TITLE occurs behind UP/SUB links -> done here |
|---|
| 139 | } |
|---|
| 140 | if ($line =~ /<(UP|SUB)\s+dest=/o) { |
|---|
| 141 | my $us = $1; |
|---|
| 142 | if ($line =~ /"(.*)"\s+type="(.*)"/o) { |
|---|
| 143 | my ($dest,$type) = ($1,$2); |
|---|
| 144 | if ($dest =~ /\.hlp$/o) { $dest = $`; } |
|---|
| 145 | storeLink($namePlain,$dest,$us); |
|---|
| 146 | } |
|---|
| 147 | } |
|---|
| 148 | if ($line =~ /<PAGE.*source=/o) { |
|---|
| 149 | my $rest = $'; |
|---|
| 150 | if ($rest =~ /\"([^\"]*)\"/o) { |
|---|
| 151 | my $source = $1; |
|---|
| 152 | $source_file{$name} = $source; |
|---|
| 153 | } |
|---|
| 154 | } |
|---|
| 155 | } |
|---|
| 156 | close(FILE); |
|---|
| 157 | |
|---|
| 158 | if (not defined $$title_r{$name}) { |
|---|
| 159 | die "$xml:1: Failed to parse title\n "; |
|---|
| 160 | } |
|---|
| 161 | } |
|---|
| 162 | } |
|---|
| 163 | |
|---|
| 164 | sub warn_duplicate_titles($$$\%) { |
|---|
| 165 | my ($xml_dir, $hlp_dest_dir, $html_dest_dir, $title_r) = @_; |
|---|
| 166 | my $hlpdir = $xml_dir; |
|---|
| 167 | my %seen = (); |
|---|
| 168 | foreach my $file (keys %$title_r) { |
|---|
| 169 | my $title = $$title_r{$file}; |
|---|
| 170 | if (defined $seen{$title}) { |
|---|
| 171 | my $firstFile = $seen{$title}; |
|---|
| 172 | my $thisLine = $title_line{$file}; |
|---|
| 173 | my $firstLine = $title_line{$firstFile}; |
|---|
| 174 | |
|---|
| 175 | print STDERR "${xml_dir}/${file}:${thisLine}: Warning: duplicated title '$title' ..\n"; |
|---|
| 176 | print STDERR "${xml_dir}/${firstFile}:${firstLine}: Warning: .. first seen here.\n"; |
|---|
| 177 | |
|---|
| 178 | my $src2 = $source_file{$file}; |
|---|
| 179 | my $src1 = $source_file{$firstFile}; |
|---|
| 180 | my $auto_del = undef; |
|---|
| 181 | my $miss_src = undef; |
|---|
| 182 | |
|---|
| 183 | if (-f $src1) { |
|---|
| 184 | if (not -f $src2) { |
|---|
| 185 | # src2 is missing -> auto-delete $file |
|---|
| 186 | $miss_src = $src2; |
|---|
| 187 | $auto_del = "${xml_dir}/${file}"; |
|---|
| 188 | } |
|---|
| 189 | } |
|---|
| 190 | elsif (-f $src2) { |
|---|
| 191 | if (not -f $src1) { |
|---|
| 192 | # src1 is missing -> auto-delete $firstFile |
|---|
| 193 | $miss_src = $src1; |
|---|
| 194 | $auto_del = "${xml_dir}/${firstFile}"; |
|---|
| 195 | } |
|---|
| 196 | } |
|---|
| 197 | |
|---|
| 198 | # print STDERR "src1: $src1\n"; |
|---|
| 199 | # print STDERR "src2: $src2\n"; |
|---|
| 200 | # print STDERR "auto_del: $auto_del\n"; |
|---|
| 201 | # print STDERR "miss_src: $miss_src\n"; |
|---|
| 202 | |
|---|
| 203 | if (defined $miss_src) { |
|---|
| 204 | die 'internal error' if not defined $auto_del; |
|---|
| 205 | print STDERR "Warning: source '${miss_src}' has disappeared\n"; |
|---|
| 206 | |
|---|
| 207 | my @auto_deletes = ( $auto_del ); |
|---|
| 208 | |
|---|
| 209 | my $name = undef; |
|---|
| 210 | if ($auto_del =~ /^${xml_dir}/) { |
|---|
| 211 | my $nameExt = $'; |
|---|
| 212 | if ($nameExt =~ /\.xml$/o) { |
|---|
| 213 | $name = $`; |
|---|
| 214 | } |
|---|
| 215 | } |
|---|
| 216 | defined $name || die "failed to detect name of generated help page from '$auto_del'"; |
|---|
| 217 | |
|---|
| 218 | my $gen_hlp = $hlp_dest_dir.$name.'.hlp'; |
|---|
| 219 | my $gen_html = $html_dest_dir.$name.'.html'; |
|---|
| 220 | if (-f $gen_hlp) { push @auto_deletes, $gen_hlp; } else { print "no such file: $gen_hlp (while checking for relict)\n"; } |
|---|
| 221 | if (-f $gen_html) { push @auto_deletes, $gen_html; } else { print "no such file: $gen_html (while checking for relict)\n"; } |
|---|
| 222 | |
|---|
| 223 | foreach (@auto_deletes) { |
|---|
| 224 | print STDERR "Note: auto-deleting '${_}'\n"; |
|---|
| 225 | if (not unlink($_)) { print STDERR "Failed to auto-delete '$_' (Reason: $!)\n"; } |
|---|
| 226 | } |
|---|
| 227 | } |
|---|
| 228 | } |
|---|
| 229 | else { |
|---|
| 230 | $seen{$title} = $file; |
|---|
| 231 | } |
|---|
| 232 | } |
|---|
| 233 | } |
|---|
| 234 | |
|---|
| 235 | sub generate_index($$$$) { |
|---|
| 236 | my ($index_name, $xml_dir, $hlp_dest_dir, $html_dest_dir) = @_; |
|---|
| 237 | |
|---|
| 238 | my @xml = find_indexed_xmls($index_name,$xml_dir); |
|---|
| 239 | my %title = (); |
|---|
| 240 | parse_titles($xml_dir,@xml,%title); |
|---|
| 241 | |
|---|
| 242 | warn_duplicate_titles($xml_dir, $hlp_dest_dir, $html_dest_dir,%title); |
|---|
| 243 | |
|---|
| 244 | @xml = sort { $title{$a} cmp $title{$b}; } @xml; |
|---|
| 245 | |
|---|
| 246 | print_index(@xml); |
|---|
| 247 | } |
|---|
| 248 | |
|---|
| 249 | sub dot_label($) { |
|---|
| 250 | my ($target) = @_; |
|---|
| 251 | return '"'.$target.'"'; |
|---|
| 252 | } |
|---|
| 253 | |
|---|
| 254 | sub generate_map($) { |
|---|
| 255 | my ($map_name) = @_; |
|---|
| 256 | |
|---|
| 257 | # my $maxsize = 17; # inch |
|---|
| 258 | # my $maxsize = 20; # inch |
|---|
| 259 | # my $maxsize = 40; # inch |
|---|
| 260 | my $maxsize = 80; # inch |
|---|
| 261 | open(DOT,'>'.$map_name) || die "can't write '$map_name' (Reason: $!)"; |
|---|
| 262 | |
|---|
| 263 | print DOT "digraph ARBHELPDEP {\n"; |
|---|
| 264 | # print DOT " rankdir=LR;\n"; |
|---|
| 265 | print DOT " concentrate=true;\n"; |
|---|
| 266 | print DOT " searchsize=1000;\n"; |
|---|
| 267 | print DOT " Damping=2.0;\n"; |
|---|
| 268 | print DOT " size=\"$maxsize,$maxsize\";\n"; |
|---|
| 269 | # print DOT " orientation=portrait;\n"; |
|---|
| 270 | print DOT "\n"; |
|---|
| 271 | |
|---|
| 272 | my %use = ( $start_from => 1 ); |
|---|
| 273 | |
|---|
| 274 | my $added = 1; |
|---|
| 275 | while ($added==1) { |
|---|
| 276 | $added = 0; |
|---|
| 277 | foreach (keys %link) { |
|---|
| 278 | die if (not $_ =~ />/o); |
|---|
| 279 | my ($from,$to) = ($`,$'); |
|---|
| 280 | |
|---|
| 281 | die "helpfile '$to' links to itself" if ($to eq $from); |
|---|
| 282 | |
|---|
| 283 | if (exists $use{$from}) { |
|---|
| 284 | my $next = $use{$from}+1; |
|---|
| 285 | if (($next<=$depth) and ((not exists $use{$to}) or ($use{$to}>$next)) and (not $ignore{$to})) { |
|---|
| 286 | $use{$to} = $next; |
|---|
| 287 | # print STDERR "'$to' set to $next (triggered by '$from' with use=".$use{$from}.")\n"; |
|---|
| 288 | $added = 1; |
|---|
| 289 | } |
|---|
| 290 | } |
|---|
| 291 | if (exists $use{$to}) { |
|---|
| 292 | my $next = $use{$to}+1; |
|---|
| 293 | if (($next<=$depth) and ((not exists $use{$from}) or ($use{$from}>$next)) and (not $ignore{$from})) { |
|---|
| 294 | $use{$from} = $next; |
|---|
| 295 | # print STDERR "'$from' set to $next (triggered by '$to' with use=".$use{$to}.")\n"; |
|---|
| 296 | $added = 1; |
|---|
| 297 | } |
|---|
| 298 | } |
|---|
| 299 | } |
|---|
| 300 | } |
|---|
| 301 | |
|---|
| 302 | foreach (keys %link) { |
|---|
| 303 | die if (not $_ =~ />/o); |
|---|
| 304 | my ($from,$to) = ($`,$'); |
|---|
| 305 | if ((not exists $ignore{$from}) and (not exists $ignore{$to})) { |
|---|
| 306 | if ((exists $use{$from}) and (exists $use{$to})) { |
|---|
| 307 | ($from,$to) = (dot_label($from.'['.$use{$from}.']'),dot_label($to.'['.$use{$to}.']')); |
|---|
| 308 | print DOT ' '.$from.' -> '.$to.';'."\n"; |
|---|
| 309 | } |
|---|
| 310 | } |
|---|
| 311 | } |
|---|
| 312 | print DOT "}\n"; |
|---|
| 313 | close(DOT); |
|---|
| 314 | } |
|---|
| 315 | |
|---|
| 316 | sub main() { |
|---|
| 317 | my $args = scalar(@ARGV); |
|---|
| 318 | if ($args != 5) { |
|---|
| 319 | print "Usage: generate_index.pl XMLDIRECTORY HLPDESTDIR HTMLDESTDIR NAME_OF_INDEX.xml MAP.dot\n"; |
|---|
| 320 | print "Scans for xml-helpfiles in and below XMLDIRECTORY.\n"; |
|---|
| 321 | print "Generates\n"; |
|---|
| 322 | print "- list of all found helpfiles to STDOUT (assuming it is piped to NAME_OF_INDEX.xml)\n"; |
|---|
| 323 | print "- a (partial) help-map in file MAP.dot\n"; |
|---|
| 324 | print "Note: this script also handles cleanup after renaming/deleting help-files\n"; |
|---|
| 325 | die "Error: invalid number of arguments"; |
|---|
| 326 | } |
|---|
| 327 | |
|---|
| 328 | my $xml_dir = $ARGV[0]; |
|---|
| 329 | my $hlp_dest_dir = $ARGV[1]; |
|---|
| 330 | my $html_dest_dir = $ARGV[2]; |
|---|
| 331 | my $index_name = $ARGV[3]; |
|---|
| 332 | my $map_name = $ARGV[4]; |
|---|
| 333 | |
|---|
| 334 | if (not -d $xml_dir) { die "No such directory '$xml_dir'"; } |
|---|
| 335 | if (not -d $hlp_dest_dir) { die "No such directory '$hlp_dest_dir'"; } |
|---|
| 336 | if (not -d $html_dest_dir) { die "No such directory '$html_dest_dir'"; } |
|---|
| 337 | |
|---|
| 338 | generate_index($index_name, $xml_dir, $hlp_dest_dir, $html_dest_dir); |
|---|
| 339 | generate_map($map_name); |
|---|
| 340 | } |
|---|
| 341 | main(); |
|---|