1 | // ==================================================================== // |
---|
2 | // // |
---|
3 | // File : arb_help2xml.cxx // |
---|
4 | // Purpose : Converts old ARB help format to XML // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in October 2001 // |
---|
7 | // Copyright Department of Microbiology (Technical University Munich) // |
---|
8 | // // |
---|
9 | // Visit our web site at: http://www.arb-home.de/ // |
---|
10 | // // |
---|
11 | // ==================================================================== // |
---|
12 | |
---|
13 | #include <xml.hxx> |
---|
14 | #include <arb_defs.h> |
---|
15 | #include <arb_diff.h> |
---|
16 | #include <static_assert.h> |
---|
17 | |
---|
18 | #include <list> |
---|
19 | #include <set> |
---|
20 | #include <iostream> |
---|
21 | #include <fstream> |
---|
22 | |
---|
23 | #include <cstdlib> |
---|
24 | #include <cstdarg> |
---|
25 | #include <cstring> |
---|
26 | #include <climits> |
---|
27 | |
---|
28 | #include <unistd.h> |
---|
29 | #include <sys/stat.h> |
---|
30 | |
---|
31 | using namespace std; |
---|
32 | |
---|
33 | #define h2x_assert(bed) arb_assert(bed) |
---|
34 | |
---|
35 | #if defined(DEBUG) |
---|
36 | #define WARN_FORMATTING_PROBLEMS |
---|
37 | #define WARN_MISSING_HELP |
---|
38 | // #define DUMP_PARAGRAPHS |
---|
39 | // #define PROTECT_HELP_VS_CHANGES |
---|
40 | #endif // DEBUG |
---|
41 | |
---|
42 | |
---|
43 | #if defined(WARN_FORMATTING_PROBLEMS) |
---|
44 | |
---|
45 | #define WARN_FIXED_LAYOUT_LIST_ELEMENTS |
---|
46 | #define WARN_LONESOME_ENUM_ELEMENTS |
---|
47 | |
---|
48 | // warnings below are useless for production and shall be disabled in SVN |
---|
49 | // #define WARN_LONESOME_LIST_ELEMENTS |
---|
50 | // #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION |
---|
51 | // #define WARN_IGNORED_ALPHA_ENUMS |
---|
52 | |
---|
53 | #endif |
---|
54 | |
---|
55 | |
---|
56 | #define MAX_LINE_LENGTH 200 // maximum length of lines in input stream |
---|
57 | #define TABSIZE 8 |
---|
58 | |
---|
// Keywords that introduce a section in a help file.
// Entry i is the keyword belonging to SectionType(i), so the order here
// has to follow the order of the SEC_... values of enum SectionType.
static const char *knownSections[] = {
    "OCCURRENCE", "DESCRIPTION", "NOTES", "EXAMPLES",
    "WARNINGS",   "BUGS",        "SECTION",
};
---|
68 | |
---|
69 | enum SectionType { |
---|
70 | SEC_OCCURRENCE, |
---|
71 | SEC_DESCRIPTION, |
---|
72 | SEC_NOTES, |
---|
73 | SEC_EXAMPLES, |
---|
74 | SEC_WARNINGS, |
---|
75 | SEC_BUGS, |
---|
76 | SEC_SECTION, |
---|
77 | |
---|
78 | KNOWN_SECTION_TYPES, |
---|
79 | SEC_NONE, |
---|
80 | SEC_FAKE, |
---|
81 | }; |
---|
82 | |
---|
83 | STATIC_ASSERT(ARRAY_ELEMS(knownSections) == KNOWN_SECTION_TYPES); |
---|
84 | |
---|
85 | __ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) { |
---|
86 | static size_t buf_size = 256; |
---|
87 | static char *buffer = new char[buf_size]; |
---|
88 | |
---|
89 | size_t length; |
---|
90 | while (1) { |
---|
91 | if (!buffer) { |
---|
92 | h2x_assert(buffer); // to stop when debugging |
---|
93 | throw string("out of memory"); |
---|
94 | } |
---|
95 | |
---|
96 | length = vsnprintf(buffer, buf_size, format, argPtr); |
---|
97 | if (length < buf_size) break; // string fits into current buffer |
---|
98 | |
---|
99 | // otherwise resize buffer : |
---|
100 | buf_size += buf_size/2; |
---|
101 | delete [] buffer; |
---|
102 | buffer = new char[buf_size]; |
---|
103 | } |
---|
104 | |
---|
105 | return string(buffer, length); |
---|
106 | } |
---|
107 | |
---|
108 | __ATTR__FORMAT(1) static string strf(const char *format, ...) { |
---|
109 | va_list argPtr; |
---|
110 | va_start(argPtr, format); |
---|
111 | string result = vstrf(format, argPtr); |
---|
112 | va_end(argPtr); |
---|
113 | |
---|
114 | return result; |
---|
115 | } |
---|
116 | |
---|
117 | // ----------------------------- |
---|
118 | // warnings and errors |
---|
119 | |
---|
// A message text together with the number of the line it refers to.
class LineAttachedMessage {
    string text;
    size_t line;

public:
    LineAttachedMessage(const string& message_, size_t lineno_)
        : text(message_), line(lineno_)
    {}

    // the message text
    const string& Message() const { return text; }
    // the line number the message was attached to
    size_t Lineno() const { return line; }
};
---|
133 | |
---|
134 | const size_t NO_LINENUMBER_INFO = -1U; |
---|
135 | |
---|
136 | LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); } |
---|
137 | |
---|
138 | |
---|
139 | static list<LineAttachedMessage> warnings; |
---|
140 | inline void add_warning(const LineAttachedMessage& laMsg) { |
---|
141 | warnings.push_back(laMsg); |
---|
142 | } |
---|
143 | inline void add_warning(const string& warning, size_t lineno) { |
---|
144 | add_warning(LineAttachedMessage(warning, lineno)); |
---|
145 | } |
---|
146 | |
---|
147 | struct MessageAttachable { |
---|
148 | virtual ~MessageAttachable() {} |
---|
149 | |
---|
150 | virtual string location_description() const = 0; // may return empty string |
---|
151 | virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO |
---|
152 | |
---|
153 | LineAttachedMessage attached_message(const string& message) const { |
---|
154 | string where = location_description(); |
---|
155 | if (where.empty()) return LineAttachedMessage(message, line_number()); |
---|
156 | return LineAttachedMessage(message+" ["+where+"]", line_number()); |
---|
157 | } |
---|
158 | void attach_warning(const string& message) const { |
---|
159 | add_warning(attached_message(message)); |
---|
160 | } |
---|
161 | }; |
---|
162 | |
---|
163 | |
---|
164 | // ---------------------- |
---|
165 | // class Reader |
---|
166 | |
---|
167 | class Reader : public MessageAttachable { |
---|
168 | private: |
---|
169 | istream& in; |
---|
170 | char lineBuffer[MAX_LINE_LENGTH]; |
---|
171 | char lineBuffer2[MAX_LINE_LENGTH]; |
---|
172 | bool readAgain; |
---|
173 | bool eof; |
---|
174 | int lineNo; |
---|
175 | |
---|
176 | string location_description() const OVERRIDE { return ""; } |
---|
177 | size_t line_number() const OVERRIDE { return lineNo; } |
---|
178 | |
---|
179 | void getline() { |
---|
180 | if (!eof) { |
---|
181 | if (in.eof()) eof = true; |
---|
182 | else { |
---|
183 | h2x_assert(in.good()); |
---|
184 | |
---|
185 | in.getline(lineBuffer, MAX_LINE_LENGTH); |
---|
186 | lineNo++; |
---|
187 | |
---|
188 | if (in.eof()) eof = true; |
---|
189 | else if (in.fail()) throw "line too long"; |
---|
190 | |
---|
191 | if (strchr(lineBuffer, '\t')) { |
---|
192 | int o2 = 0; |
---|
193 | |
---|
194 | for (int o = 0; lineBuffer[o]; ++o) { |
---|
195 | if (lineBuffer[o] == '\t') { |
---|
196 | int spaces = TABSIZE - (o2 % TABSIZE); |
---|
197 | while (spaces--) lineBuffer2[o2++] = ' '; |
---|
198 | } |
---|
199 | else { |
---|
200 | lineBuffer2[o2++] = lineBuffer[o]; |
---|
201 | } |
---|
202 | } |
---|
203 | lineBuffer2[o2] = 0; |
---|
204 | strcpy(lineBuffer, lineBuffer2); |
---|
205 | } |
---|
206 | |
---|
207 | char *eol = strchr(lineBuffer, 0)-1; |
---|
208 | while (eol >= lineBuffer && isspace(eol[0])) { |
---|
209 | eol[0] = 0; // trim trailing whitespace |
---|
210 | eol--; |
---|
211 | } |
---|
212 | if (eol > lineBuffer) { |
---|
213 | // now eol points to last character |
---|
214 | if (eol[0] == '-' && isalnum(eol[-1])) { |
---|
215 | attach_warning("manual hyphenation detected"); |
---|
216 | } |
---|
217 | } |
---|
218 | } |
---|
219 | } |
---|
220 | } |
---|
221 | |
---|
222 | public: |
---|
223 | Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); } |
---|
224 | virtual ~Reader() {} |
---|
225 | |
---|
226 | const char *getNext() { |
---|
227 | if (readAgain) readAgain = false; |
---|
228 | else getline(); |
---|
229 | return eof ? NULp : lineBuffer; |
---|
230 | } |
---|
231 | |
---|
232 | void back() { |
---|
233 | h2x_assert(!readAgain); |
---|
234 | readAgain = true; |
---|
235 | } |
---|
236 | |
---|
237 | int getLineNo() const { return lineNo; } |
---|
238 | }; |
---|
239 | |
---|
// Kind of a paragraph inside a section.
enum ParagraphType {
    PLAIN_TEXT = 0, // ordinary text
    ENUMERATED,     // element of an enumeration (see EnumerationType)
    ITEM,           // element of an item list
};
---|
// How the elements of an enumeration are counted.
enum EnumerationType {
    NONE = 0,    // no enumeration
    DIGITS,      // counted with numbers
    ALPHA_UPPER, // counted with uppercase letters
    ALPHA_LOWER, // counted with lowercase letters
};
---|
251 | |
---|
252 | class Ostring { |
---|
253 | string content; |
---|
254 | size_t lineNo; // where string came from |
---|
255 | ParagraphType type; |
---|
256 | |
---|
257 | // only valid for type==ENUMERATED: |
---|
258 | EnumerationType etype; |
---|
259 | unsigned number; |
---|
260 | |
---|
261 | public: |
---|
262 | |
---|
263 | Ostring(const string& s, size_t line_no, ParagraphType type_) |
---|
264 | : content(s), |
---|
265 | lineNo(line_no), |
---|
266 | type(type_), |
---|
267 | etype(NONE) |
---|
268 | { |
---|
269 | h2x_assert(type != ENUMERATED); |
---|
270 | } |
---|
271 | Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num) |
---|
272 | : content(s), |
---|
273 | lineNo(line_no), |
---|
274 | type(type_), |
---|
275 | etype(etype_), |
---|
276 | number(num) |
---|
277 | { |
---|
278 | h2x_assert(type == ENUMERATED); |
---|
279 | h2x_assert(etype == DIGITS || etype == ALPHA_UPPER || etype == ALPHA_LOWER); |
---|
280 | h2x_assert(num>0); |
---|
281 | } |
---|
282 | |
---|
283 | |
---|
284 | operator const string&() const { return content; } |
---|
285 | operator string&() { return content; } |
---|
286 | |
---|
287 | const string& as_string() const { return content; } |
---|
288 | string& as_string() { return content; } |
---|
289 | |
---|
290 | size_t get_lineno() const { return lineNo; } |
---|
291 | |
---|
292 | const ParagraphType& get_type() const { return type; } |
---|
293 | const EnumerationType& get_enum_type() const { |
---|
294 | h2x_assert(type == ENUMERATED); |
---|
295 | return etype; |
---|
296 | } |
---|
297 | unsigned get_number() const { |
---|
298 | h2x_assert(type == ENUMERATED); |
---|
299 | return number; |
---|
300 | } |
---|
301 | |
---|
302 | // some wrapper to make Ostring act like string |
---|
303 | const char *c_str() const { return content.c_str(); } |
---|
304 | }; |
---|
305 | |
---|
306 | typedef list<Ostring> Ostrings; |
---|
307 | |
---|
308 | #if defined(WARN_MISSING_HELP) |
---|
309 | static void check_TODO(const char *line, const Reader& reader) { |
---|
310 | if (strstr(line, "@@@") || strstr(line, "TODO")) { |
---|
311 | reader.attach_warning(strf("TODO: %s", line)); |
---|
312 | } |
---|
313 | } |
---|
314 | #else |
---|
315 | inline void check_TODO(const char *, const Reader&) { } |
---|
316 | #endif // WARN_MISSING_HELP |
---|
317 | |
---|
318 | // ---------------------------- |
---|
319 | // class Section |
---|
320 | |
---|
321 | class Section FINAL_TYPE : public MessageAttachable { |
---|
322 | SectionType type; |
---|
323 | string name; |
---|
324 | Ostrings content; |
---|
325 | size_t lineno; |
---|
326 | |
---|
327 | string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; } |
---|
328 | |
---|
329 | public: |
---|
330 | Section(string name_, SectionType type_, size_t lineno_) |
---|
331 | : type(type_), |
---|
332 | name(name_), |
---|
333 | lineno(lineno_) |
---|
334 | {} |
---|
335 | virtual ~Section() {} |
---|
336 | |
---|
337 | const Ostrings& Content() const { return content; } |
---|
338 | Ostrings& Content() { return content; } |
---|
339 | SectionType get_type() const { return type; } |
---|
340 | size_t line_number() const OVERRIDE { return lineno; } |
---|
341 | const string& getName() const { return name; } |
---|
342 | void setName(const string& name_) { name = name_; } |
---|
343 | }; |
---|
344 | |
---|
345 | typedef list<Section> SectionList; |
---|
346 | |
---|
347 | // -------------------- |
---|
348 | // class Link |
---|
349 | |
---|
// A link to another help file plus the line number where the link was found.
class Link {
    string dest;
    size_t found_at;

public:
    Link(const string& target_, size_t source_lineno_)
        : dest(target_), found_at(source_lineno_)
    {}

    // name of the linked file
    const string& Target() const { return dest; }
    // line that contained the link
    size_t SourceLineno() const { return found_at; }
};

typedef list<Link> Links;
---|
365 | |
---|
366 | // ------------------------ |
---|
367 | // class Helpfile |
---|
368 | |
---|
369 | class Helpfile { |
---|
370 | private: |
---|
371 | Links uplinks; |
---|
372 | Links references; |
---|
373 | Links auto_references; |
---|
374 | Section title; |
---|
375 | SectionList sections; |
---|
376 | string inputfile; |
---|
377 | |
---|
378 | void check_self_ref(const string& link) { |
---|
379 | size_t slash = inputfile.find('/'); |
---|
380 | if (slash != string::npos) { |
---|
381 | if (inputfile.substr(slash+1) == link) { |
---|
382 | throw string("Invalid link to self"); |
---|
383 | } |
---|
384 | } |
---|
385 | } |
---|
386 | |
---|
387 | public: |
---|
388 | Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {} |
---|
389 | virtual ~Helpfile() {} |
---|
390 | |
---|
391 | void readHelp(istream& in, const string& filename); |
---|
392 | void writeXML(FILE *out, const string& page_name); |
---|
393 | void extractInternalLinks(); |
---|
394 | |
---|
395 | const Section& get_title() const { return title; } |
---|
396 | }; |
---|
397 | |
---|
// only the blank counts as white here (neither tab nor linefeed do)
inline bool isWhite(char c) {
    return ' ' == c;
}
---|
399 | |
---|
400 | inline bool isEmptyOrComment(const char *s) { |
---|
401 | if (s[0] == '#') return true; |
---|
402 | for (int off = 0; ; ++off) { |
---|
403 | if (s[off] == 0) return true; |
---|
404 | if (!isWhite(s[off])) break; |
---|
405 | } |
---|
406 | |
---|
407 | return false; |
---|
408 | } |
---|
409 | |
---|
410 | inline const char *extractKeyword(const char *line, string& keyword) { |
---|
411 | // returns NULp if no keyword was found |
---|
412 | // otherwise returns position behind keyword and sets value of 'keyword' |
---|
413 | |
---|
414 | const char *space = strchr(line, ' '); |
---|
415 | if (space && space>line) { |
---|
416 | keyword = string(line, 0, space-line); |
---|
417 | return space; |
---|
418 | } |
---|
419 | else if (!space) { // test for keyword w/o content behind |
---|
420 | if (line[0]) { // not empty |
---|
421 | keyword = line; |
---|
422 | return strchr(line, 0); |
---|
423 | } |
---|
424 | } |
---|
425 | return NULp; |
---|
426 | } |
---|
427 | |
---|
428 | inline const char *eatWhite(const char *line) { |
---|
429 | // skips whitespace |
---|
430 | while (isWhite(*line)) ++line; |
---|
431 | return line; |
---|
432 | } |
---|
433 | |
---|
434 | inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) { |
---|
435 | if (paragraph.length()) { |
---|
436 | if (type == ENUMERATED) { |
---|
437 | sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num)); |
---|
438 | } |
---|
439 | else { |
---|
440 | sec.Content().push_back(Ostring(paragraph, lineNo, type)); |
---|
441 | } |
---|
442 | |
---|
443 | type = PLAIN_TEXT; |
---|
444 | etype = NONE; |
---|
445 | paragraph = ""; |
---|
446 | } |
---|
447 | } |
---|
448 | |
---|
449 | inline const char *firstChar(const char *s) { |
---|
450 | while (isWhite(s[0])) ++s; |
---|
451 | return s; |
---|
452 | } |
---|
453 | |
---|
// Tests whether 'contentStart' begins an element of an item list:
// a '-' or '*', followed by exactly one whitespace character, followed by
// something that is neither whitespace nor '-'.
inline bool is_startof_itemlist_element(const char *contentStart) {
    const char bullet = contentStart[0];
    if (bullet != '-' && bullet != '*') return false;

    // isspace() is only defined for unsigned char values (and EOF) -> cast.
    // This test fails at the end of the string, so contentStart[2] is never read behind it.
    if (!isspace(static_cast<unsigned char>(contentStart[1]))) return false;

    const char next = contentStart[2];
    return !(isspace(static_cast<unsigned char>(next)) || next == '-');
}
---|
464 | |
---|
465 | #define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums |
---|
466 | |
---|
467 | static EnumerationType startsWithLetter(string& s, unsigned& number) { |
---|
468 | // tests if first line starts with 'letter.' |
---|
469 | // if true then 'letter.' is removed from the string |
---|
470 | // the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..) |
---|
471 | |
---|
472 | size_t off = s.find_first_not_of(" \n"); |
---|
473 | if (off == string::npos) return NONE; |
---|
474 | if (!isalpha(s[off])) return NONE; |
---|
475 | |
---|
476 | size_t astart = off; |
---|
477 | EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER; |
---|
478 | |
---|
479 | number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1; |
---|
480 | ++off; |
---|
481 | |
---|
482 | h2x_assert(number>0 && number<MAX_ALLOWED_ENUM); |
---|
483 | |
---|
484 | if (s[off] != '.' && s[off] != ')') return NONE; |
---|
485 | if (s[off+1] != ' ') return NONE; |
---|
486 | |
---|
487 | // remove 'letter.' from string : |
---|
488 | ++off; |
---|
489 | while (s[off+1] == ' ') ++off; |
---|
490 | s.erase(astart, off-astart+1); |
---|
491 | |
---|
492 | return etype; |
---|
493 | } |
---|
494 | |
---|
495 | static bool startsWithNumber(string& s, unsigned& number) { |
---|
496 | // tests if first line starts with 'number.' |
---|
497 | // if true then 'number.' is removed from the string |
---|
498 | |
---|
499 | size_t off = s.find_first_not_of(" \n"); |
---|
500 | if (off == string::npos) return false; |
---|
501 | if (!isdigit(s[off])) return false; |
---|
502 | |
---|
503 | size_t num_start = off; |
---|
504 | number = 0; |
---|
505 | |
---|
506 | for (; isdigit(s[off]); ++off) { |
---|
507 | number = number*10 + (s[off]-'0'); |
---|
508 | } |
---|
509 | if (number>MAX_ALLOWED_ENUM) return false; |
---|
510 | |
---|
511 | if (s[off] != '.' && s[off] != ')') return false; |
---|
512 | if (s[off+1] != ' ') return false; |
---|
513 | |
---|
514 | // remove 'number.' from string : |
---|
515 | ++off; |
---|
516 | while (s[off+1] == ' ') ++off; |
---|
517 | s.erase(num_start, off-num_start+1); |
---|
518 | |
---|
519 | return true; |
---|
520 | } |
---|
521 | |
---|
522 | static EnumerationType detectLineEnumType(string& line, unsigned& number) { |
---|
523 | if (startsWithNumber(line, number)) return DIGITS; |
---|
524 | return startsWithLetter(line, number); |
---|
525 | } |
---|
526 | |
---|
527 | static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) { |
---|
528 | string paragraph = line; |
---|
529 | size_t para_start_lineno = reader.getLineNo(); |
---|
530 | |
---|
531 | ParagraphType type = PLAIN_TEXT; |
---|
532 | EnumerationType etype = NONE; |
---|
533 | unsigned num = 0; |
---|
534 | |
---|
535 | unsigned last_alpha_num = -1; |
---|
536 | |
---|
537 | h2x_assert(sec.Content().empty()); |
---|
538 | |
---|
539 | while (1) { |
---|
540 | line = reader.getNext(); |
---|
541 | if (!line) break; |
---|
542 | |
---|
543 | if (isEmptyOrComment(line)) { |
---|
544 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
---|
545 | check_TODO(line, reader); |
---|
546 | } |
---|
547 | else { |
---|
548 | string keyword; |
---|
549 | const char *rest = extractKeyword(line, keyword); |
---|
550 | |
---|
551 | if (rest) { // a new keyword |
---|
552 | reader.back(); |
---|
553 | break; |
---|
554 | } |
---|
555 | |
---|
556 | check_TODO(line, reader); |
---|
557 | |
---|
558 | string Line = line; |
---|
559 | |
---|
560 | if (sec.get_type() == SEC_OCCURRENCE) { |
---|
561 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
---|
562 | } |
---|
563 | else { |
---|
564 | const char *firstNonWhite = firstChar(line); |
---|
565 | if (is_startof_itemlist_element(firstNonWhite)) { |
---|
566 | h2x_assert(firstNonWhite != line); |
---|
567 | |
---|
568 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
---|
569 | |
---|
570 | Line[firstNonWhite-line] = ' '; |
---|
571 | type = ITEM; // is reset in call to pushParagraph |
---|
572 | } |
---|
573 | else { |
---|
574 | unsigned foundNum; |
---|
575 | EnumerationType foundEtype = detectLineEnumType(Line, foundNum); |
---|
576 | |
---|
577 | if (foundEtype == ALPHA_UPPER || foundEtype == ALPHA_LOWER) { |
---|
578 | if (foundNum == (last_alpha_num+1) || foundNum == 1) { |
---|
579 | last_alpha_num = foundNum; |
---|
580 | } |
---|
581 | else { |
---|
582 | #if defined(WARN_IGNORED_ALPHA_ENUMS) |
---|
583 | add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum")); |
---|
584 | #endif |
---|
585 | foundEtype = NONE; |
---|
586 | |
---|
587 | reader.back(); |
---|
588 | Line = reader.getNext(); |
---|
589 | last_alpha_num = -1; |
---|
590 | } |
---|
591 | } |
---|
592 | |
---|
593 | if (foundEtype != NONE) { |
---|
594 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
---|
595 | |
---|
596 | type = ENUMERATED; |
---|
597 | num = foundNum; |
---|
598 | etype = foundEtype; |
---|
599 | |
---|
600 | if (!num) { |
---|
601 | h2x_assert(etype == DIGITS); |
---|
602 | throw "Enumerations starting with zero are not supported"; |
---|
603 | } |
---|
604 | } |
---|
605 | } |
---|
606 | } |
---|
607 | |
---|
608 | if (paragraph.length()) { |
---|
609 | paragraph = paragraph+"\n"+Line; |
---|
610 | } |
---|
611 | else { |
---|
612 | paragraph = string("\n")+Line; |
---|
613 | para_start_lineno = reader.getLineNo(); |
---|
614 | } |
---|
615 | } |
---|
616 | } |
---|
617 | |
---|
618 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
---|
619 | |
---|
620 | if (sec.Content().size()>0 && indentation>0) { |
---|
621 | string spaces; |
---|
622 | spaces.reserve(indentation); |
---|
623 | spaces.append(indentation, ' '); |
---|
624 | |
---|
625 | string& ostr = sec.Content().front(); |
---|
626 | ostr = string("\n") + spaces + ostr; |
---|
627 | } |
---|
628 | } |
---|
629 | |
---|
630 | inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) { |
---|
631 | for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) { |
---|
632 | if (ex->Target() == link) { |
---|
633 | if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno()); |
---|
634 | throw strf("Link to '%s' duplicated here.", link.c_str()); |
---|
635 | } |
---|
636 | } |
---|
637 | } |
---|
638 | inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) { |
---|
639 | check_specific_duplicates(link, uplinks, add_warnings); |
---|
640 | check_specific_duplicates(link, references, add_warnings); |
---|
641 | } |
---|
642 | |
---|
643 | static void warnAboutDuplicate(SectionList& sections) { |
---|
644 | set<string> seen; |
---|
645 | SectionList::iterator end = sections.end(); |
---|
646 | for (SectionList::iterator s = sections.begin(); s != end; ++s) { |
---|
647 | const string& sname = s->getName(); |
---|
648 | if (sname == "NOTES") continue; // do not warn about multiple NOTES sections |
---|
649 | |
---|
650 | SectionList::iterator o = s; ++o; |
---|
651 | for (; o != end; ++o) { |
---|
652 | if (sname == o->getName()) { |
---|
653 | o->attach_warning("duplicated SECTION name"); |
---|
654 | if (seen.find(sname) == seen.end()) { |
---|
655 | s->attach_warning("name was first used"); |
---|
656 | seen.insert(sname); |
---|
657 | } |
---|
658 | } |
---|
659 | } |
---|
660 | } |
---|
661 | } |
---|
662 | |
---|
663 | void Helpfile::readHelp(istream& in, const string& filename) { |
---|
664 | if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str())); |
---|
665 | |
---|
666 | Reader read(in); |
---|
667 | |
---|
668 | inputfile = filename; // remember file read (for comment) |
---|
669 | |
---|
670 | const char *line; |
---|
671 | const char *name_only = strrchr(filename.c_str(), '/'); |
---|
672 | |
---|
673 | h2x_assert(name_only); |
---|
674 | ++name_only; |
---|
675 | |
---|
676 | try { |
---|
677 | while (1) { |
---|
678 | line = read.getNext(); |
---|
679 | if (!line) break; |
---|
680 | |
---|
681 | if (isEmptyOrComment(line)) { |
---|
682 | check_TODO(line, read); |
---|
683 | continue; |
---|
684 | } |
---|
685 | |
---|
686 | check_TODO(line, read); |
---|
687 | |
---|
688 | string keyword; |
---|
689 | const char *rest = extractKeyword(line, keyword); |
---|
690 | |
---|
691 | if (rest) { // found a keyword |
---|
692 | if (keyword == "UP") { |
---|
693 | rest = eatWhite(rest); |
---|
694 | if (strlen(rest)) { |
---|
695 | check_duplicates(rest, uplinks, references, true); |
---|
696 | if (strcmp(name_only, rest) == 0) throw "UP link to self"; |
---|
697 | |
---|
698 | uplinks.push_back(Link(rest, read.getLineNo())); |
---|
699 | } |
---|
700 | } |
---|
701 | else if (keyword == "SUB") { |
---|
702 | rest = eatWhite(rest); |
---|
703 | if (strlen(rest)) { |
---|
704 | check_duplicates(rest, uplinks, references, true); |
---|
705 | if (strcmp(name_only, rest) == 0) throw "SUB link to self"; |
---|
706 | |
---|
707 | references.push_back(Link(rest, read.getLineNo())); |
---|
708 | } |
---|
709 | } |
---|
710 | else if (keyword == "TITLE") { |
---|
711 | rest = eatWhite(rest); |
---|
712 | parseSection(title, rest, 0, read); |
---|
713 | |
---|
714 | if (title.Content().empty()) throw "empty TITLE not allowed"; |
---|
715 | |
---|
716 | const char *t = title.Content().front().c_str(); |
---|
717 | |
---|
718 | if (strstr(t, "Standard help file form")) { |
---|
719 | throw strf("Illegal title for help file: '%s'", t); |
---|
720 | } |
---|
721 | } |
---|
722 | else { |
---|
723 | if (keyword == "NOTE") keyword = "NOTES"; |
---|
724 | if (keyword == "EXAMPLE") keyword = "EXAMPLES"; |
---|
725 | if (keyword == "WARNING") keyword = "WARNINGS"; |
---|
726 | |
---|
727 | SectionType stype = SEC_NONE; |
---|
728 | int idx; |
---|
729 | for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) { |
---|
730 | if (knownSections[idx] == keyword) { |
---|
731 | stype = SectionType(idx); |
---|
732 | break; |
---|
733 | } |
---|
734 | } |
---|
735 | |
---|
736 | size_t lineno = read.getLineNo(); |
---|
737 | |
---|
738 | if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str()); |
---|
739 | |
---|
740 | if (stype == SEC_SECTION) { |
---|
741 | string section_name = eatWhite(rest); |
---|
742 | Section sec(section_name, stype, lineno); |
---|
743 | parseSection(sec, "", 0, read); |
---|
744 | sections.push_back(sec); |
---|
745 | } |
---|
746 | else { |
---|
747 | Section sec(keyword, stype, lineno); |
---|
748 | rest = eatWhite(rest); |
---|
749 | parseSection(sec, rest, rest-line, read); |
---|
750 | sections.push_back(sec); |
---|
751 | } |
---|
752 | } |
---|
753 | } |
---|
754 | else { |
---|
755 | throw strf("Unhandled line"); |
---|
756 | } |
---|
757 | } |
---|
758 | |
---|
759 | warnAboutDuplicate(sections); |
---|
760 | } |
---|
761 | catch (string& err) { throw read.attached_message(err); } |
---|
762 | catch (const char *err) { throw read.attached_message(err); } |
---|
763 | } |
---|
764 | |
---|
765 | static bool shouldReflow(const string& s, int& foundIndentation) { |
---|
766 | // foundIndentation is only valid if shouldReflow() returns true |
---|
767 | enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START; |
---|
768 | bool equal_indent = true; |
---|
769 | int lastIndent = -1; |
---|
770 | int thisIndent = 0; |
---|
771 | |
---|
772 | for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) { |
---|
773 | if (*c == '\n') { |
---|
774 | state = START; |
---|
775 | thisIndent = 0; |
---|
776 | } |
---|
777 | else if (isWhite(*c)) { |
---|
778 | if (state == DOT || state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed |
---|
779 | else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces |
---|
780 | else if (state == CHAR) state = SPACE; // now seen 1 space |
---|
781 | } |
---|
782 | else { |
---|
783 | if (state == MULTIPLE) return false; // character after multiple spaces |
---|
784 | if (state == START) { |
---|
785 | if (lastIndent == -1) lastIndent = thisIndent; |
---|
786 | else if (lastIndent != thisIndent) equal_indent = false; |
---|
787 | } |
---|
788 | state = (*c == '.' || *c == ',') ? DOT : CHAR; |
---|
789 | } |
---|
790 | } |
---|
791 | |
---|
792 | if (lastIndent<0) { |
---|
793 | equal_indent = false; |
---|
794 | } |
---|
795 | |
---|
796 | if (equal_indent) { |
---|
797 | foundIndentation = lastIndent-1; |
---|
798 | h2x_assert(foundIndentation >= 0); |
---|
799 | } |
---|
800 | return equal_indent; |
---|
801 | } |
---|
802 | |
---|
803 | static string correctSpaces(const string& text, int change) { |
---|
804 | h2x_assert(text.find('\n') == string::npos); |
---|
805 | |
---|
806 | if (!change) return text; |
---|
807 | |
---|
808 | size_t first = text.find_first_not_of(' '); |
---|
809 | if (first == string::npos) return ""; // empty line |
---|
810 | |
---|
811 | if (change<0) { |
---|
812 | int remove = -change; |
---|
813 | h2x_assert(remove <= int(first)); |
---|
814 | return text.substr(remove); |
---|
815 | } |
---|
816 | |
---|
817 | h2x_assert(change>0); // add spaces |
---|
818 | return string(change, ' ')+text; |
---|
819 | } |
---|
820 | |
---|
821 | static string correctIndentation(const string& text, int change) { |
---|
822 | // removes 'remove' spaces from every line |
---|
823 | |
---|
824 | size_t this_lineend = text.find('\n'); |
---|
825 | string result; |
---|
826 | |
---|
827 | if (this_lineend == string::npos) { |
---|
828 | result = correctSpaces(text, change); |
---|
829 | } |
---|
830 | else { |
---|
831 | result = correctSpaces(text.substr(0, this_lineend), change); |
---|
832 | |
---|
833 | while (this_lineend != string::npos) { |
---|
834 | size_t next_lineend = text.find('\n', this_lineend+1); |
---|
835 | if (next_lineend == string::npos) { // last line |
---|
836 | result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change); |
---|
837 | } |
---|
838 | else { |
---|
839 | result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change); |
---|
840 | } |
---|
841 | this_lineend = next_lineend; |
---|
842 | } |
---|
843 | } |
---|
844 | return result; |
---|
845 | } |
---|
846 | |
---|
// Returns the number of leading blanks of 'text'.
// Returns INT_MAX for lines that are empty or consist of blanks only.
inline size_t countSpaces(const string& text) {
    const size_t first = text.find_first_not_of(' ');
    return first == string::npos ? size_t(INT_MAX) : first;
}
---|
852 | |
---|
853 | static size_t scanMinIndentation(const string& text) { |
---|
854 | size_t this_lineend = text.find('\n'); |
---|
855 | size_t min_indent = INT_MAX; |
---|
856 | |
---|
857 | if (this_lineend == string::npos) { |
---|
858 | min_indent = countSpaces(text); |
---|
859 | } |
---|
860 | else { |
---|
861 | while (this_lineend != string::npos) { |
---|
862 | size_t next_lineend = text.find('\n', this_lineend+1); |
---|
863 | if (next_lineend == string::npos) { |
---|
864 | min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1))); |
---|
865 | } |
---|
866 | else { |
---|
867 | min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1))); |
---|
868 | } |
---|
869 | this_lineend = next_lineend; |
---|
870 | } |
---|
871 | } |
---|
872 | |
---|
873 | if (min_indent == INT_MAX) min_indent = 0; // only empty lines |
---|
874 | return min_indent; |
---|
875 | } |
---|
876 | |
---|
877 | // ----------------------------- |
---|
878 | // class ParagraphTree |
---|
879 | |
---|
880 | class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable { |
---|
881 | ParagraphTree *brother; // has same indentation as this |
---|
882 | ParagraphTree *son; // indentation + 1 |
---|
883 | |
---|
884 | Ostring otext; // text of the Section (containing linefeeds) |
---|
885 | |
---|
886 | bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text) |
---|
887 | int indentation; // the real indentation of the blank (behind removed enumeration) |
---|
888 | |
---|
889 | |
---|
890 | string location_description() const OVERRIDE { return "in paragraph starting here"; } |
---|
891 | size_t line_number() const OVERRIDE { return otext.get_lineno(); } |
---|
892 | |
---|
893 | ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) |
---|
894 | : son(NULp), |
---|
895 | otext(*begin), |
---|
896 | indentation(0) |
---|
897 | { |
---|
898 | h2x_assert(begin != end); |
---|
899 | |
---|
900 | string& text = otext; |
---|
901 | |
---|
902 | reflow = shouldReflow(text, indentation); |
---|
903 | if (!reflow) { |
---|
904 | size_t reststart = text.find('\n', 1); |
---|
905 | |
---|
906 | if (reststart == 0) { |
---|
907 | attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail"); |
---|
908 | } |
---|
909 | |
---|
910 | if (reststart != string::npos) { |
---|
911 | int rest_indent = -1; |
---|
912 | string rest = text.substr(reststart); |
---|
913 | bool rest_reflow = shouldReflow(rest, rest_indent); |
---|
914 | |
---|
915 | if (rest_reflow) { |
---|
916 | int first_indent = countSpaces(text.substr(1)); |
---|
917 | if (get_type() == PLAIN_TEXT) { |
---|
918 | size_t last = text.find_last_not_of(' ', reststart-1); |
---|
919 | bool is_header = last != string::npos && text[last] == ':'; |
---|
920 | |
---|
921 | if (!is_header && rest_indent == (first_indent+8)) { |
---|
922 | #if defined(DEBUG) |
---|
923 | size_t textstart = text.find_first_not_of(" \n"); |
---|
924 | h2x_assert(textstart != string::npos); |
---|
925 | #endif // DEBUG |
---|
926 | |
---|
927 | text = text.substr(0, reststart)+correctIndentation(rest, -8); |
---|
928 | reflow = shouldReflow(text, indentation); |
---|
929 | } |
---|
930 | } |
---|
931 | else { |
---|
932 | int diff = rest_indent-first_indent; |
---|
933 | if (diff>0) { |
---|
934 | text = text.substr(0, reststart)+correctIndentation(rest, -diff); |
---|
935 | reflow = shouldReflow(text, indentation); |
---|
936 | } |
---|
937 | else if (diff<0) { |
---|
938 | // paragraph with more indent on first line (occurs?) |
---|
939 | attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff)); |
---|
940 | } |
---|
941 | } |
---|
942 | } |
---|
943 | } |
---|
944 | } |
---|
945 | |
---|
946 | if (!reflow) { |
---|
947 | indentation = scanMinIndentation(text); |
---|
948 | } |
---|
949 | text = correctIndentation(text, -indentation); |
---|
950 | if (get_type() == ITEM) { |
---|
951 | h2x_assert(indentation >= 2); |
---|
952 | indentation -= 2; |
---|
953 | } |
---|
954 | |
---|
955 | brother = buildParagraphTree(++begin, end); |
---|
956 | } |
---|
957 | |
---|
958 | void brothers_to_sons(ParagraphTree *new_brother); |
---|
959 | |
---|
960 | public: |
---|
961 | virtual ~ParagraphTree() { |
---|
962 | delete brother; |
---|
963 | delete son; |
---|
964 | } |
---|
965 | |
---|
966 | ParagraphType get_type() const { return otext.get_type(); } |
---|
967 | |
---|
968 | bool is_itemlist_member() const { return get_type() == ITEM; } |
---|
969 | unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; } |
---|
970 | EnumerationType get_enum_type() const { return otext.get_enum_type(); } |
---|
971 | |
---|
972 | const char *readable_type() const { |
---|
973 | const char *res = NULp; |
---|
974 | switch (get_type()) { |
---|
975 | case PLAIN_TEXT: res = "PLAIN_TEXT"; break; |
---|
976 | case ITEM: res = "ITEM"; break; |
---|
977 | case ENUMERATED: res = "ENUMERATED"; break; |
---|
978 | } |
---|
979 | return res; |
---|
980 | } |
---|
981 | |
---|
982 | size_t countTextNodes() { |
---|
983 | size_t nodes = 1; // this |
---|
984 | if (son) nodes += son->countTextNodes(); |
---|
985 | if (brother) nodes += brother->countTextNodes(); |
---|
986 | return nodes; |
---|
987 | } |
---|
988 | |
---|
989 | #if defined(DUMP_PARAGRAPHS) |
---|
990 | void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; } |
---|
991 | char *masknl(const char *text) { |
---|
992 | char *result = ARB_strdup(text); |
---|
993 | for (int i = 0; result[i]; ++i) { |
---|
994 | if (result[i] == '\n') result[i] = '|'; |
---|
995 | } |
---|
996 | return result; |
---|
997 | } |
---|
998 | void dump(ostream& out, int indent = 0) { |
---|
999 | print_indent(out, indent+1); |
---|
1000 | { |
---|
1001 | char *mtext = masknl(otext.as_string().c_str()); |
---|
1002 | out << "text='" << mtext << "'\n"; |
---|
1003 | free(mtext); |
---|
1004 | } |
---|
1005 | |
---|
1006 | print_indent(out, indent+1); |
---|
1007 | out << "type='" << readable_type() << "' "; |
---|
1008 | if (get_type() == ENUMERATED) { |
---|
1009 | out << "enumeration='" << otext.get_number() << "' "; |
---|
1010 | } |
---|
1011 | out << "reflow='" << reflow << "' "; |
---|
1012 | out << "indentation='" << indentation << "'\n"; |
---|
1013 | |
---|
1014 | if (son) { |
---|
1015 | print_indent(out, indent+2); cout << "son:\n"; |
---|
1016 | son->dump(out, indent+2); |
---|
1017 | cout << "\n"; |
---|
1018 | } |
---|
1019 | if (brother) { |
---|
1020 | print_indent(out, indent); cout << "brother:\n"; |
---|
1021 | brother->dump(out, indent); |
---|
1022 | } |
---|
1023 | } |
---|
1024 | #endif // DUMP_PARAGRAPHS |
---|
1025 | |
---|
1026 | private: |
---|
1027 | static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) { |
---|
1028 | if (begin == end) return NULp; |
---|
1029 | return new ParagraphTree(begin, end); |
---|
1030 | } |
---|
1031 | public: |
---|
1032 | static ParagraphTree* buildParagraphTree(const Section& sec) { |
---|
1033 | const Ostrings& txt = sec.Content(); |
---|
1034 | if (txt.empty()) throw "attempt to build an empty ParagraphTree"; |
---|
1035 | return buildParagraphTree(txt.begin(), txt.end()); |
---|
1036 | } |
---|
1037 | |
---|
1038 | bool contains(ParagraphTree *that) { |
---|
1039 | return |
---|
1040 | this == that || |
---|
1041 | (son && son->contains(that)) || |
---|
1042 | (brother && brother->contains(that)); |
---|
1043 | } |
---|
1044 | |
---|
1045 | ParagraphTree *predecessor(ParagraphTree *before_this) { |
---|
1046 | if (brother == before_this) return this; |
---|
1047 | if (!brother) return NULp; |
---|
1048 | return brother->predecessor(before_this); |
---|
1049 | } |
---|
1050 | |
---|
1051 | void append(ParagraphTree *new_brother) { |
---|
1052 | if (!brother) brother = new_brother; |
---|
1053 | else brother->append(new_brother); |
---|
1054 | } |
---|
1055 | |
---|
1056 | bool is_some_brother(const ParagraphTree *other) const { |
---|
1057 | return (other == brother) || (brother && brother->is_some_brother(other)); |
---|
1058 | } |
---|
1059 | |
---|
1060 | ParagraphTree* takeAllInFrontOf(ParagraphTree *after) { |
---|
1061 | ParagraphTree *removed = this; |
---|
1062 | ParagraphTree *after_pred = this; |
---|
1063 | |
---|
1064 | h2x_assert(is_some_brother(after)); |
---|
1065 | |
---|
1066 | while (1) { |
---|
1067 | h2x_assert(after_pred); |
---|
1068 | h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after' |
---|
1069 | |
---|
1070 | if (after_pred->brother == after) { // found after |
---|
1071 | after_pred->brother = NULp; // unlink |
---|
1072 | break; |
---|
1073 | } |
---|
1074 | after_pred = after_pred->brother; |
---|
1075 | } |
---|
1076 | |
---|
1077 | return removed; |
---|
1078 | } |
---|
1079 | |
---|
1080 | ParagraphTree *firstListMember() { |
---|
1081 | switch (get_type()) { |
---|
1082 | case PLAIN_TEXT: break; |
---|
1083 | case ITEM: return this; |
---|
1084 | case ENUMERATED: { |
---|
1085 | if (get_enumeration() == 1) return this; |
---|
1086 | break; |
---|
1087 | } |
---|
1088 | } |
---|
1089 | if (brother) return brother->firstListMember(); |
---|
1090 | return NULp; |
---|
1091 | } |
---|
1092 | |
---|
1093 | ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) { |
---|
1094 | if (indentation<previous.indentation) return NULp; |
---|
1095 | if (indentation == previous.indentation && get_type() == previous.get_type()) { |
---|
1096 | if (get_type() != ENUMERATED) return this; |
---|
1097 | if (get_enumeration() > previous.get_enumeration()) return this; |
---|
1098 | return NULp; |
---|
1099 | } |
---|
1100 | if (!brother) return NULp; |
---|
1101 | return brother->nextListMemberAfter(previous); |
---|
1102 | } |
---|
1103 | ParagraphTree *nextListMember() const { |
---|
1104 | return brother ? brother->nextListMemberAfter(*this) : NULp; |
---|
1105 | } |
---|
1106 | |
---|
1107 | ParagraphTree* firstWithLessIndentThan(int wanted_indentation) { |
---|
1108 | if (indentation < wanted_indentation) return this; |
---|
1109 | if (!brother) return NULp; |
---|
1110 | return brother->firstWithLessIndentThan(wanted_indentation); |
---|
1111 | } |
---|
1112 | |
---|
1113 | void format_indentations(); |
---|
1114 | void format_lists(); |
---|
1115 | |
---|
1116 | private: |
---|
1117 | static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) { |
---|
1118 | Ostrings S; |
---|
1119 | S.push_back(Ostring(Text, beginLineNo, type)); |
---|
1120 | return new ParagraphTree(S.begin(), S.end()); |
---|
1121 | } |
---|
1122 | ParagraphTree *xml_write_list_contents(); |
---|
1123 | ParagraphTree *xml_write_enum_contents(); |
---|
1124 | void xml_write_textblock(); |
---|
1125 | |
---|
1126 | public: |
---|
1127 | void xml_write(); |
---|
1128 | }; |
---|
1129 | |
---|
1130 | #if defined(DUMP_PARAGRAPHS) |
---|
1131 | static void dump_paragraph(ParagraphTree *para) { |
---|
1132 | // helper function for use in gdb |
---|
1133 | para->dump(cout, 0); |
---|
1134 | } |
---|
1135 | #endif |
---|
1136 | |
---|
1137 | void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) { |
---|
1138 | /*! folds down brothers to sons |
---|
1139 | * @param new_brother brother of 'this->brother', will become new brother. |
---|
1140 | * If new_brother == NULp -> make all brothers sons. |
---|
1141 | */ |
---|
1142 | |
---|
1143 | if (new_brother) { |
---|
1144 | h2x_assert(is_some_brother(new_brother)); |
---|
1145 | |
---|
1146 | if (brother != new_brother) { |
---|
1147 | #if defined(DEBUG) |
---|
1148 | if (son) { |
---|
1149 | son->attach_warning("Found unexpected son (in brothers_to_sons)"); |
---|
1150 | brother->attach_warning("while trying to transform paragraphs from here .."); |
---|
1151 | new_brother->attach_warning(".. to here .."); |
---|
1152 | attach_warning(".. into sons of this paragraph."); |
---|
1153 | return; |
---|
1154 | } |
---|
1155 | #endif |
---|
1156 | |
---|
1157 | h2x_assert(!son); |
---|
1158 | h2x_assert(brother); |
---|
1159 | |
---|
1160 | if (!new_brother) { // all brothers -> sons |
---|
1161 | son = brother; |
---|
1162 | brother = NULp; |
---|
1163 | } |
---|
1164 | else { |
---|
1165 | son = brother->takeAllInFrontOf(new_brother); |
---|
1166 | brother = new_brother; |
---|
1167 | } |
---|
1168 | } |
---|
1169 | } |
---|
1170 | else { |
---|
1171 | h2x_assert(!son); |
---|
1172 | son = brother; |
---|
1173 | brother = NULp; |
---|
1174 | } |
---|
1175 | } |
---|
1176 | void ParagraphTree::format_lists() { |
---|
1177 | // reformats tree such that all items/enumerations are brothers |
---|
1178 | ParagraphTree *member = firstListMember(); |
---|
1179 | if (member) { |
---|
1180 | for (ParagraphTree *curr = this; curr != member; curr = curr->brother) { |
---|
1181 | h2x_assert(curr); |
---|
1182 | if (curr->son) curr->son->format_lists(); |
---|
1183 | } |
---|
1184 | |
---|
1185 | for (ParagraphTree *next = member->nextListMember(); |
---|
1186 | next; |
---|
1187 | member = next, next = member->nextListMember()) |
---|
1188 | { |
---|
1189 | member->brothers_to_sons(next); |
---|
1190 | h2x_assert(member->brother == next); |
---|
1191 | |
---|
1192 | if (member->son) member->son->format_lists(); |
---|
1193 | } |
---|
1194 | |
---|
1195 | h2x_assert(!member->son); // member is the last item |
---|
1196 | |
---|
1197 | if (member->brother) { |
---|
1198 | ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1); |
---|
1199 | member->brothers_to_sons(non_member); |
---|
1200 | } |
---|
1201 | |
---|
1202 | if (member->son) member->son->format_lists(); |
---|
1203 | if (member->brother) member->brother->format_lists(); |
---|
1204 | } |
---|
1205 | else { |
---|
1206 | for (ParagraphTree *curr = this; curr; curr = curr->brother) { |
---|
1207 | h2x_assert(curr); |
---|
1208 | if (curr->son) curr->son->format_lists(); |
---|
1209 | } |
---|
1210 | } |
---|
1211 | } |
---|
1212 | |
---|
1213 | void ParagraphTree::format_indentations() { |
---|
1214 | if (brother) { |
---|
1215 | ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1); |
---|
1216 | #if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION) |
---|
1217 | if (same_indent && indentation != same_indent->indentation) { |
---|
1218 | same_indent->attach_warning("indentation is assumed to be same as .."); |
---|
1219 | attach_warning(".. here"); |
---|
1220 | } |
---|
1221 | #endif |
---|
1222 | brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs |
---|
1223 | if (brother) brother->format_indentations(); |
---|
1224 | } |
---|
1225 | |
---|
1226 | if (son) son->format_indentations(); |
---|
1227 | } |
---|
1228 | |
---|
1229 | // ----------------- |
---|
1230 | // LinkType |
---|
1231 | |
---|
enum LinkType {
    // each known type occupies its own bit (allows to test for several types using a bit-mask)
    LT_UNKNOWN = 0,
    LT_HTTP    = 1<<0,
    LT_HTTPS   = 1<<1,
    LT_FTP     = 1<<2,
    LT_FILE    = 1<<3,
    LT_EMAIL   = 1<<4,
    LT_HLP     = 1<<5,
    LT_PS      = 1<<6,
    LT_PDF     = 1<<7
};
---|
1243 | |
---|
static const char *link_id[] = {
    // index 0 is used for LT_UNKNOWN, index N for the LinkType with value 1<<(N-1)
    "unknown",
    "www", "www", "www", "www", // "http:", "https:", "ftp:", "file:"
    "email",
    "hlp",
    "ps",
    "pdf"
};
---|
1255 | |
---|
1256 | static string LinkType2id(LinkType type) { |
---|
1257 | int idx = 0; |
---|
1258 | while (type >= 1) { |
---|
1259 | idx++; |
---|
1260 | type = LinkType(type>>1); |
---|
1261 | } |
---|
1262 | return link_id[idx]; |
---|
1263 | } |
---|
1264 | |
---|
inline const char *getExtension(const string& name) {
    // Returns a pointer to the text behind the last '.' of 'name'
    // (pointing into the buffer of 'name') or NULp if 'name' contains no '.'.
    const size_t dot_pos = name.rfind('.');
    return dot_pos == string::npos ? NULp : name.c_str()+dot_pos+1;
}
---|
1272 | |
---|
1273 | static LinkType detectLinkType(const string& link_target) { |
---|
1274 | LinkType type = LT_UNKNOWN; |
---|
1275 | const char *ext = getExtension(link_target); |
---|
1276 | |
---|
1277 | if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP; |
---|
1278 | else if (link_target.find("http://") == 0) type = LT_HTTP; |
---|
1279 | else if (link_target.find("https://") == 0) type = LT_HTTPS; |
---|
1280 | else if (link_target.find("ftp://") == 0) type = LT_FTP; |
---|
1281 | else if (link_target.find("file://") == 0) type = LT_FILE; |
---|
1282 | else if (link_target.find('@') != string::npos) type = LT_EMAIL; |
---|
1283 | else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS; |
---|
1284 | else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF; |
---|
1285 | |
---|
1286 | return type; |
---|
1287 | } |
---|
1288 | |
---|
1289 | // -------------------------------------------------------------------------------- |
---|
1290 | |
---|
1291 | |
---|
1292 | |
---|
static string locate_helpfile(const string& helpname) {
    // search for 'helpname' in various helpfile locations
    // (returns the first candidate for which stat() succeeds, "" if there is none)

    static const char * const search_dir[] = { "oldhelp/", "genhelp/" };
    const size_t              dir_count    = sizeof(search_dir)/sizeof(search_dir[0]);

    for (size_t d = 0; d<dir_count; ++d) {
        const string candidate = string(search_dir[d])+helpname;
        struct stat  info;

        if (stat(candidate.c_str(), &info) == 0) return candidate;
    }
    return "";
}
---|
1309 | |
---|
1310 | static string locate_document(const string& docname) { |
---|
1311 | // search for 'docname' or 'docname.gz' in various helpfile locations |
---|
1312 | |
---|
1313 | string located = locate_helpfile(docname); |
---|
1314 | if (located.empty()) { |
---|
1315 | located = locate_helpfile(docname+".gz"); |
---|
1316 | } |
---|
1317 | return located; |
---|
1318 | } |
---|
1319 | |
---|
1320 | static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) { |
---|
1321 | if (type == LT_UNKNOWN) { |
---|
1322 | string msg = string("Invalid link (dest='")+dest+"')"; |
---|
1323 | throw LineAttachedMessage(msg, source_line); |
---|
1324 | } |
---|
1325 | |
---|
1326 | link.add_attribute("dest", dest); |
---|
1327 | link.add_attribute("type", LinkType2id(type)); |
---|
1328 | link.add_attribute("source_line", source_line); |
---|
1329 | |
---|
1330 | if (type&(LT_HLP|LT_PDF|LT_PS)) { // other links (www, email) cannot be checked for existence here |
---|
1331 | string fullhelp = ((type<_HLP) ? locate_helpfile : locate_document)(dest); |
---|
1332 | if (fullhelp.empty()) { |
---|
1333 | link.add_attribute("missing", "1"); |
---|
1334 | string deadlink = strf("Dead link to '%s'", dest.c_str()); |
---|
1335 | #if defined(DEVEL_RELEASE) |
---|
1336 | throw LineAttachedMessage(deadlink, source_line); |
---|
1337 | #else // !defined(DEVEL_RELEASE) |
---|
1338 | add_warning(deadlink, source_line); |
---|
1339 | #endif |
---|
1340 | } |
---|
1341 | } |
---|
1342 | } |
---|
1343 | |
---|
1344 | static void print_XML_Text_expanding_links(const string& text, size_t lineNo) { |
---|
1345 | size_t found = text.find("LINK{", 0); |
---|
1346 | if (found != string::npos) { |
---|
1347 | size_t inside_link = found+5; |
---|
1348 | size_t close = text.find('}', inside_link); |
---|
1349 | |
---|
1350 | if (close == string::npos) throw "unclosed 'LINK{}'"; |
---|
1351 | |
---|
1352 | string link_target = text.substr(inside_link, close-inside_link); |
---|
1353 | LinkType type = detectLinkType(link_target); |
---|
1354 | string dest = link_target; |
---|
1355 | |
---|
1356 | XML_Text(text.substr(0, found)); |
---|
1357 | |
---|
1358 | { |
---|
1359 | XML_Tag link("LINK"); |
---|
1360 | link.set_on_extra_line(false); |
---|
1361 | add_link_attributes(link, type, dest, lineNo); |
---|
1362 | } |
---|
1363 | |
---|
1364 | print_XML_Text_expanding_links(text.substr(close+1), lineNo); |
---|
1365 | } |
---|
1366 | else { |
---|
1367 | XML_Text t(text); |
---|
1368 | } |
---|
1369 | } |
---|
1370 | |
---|
1371 | void ParagraphTree::xml_write_textblock() { |
---|
1372 | XML_Tag textblock("T"); |
---|
1373 | textblock.add_attribute("reflow", reflow ? "1" : "0"); |
---|
1374 | |
---|
1375 | { |
---|
1376 | string usedText; |
---|
1377 | const string& text = otext; |
---|
1378 | if (reflow) { |
---|
1379 | usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level); |
---|
1380 | } |
---|
1381 | else { |
---|
1382 | usedText = text; |
---|
1383 | } |
---|
1384 | print_XML_Text_expanding_links(usedText, otext.get_lineno()); |
---|
1385 | } |
---|
1386 | } |
---|
1387 | |
---|
1388 | ParagraphTree *ParagraphTree::xml_write_list_contents() { |
---|
1389 | h2x_assert(is_itemlist_member()); |
---|
1390 | #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) |
---|
1391 | if (!reflow) attach_warning("ITEM not reflown (check output)"); |
---|
1392 | #endif |
---|
1393 | { |
---|
1394 | XML_Tag entry("ENTRY"); |
---|
1395 | entry.add_attribute("item", "1"); |
---|
1396 | xml_write_textblock(); |
---|
1397 | if (son) son->xml_write(); |
---|
1398 | } |
---|
1399 | if (brother && brother->is_itemlist_member()) { |
---|
1400 | return brother->xml_write_list_contents(); |
---|
1401 | } |
---|
1402 | return brother; |
---|
1403 | } |
---|
1404 | ParagraphTree *ParagraphTree::xml_write_enum_contents() { |
---|
1405 | h2x_assert(get_enumeration()); |
---|
1406 | #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) |
---|
1407 | if (!reflow) attach_warning("ENUMERATED not reflown (check output)"); |
---|
1408 | #endif |
---|
1409 | { |
---|
1410 | XML_Tag entry("ENTRY"); |
---|
1411 | switch (get_enum_type()) { |
---|
1412 | case DIGITS: |
---|
1413 | entry.add_attribute("enumerated", strf("%i", get_enumeration())); |
---|
1414 | break; |
---|
1415 | case ALPHA_UPPER: |
---|
1416 | entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration())); |
---|
1417 | break; |
---|
1418 | case ALPHA_LOWER: |
---|
1419 | entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration())); |
---|
1420 | break; |
---|
1421 | default: |
---|
1422 | h2x_assert(0); |
---|
1423 | break; |
---|
1424 | } |
---|
1425 | xml_write_textblock(); |
---|
1426 | if (son) son->xml_write(); |
---|
1427 | } |
---|
1428 | if (brother && brother->get_enumeration()) { |
---|
1429 | int diff = brother->get_enumeration()-get_enumeration(); |
---|
1430 | if (diff != 1) { |
---|
1431 | attach_warning("Non-consecutive enumeration detected between here.."); |
---|
1432 | brother->attach_warning(".. and here"); |
---|
1433 | } |
---|
1434 | return brother->xml_write_enum_contents(); |
---|
1435 | } |
---|
1436 | return brother; |
---|
1437 | } |
---|
1438 | |
---|
1439 | void ParagraphTree::xml_write() { |
---|
1440 | try { |
---|
1441 | ParagraphTree *next = NULp; |
---|
1442 | if (get_enumeration()) { |
---|
1443 | XML_Tag enu("ENUM"); |
---|
1444 | if (get_enumeration() != 1) { |
---|
1445 | attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration())); |
---|
1446 | } |
---|
1447 | next = xml_write_enum_contents(); |
---|
1448 | #if defined(WARN_LONESOME_ENUM_ELEMENTS) |
---|
1449 | if (next == brother) attach_warning("Suspicious single-element-ENUM"); |
---|
1450 | #endif |
---|
1451 | } |
---|
1452 | else if (is_itemlist_member()) { |
---|
1453 | XML_Tag list("LIST"); |
---|
1454 | next = xml_write_list_contents(); |
---|
1455 | #if defined(WARN_LONESOME_LIST_ELEMENTS) |
---|
1456 | if (next == brother) attach_warning("Suspicious single-element-LIST"); |
---|
1457 | #endif |
---|
1458 | } |
---|
1459 | else { |
---|
1460 | { |
---|
1461 | XML_Tag para("P"); |
---|
1462 | xml_write_textblock(); |
---|
1463 | if (son) son->xml_write(); |
---|
1464 | } |
---|
1465 | next = brother; |
---|
1466 | } |
---|
1467 | if (next) next->xml_write(); |
---|
1468 | } |
---|
1469 | catch (string& err) { throw attached_message(err); } |
---|
1470 | catch (const char *err) { throw attached_message(err); } |
---|
1471 | } |
---|
1472 | |
---|
1473 | static void create_top_links(const Links& links, const char *tag) { |
---|
1474 | for (Links::const_iterator s = links.begin(); s != links.end(); ++s) { |
---|
1475 | XML_Tag link(tag); |
---|
1476 | add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno()); |
---|
1477 | } |
---|
1478 | } |
---|
1479 | |
---|
1480 | void Helpfile::writeXML(FILE *out, const string& page_name) { |
---|
1481 | XML_Document xml("PAGE", "arb_help.dtd", out); |
---|
1482 | |
---|
1483 | xml.skip_empty_tags = true; |
---|
1484 | xml.indentation_per_level = 2; |
---|
1485 | |
---|
1486 | xml.getRoot().add_attribute("name", page_name); |
---|
1487 | #if defined(DEBUG) |
---|
1488 | xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version |
---|
1489 | #else |
---|
1490 | xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version |
---|
1491 | #endif // DEBUG |
---|
1492 | |
---|
1493 | xml.getRoot().add_attribute("source", inputfile.c_str()); |
---|
1494 | |
---|
1495 | { |
---|
1496 | XML_Comment(string("automatically generated from ../")+inputfile+' '); |
---|
1497 | } |
---|
1498 | |
---|
1499 | create_top_links(uplinks, "UP"); |
---|
1500 | create_top_links(references, "SUB"); |
---|
1501 | create_top_links(auto_references, "SUB"); |
---|
1502 | |
---|
1503 | { |
---|
1504 | XML_Tag title_tag("TITLE"); |
---|
1505 | const Ostrings& T = title.Content(); |
---|
1506 | for (Ostrings::const_iterator s = T.begin(); s != T.end(); ++s) { |
---|
1507 | if (s != T.begin()) { XML_Text text("\n"); } |
---|
1508 | XML_Text text(*s); |
---|
1509 | } |
---|
1510 | } |
---|
1511 | |
---|
1512 | for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { |
---|
1513 | try { |
---|
1514 | XML_Tag section_tag("SECTION"); |
---|
1515 | section_tag.add_attribute("name", sec->getName()); |
---|
1516 | |
---|
1517 | ParagraphTree *ptree = ParagraphTree::buildParagraphTree(*sec); |
---|
1518 | |
---|
1519 | #if defined(DEBUG) |
---|
1520 | size_t textnodes = ptree->countTextNodes(); |
---|
1521 | #endif |
---|
1522 | #if defined(DUMP_PARAGRAPHS) |
---|
1523 | cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n"; |
---|
1524 | ptree->dump(cout); |
---|
1525 | cout << "----------------------------------------\n"; |
---|
1526 | #endif |
---|
1527 | |
---|
1528 | ptree->format_lists(); |
---|
1529 | |
---|
1530 | #if defined(DUMP_PARAGRAPHS) |
---|
1531 | cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n"; |
---|
1532 | ptree->dump(cout); |
---|
1533 | cout << "----------------------------------------\n"; |
---|
1534 | #endif |
---|
1535 | #if defined(DEBUG) |
---|
1536 | size_t textnodes2 = ptree->countTextNodes(); |
---|
1537 | h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error |
---|
1538 | #endif |
---|
1539 | |
---|
1540 | ptree->format_indentations(); |
---|
1541 | |
---|
1542 | #if defined(DUMP_PARAGRAPHS) |
---|
1543 | cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n"; |
---|
1544 | ptree->dump(cout); |
---|
1545 | cout << "----------------------------------------\n"; |
---|
1546 | #endif |
---|
1547 | #if defined(DEBUG) |
---|
1548 | size_t textnodes3 = ptree->countTextNodes(); |
---|
1549 | h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error |
---|
1550 | #endif |
---|
1551 | |
---|
1552 | ptree->xml_write(); |
---|
1553 | |
---|
1554 | delete ptree; |
---|
1555 | } |
---|
1556 | catch (string& err) { throw sec->attached_message(err); } |
---|
1557 | catch (const char *err) { throw sec->attached_message(err); } |
---|
1558 | } |
---|
1559 | } |
---|
1560 | |
---|
1561 | void Helpfile::extractInternalLinks() { |
---|
1562 | for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { |
---|
1563 | try { |
---|
1564 | const Ostrings& s = sec->Content(); |
---|
1565 | |
---|
1566 | for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) { |
---|
1567 | const string& line = *li; |
---|
1568 | size_t start = 0; |
---|
1569 | |
---|
1570 | while (1) { |
---|
1571 | size_t found = line.find("LINK{", start); |
---|
1572 | if (found == string::npos) break; |
---|
1573 | found += 5; |
---|
1574 | size_t close = line.find('}', found); |
---|
1575 | if (close == string::npos) break; |
---|
1576 | |
---|
1577 | string link_target = line.substr(found, close-found); |
---|
1578 | |
---|
1579 | if (link_target.find("http://") == string::npos && |
---|
1580 | link_target.find("https://")== string::npos && |
---|
1581 | link_target.find("ftp://") == string::npos && |
---|
1582 | link_target.find("file://") == string::npos && |
---|
1583 | link_target.find('@') == string::npos) |
---|
1584 | { |
---|
1585 | check_self_ref(link_target); |
---|
1586 | |
---|
1587 | try { |
---|
1588 | check_specific_duplicates(link_target, references, false); // check only sublinks here |
---|
1589 | check_specific_duplicates(link_target, uplinks, false); // check only uplinks here |
---|
1590 | check_specific_duplicates(link_target, auto_references, false); // check only sublinks here |
---|
1591 | |
---|
1592 | // only auto-add inline reference if none of the above checks has thrown |
---|
1593 | auto_references.push_back(Link(link_target, sec->line_number())); |
---|
1594 | } |
---|
1595 | catch (string& err) { |
---|
1596 | ; // silently ignore inlined |
---|
1597 | } |
---|
1598 | } |
---|
1599 | start = close+1; |
---|
1600 | } |
---|
1601 | } |
---|
1602 | } |
---|
1603 | catch (string& err) { |
---|
1604 | throw sec->attached_message("'"+err+"' while scanning LINK{}"); |
---|
1605 | } |
---|
1606 | } |
---|
1607 | } |
---|
1608 | |
---|
1609 | static void show_err(const string& err, size_t lineno, const string& helpfile) { |
---|
1610 | if (err.find(helpfile+':') != string::npos) { |
---|
1611 | cerr << err; |
---|
1612 | } |
---|
1613 | else if (lineno == NO_LINENUMBER_INFO) { |
---|
1614 | cerr << helpfile << ":1: [in unknown line] " << err; |
---|
1615 | } |
---|
1616 | else { |
---|
1617 | cerr << helpfile << ":" << lineno << ": " << err; |
---|
1618 | } |
---|
1619 | cerr << '\n'; |
---|
1620 | } |
---|
1621 | inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) { |
---|
1622 | show_err(line_err.Message(), line_err.Lineno(), helpfile); |
---|
1623 | } |
---|
1624 | inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) { |
---|
1625 | show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile); |
---|
1626 | } |
---|
1627 | inline void show_warnings(const string& helpfile) { |
---|
1628 | for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) { |
---|
1629 | show_warning(*wi, helpfile); |
---|
1630 | } |
---|
1631 | } |
---|
1632 | static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) { |
---|
1633 | show_err(error, helpfile); |
---|
1634 | show_warnings(helpfile); |
---|
1635 | } |
---|
1636 | |
---|
1637 | int ARB_main(int argc, char *argv[]) { |
---|
1638 | if (argc != 3) { |
---|
1639 | cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n"; |
---|
1640 | return EXIT_FAILURE; |
---|
1641 | } |
---|
1642 | |
---|
1643 | Helpfile help; |
---|
1644 | string arb_help; |
---|
1645 | |
---|
1646 | try { |
---|
1647 | try { |
---|
1648 | arb_help = argv[1]; |
---|
1649 | string xml_output = argv[2]; |
---|
1650 | |
---|
1651 | { |
---|
1652 | ifstream in(arb_help.c_str()); |
---|
1653 | help.readHelp(in, arb_help); |
---|
1654 | } |
---|
1655 | |
---|
1656 | help.extractInternalLinks(); |
---|
1657 | |
---|
1658 | { |
---|
1659 | FILE *out = std::fopen(xml_output.c_str(), "wt"); |
---|
1660 | if (!out) throw string("Can't open '")+xml_output+'\''; |
---|
1661 | |
---|
1662 | try { |
---|
1663 | // arb_help contains 'oldhelp/name.hlp' |
---|
1664 | size_t slash = arb_help.find('/'); |
---|
1665 | size_t dot = arb_help.find_last_of('.'); |
---|
1666 | |
---|
1667 | if (slash == string::npos || dot == string::npos) { |
---|
1668 | throw string("parameter <ARB helpfile> has to be in format 'oldhelp/name.hlp' (not '"+arb_help+"')"); |
---|
1669 | } |
---|
1670 | |
---|
1671 | string page_name(arb_help, slash+1, dot-slash-1); |
---|
1672 | help.writeXML(out, page_name); |
---|
1673 | fclose(out); |
---|
1674 | } |
---|
1675 | catch (...) { |
---|
1676 | fclose(out); |
---|
1677 | remove(xml_output.c_str()); |
---|
1678 | throw; |
---|
1679 | } |
---|
1680 | } |
---|
1681 | |
---|
1682 | show_warnings(arb_help); |
---|
1683 | |
---|
1684 | return EXIT_SUCCESS; |
---|
1685 | } |
---|
1686 | catch (string& err) { throw unattached_message(err); } |
---|
1687 | catch (const char * err) { throw unattached_message(err); } |
---|
1688 | catch (LineAttachedMessage& err) { throw; } |
---|
1689 | catch (...) { throw unattached_message("unknown exception in arb_help2xml"); } |
---|
1690 | } |
---|
1691 | catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); } |
---|
1692 | catch (...) { h2x_assert(0); } |
---|
1693 | |
---|
1694 | return EXIT_FAILURE; |
---|
1695 | } |
---|
1696 | |
---|
1697 | // -------------------------------------------------------------------------------- |
---|
1698 | |
---|
1699 | #ifdef UNIT_TESTS |
---|
1700 | #include <test_unit.h> |
---|
1701 | #include <arb_msg.h> |
---|
1702 | |
---|
1703 | static arb_test::match_expectation help_file_compiles(const char *helpfile, const char *expected_title, const char *expected_error_part) { |
---|
1704 | using namespace arb_test; |
---|
1705 | expectation_group expected; |
---|
1706 | |
---|
1707 | ifstream in(helpfile); |
---|
1708 | |
---|
1709 | LineAttachedMessage *error = NULp; |
---|
1710 | |
---|
1711 | Helpfile help; |
---|
1712 | try { |
---|
1713 | help.readHelp(in, helpfile); |
---|
1714 | help.extractInternalLinks(); |
---|
1715 | |
---|
1716 | FILE *devnul = fopen("/dev/null", "wt"); |
---|
1717 | if (!devnul) throw unattached_message("can't write to null device"); |
---|
1718 | help.writeXML(devnul, "dummy"); |
---|
1719 | fclose(devnul); |
---|
1720 | } |
---|
1721 | catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); } |
---|
1722 | catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); } |
---|
1723 | |
---|
1724 | if (expected_error_part) { |
---|
1725 | expected.add(that(error).does_differ_from_NULL()); |
---|
1726 | if (error) expected.add(that(error->Message()).does_contain(expected_error_part)); |
---|
1727 | } |
---|
1728 | else { |
---|
1729 | expected.add(that(error).is_equal_to_NULL()); |
---|
1730 | if (!error) { |
---|
1731 | Section title = help.get_title(); |
---|
1732 | const Ostrings& title_strings = title.Content(); |
---|
1733 | |
---|
1734 | expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title)); |
---|
1735 | expected.add(that(title_strings.size()).is_equal_to(1)); |
---|
1736 | } |
---|
1737 | else { |
---|
1738 | show_error_and_warnings(*error, helpfile); |
---|
1739 | } |
---|
1740 | } |
---|
1741 | |
---|
1742 | delete error; |
---|
1743 | |
---|
1744 | return all().ofgroup(expected); |
---|
1745 | } |
---|
1746 | |
---|
// expect that 'helpfile' compiles without error and results in the title 'expectedTitle'
#define HELP_FILE_COMPILES(helpfile,expectedTitle)      TEST_EXPECTATION(help_file_compiles(helpfile,expectedTitle,NULp))
// expect that compiling 'helpfile' fails with an error message containing 'expectedError'
#define HELP_FILE_COMPILE_ERROR(helpfile,expectedError) TEST_EXPECTATION(help_file_compiles(helpfile,NULp,expectedError))
---|
1749 | |
---|
1750 | void TEST_hlp2xml_conversion() { |
---|
1751 | TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE")); |
---|
1752 | |
---|
1753 | HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp |
---|
1754 | |
---|
1755 | HELP_FILE_COMPILES("oldhelp/markbyref.hlp", "Mark by reference"); // oldhelp/markbyref.hlp |
---|
1756 | HELP_FILE_COMPILES("oldhelp/ad_align.hlp", "Alignment Administration"); // oldhelp/ad_align.hlp |
---|
1757 | HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights and licenses"); // genhelp/copyright.hlp |
---|
1758 | |
---|
1759 | HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file |
---|
1760 | } |
---|
1761 | TEST_PUBLISH(TEST_hlp2xml_conversion); |
---|
1762 | |
---|
1763 | |
---|
1764 | // #define TEST_AUTO_UPDATE // uncomment to update expected xml // @@@ comment-out! |
---|
1765 | |
---|
1766 | void TEST_hlp2xml_output() { |
---|
1767 | string tested_helpfile[] = { |
---|
1768 | "unittest" |
---|
1769 | }; |
---|
1770 | |
---|
1771 | string HELP_SOURCE = "../../HELP_SOURCE/"; |
---|
1772 | string LIB = "../../lib/"; |
---|
1773 | string EXPECTED = "help/"; |
---|
1774 | |
---|
1775 | for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) { |
---|
1776 | string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml"; |
---|
1777 | string html = LIB + "help_html/" + tested_helpfile[i] + ".html"; |
---|
1778 | string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp"; |
---|
1779 | |
---|
1780 | string xml_expected = EXPECTED + tested_helpfile[i] + ".xml"; |
---|
1781 | string html_expected = EXPECTED + tested_helpfile[i] + ".html"; |
---|
1782 | string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp"; |
---|
1783 | |
---|
1784 | |
---|
1785 | #if defined(TEST_AUTO_UPDATE) |
---|
1786 | # if defined(NDEBUG) |
---|
1787 | # error please use auto-update only in DEBUG mode |
---|
1788 | # endif |
---|
1789 | TEST_COPY_FILE(xml.c_str(), xml_expected.c_str()); |
---|
1790 | TEST_COPY_FILE(html.c_str(), html_expected.c_str()); |
---|
1791 | TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str()); |
---|
1792 | |
---|
1793 | #else // !defined(TEST_AUTO_UPDATE) |
---|
1794 | |
---|
1795 | # if defined(DEBUG) |
---|
1796 | int expected_xml_difflines = 0; |
---|
1797 | int expected_hlp_difflines = 0; |
---|
1798 | # else // !defined(DEBUG) |
---|
1799 | int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning |
---|
1800 | int expected_hlp_difflines = 1; // resulting warning in helpfile |
---|
1801 | # endif |
---|
1802 | TEST_EXPECT_TEXTFILE_DIFFLINES(xml_expected.c_str(), xml.c_str(), expected_xml_difflines); |
---|
1803 | TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html_expected.c_str(), html.c_str(), 0); // html contains the update-date |
---|
1804 | TEST_EXPECT_TEXTFILE_DIFFLINES(hlp_expected.c_str(), hlp.c_str(), expected_hlp_difflines); |
---|
1805 | #endif |
---|
1806 | } |
---|
1807 | } |
---|
1808 | |
---|
1809 | |
---|
1810 | #if defined(PROTECT_HELP_VS_CHANGES) |
---|
1811 | void TEST_protect_help_vs_changes() { // should normally be disabled |
---|
1812 | // fails if help changes compared to another checkout |
---|
1813 | // or just updates the diff w/o failing (if you comment out the last line) |
---|
1814 | // |
---|
1815 | // if the patch is hugo and you load it into xemacs |
---|
1816 | // you might want to (turn-on-lazy-shot) |
---|
1817 | // |
---|
1818 | // patch-pointer: ../UNIT_TESTER/run/help_changes.patch |
---|
1819 | |
---|
1820 | bool do_help = true; |
---|
1821 | bool do_html = true; |
---|
1822 | |
---|
1823 | const char *ref_WC = "ARB.help.ref"; |
---|
1824 | |
---|
1825 | // ---------------------------------------- config above |
---|
1826 | |
---|
1827 | string this_base = "../.."; |
---|
1828 | string ref_base = this_base+"/../"+ref_WC; |
---|
1829 | string to_help = "/lib/help"; |
---|
1830 | string to_html = "/lib/help_html"; |
---|
1831 | string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help; |
---|
1832 | string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html; |
---|
1833 | |
---|
1834 | string update_cmd; |
---|
1835 | |
---|
1836 | if (do_help) { |
---|
1837 | if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")"; |
---|
1838 | else update_cmd = diff_help; |
---|
1839 | } |
---|
1840 | else if (do_html) update_cmd = diff_html; |
---|
1841 | |
---|
1842 | string patch = "help_changes.patch"; |
---|
1843 | update_cmd += " >"+patch+" ||true"; |
---|
1844 | |
---|
1845 | string fail_on_change_cmd = "test \"`cat "+patch+" | grep -v '^Common subdirectories' | wc -l`\" = \"0\" || ( echo \"Error: Help changed\"; false)"; |
---|
1846 | |
---|
1847 | TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str())); |
---|
1848 | TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit |
---|
1849 | } |
---|
1850 | #endif |
---|
1851 | |
---|
1852 | #endif // UNIT_TESTS |
---|