1 | // ==================================================================== // |
---|
2 | // // |
---|
3 | // File : arb_help2xml.cxx // |
---|
4 | // Purpose : Converts old ARB help format to XML // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in October 2001 // |
---|
7 | // Copyright Department of Microbiology (Technical University Munich) // |
---|
8 | // // |
---|
9 | // Visit our web site at: http://www.arb-home.de/ // |
---|
10 | // // |
---|
11 | // ==================================================================== // |
---|
12 | |
---|
13 | #include <xml.hxx> |
---|
14 | #include <arb_defs.h> |
---|
15 | #include <arb_diff.h> |
---|
16 | #include <static_assert.h> |
---|
17 | |
---|
18 | #include <list> |
---|
19 | #include <set> |
---|
20 | #include <iostream> |
---|
21 | #include <fstream> |
---|
22 | |
---|
23 | #include <cstdlib> |
---|
24 | #include <cstdarg> |
---|
25 | #include <cstring> |
---|
26 | #include <climits> |
---|
27 | |
---|
28 | #include <unistd.h> |
---|
29 | #include <sys/stat.h> |
---|
30 | |
---|
31 | using namespace std; |
---|
32 | |
---|
// assertion used throughout this file (forwards to arb_assert)
#define h2x_assert(bed) arb_assert(bed)

// Limit the length of the TITLE/SUBTITLE of helppages.
// - TITLE has to fit into UP/SUB subwindows of arb internal help window
// - SUBTITLE has to fit into default help-textsubwindow width
#define MAX_TITLE_CHARS    42
#define MAX_SUBTITLE_CHARS 75

// the diagnostic switches below are only active in DEBUG builds
#if defined(DEBUG)
#define WARN_FORMATTING_PROBLEMS
#define WARN_MISSING_HELP
// #define DUMP_PARAGRAPHS
// #define PROTECT_HELP_VS_CHANGES
#endif // DEBUG


// sub-switches of WARN_FORMATTING_PROBLEMS
#if defined(WARN_FORMATTING_PROBLEMS)

#define WARN_FIXED_LAYOUT_LIST_ELEMENTS
#define WARN_LONESOME_ENUM_ELEMENTS

// warnings below are useless for production and shall be disabled in SVN
// #define WARN_LONESOME_LIST_ELEMENTS
// #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION
// #define WARN_IGNORED_ALPHA_ENUMS

#endif


// MAX_LINE_LENGTH is the size of the line buffers of class Reader
// (i.e. it includes the terminating zero byte)
#define MAX_LINE_LENGTH 200 // maximum length of lines in input stream
// distance of the tab stops used when class Reader replaces tabs by spaces
#define TABSIZE         8
---|
64 | |
---|
// Keywords of all known section types.
// The position of a keyword in this array is the numeric value of the
// corresponding SectionType (Helpfile::readHelp() converts the index of the
// matching keyword directly into a SectionType).
// => keep the order of this array and of enum SectionType in sync!
static const char *knownSections[] = {
    "OCCURRENCE",
    "DESCRIPTION",
    "NOTES",
    "EXAMPLES",
    "WARNINGS",
    "BUGS",
    "SECTION",
};

enum SectionType {
    // section types with a keyword (same order as in 'knownSections')
    SEC_OCCURRENCE,
    SEC_DESCRIPTION,
    SEC_NOTES,
    SEC_EXAMPLES,
    SEC_WARNINGS,
    SEC_BUGS,
    SEC_SECTION,

    KNOWN_SECTION_TYPES, // == number of entries above (not a real section type)

    // types w/o entry in 'knownSections':
    SEC_NONE, // "no matching keyword" marker used while looking up a keyword
    SEC_FAKE, // used for the TITLE pseudo-section of class Helpfile
};

// fails to compile if array and enum get out of sync
STATIC_ASSERT(ARRAY_ELEMS(knownSections) == KNOWN_SECTION_TYPES);
---|
90 | |
---|
91 | __ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) { |
---|
92 | static size_t buf_size = 256; |
---|
93 | static char *buffer = new char[buf_size]; |
---|
94 | |
---|
95 | size_t length; |
---|
96 | while (1) { |
---|
97 | if (!buffer) { |
---|
98 | h2x_assert(buffer); // to stop when debugging |
---|
99 | throw string("out of memory"); |
---|
100 | } |
---|
101 | |
---|
102 | length = vsnprintf(buffer, buf_size, format, argPtr); |
---|
103 | if (length < buf_size) break; // string fits into current buffer |
---|
104 | |
---|
105 | // otherwise resize buffer : |
---|
106 | buf_size += buf_size/2; |
---|
107 | delete [] buffer; |
---|
108 | buffer = new char[buf_size]; |
---|
109 | } |
---|
110 | |
---|
111 | return string(buffer, length); |
---|
112 | } |
---|
113 | |
---|
114 | __ATTR__FORMAT(1) static string strf(const char *format, ...) { |
---|
115 | va_list argPtr; |
---|
116 | va_start(argPtr, format); |
---|
117 | string result = vstrf(format, argPtr); |
---|
118 | va_end(argPtr); |
---|
119 | |
---|
120 | return result; |
---|
121 | } |
---|
122 | |
---|
123 | // ----------------------------- |
---|
124 | // warnings and errors |
---|
125 | |
---|
class LineAttachedMessage {
    // a message text together with the line number it refers to

    string message;
    size_t lineno;

public:
    LineAttachedMessage(const string& text, size_t line)
        : message(text), lineno(line)
    {}

    size_t        Lineno() const { return lineno; }
    const string& Message() const { return message; }
};
---|
139 | |
---|
140 | const size_t NO_LINENUMBER_INFO = -1U; |
---|
141 | |
---|
142 | LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); } |
---|
143 | |
---|
144 | |
---|
145 | static list<LineAttachedMessage> warnings; |
---|
146 | inline void add_warning(const LineAttachedMessage& laMsg) { |
---|
147 | warnings.push_back(laMsg); |
---|
148 | } |
---|
149 | inline void add_warning(const string& warning, size_t lineno) { |
---|
150 | add_warning(LineAttachedMessage(warning, lineno)); |
---|
151 | } |
---|
152 | |
---|
153 | struct MessageAttachable { |
---|
154 | virtual ~MessageAttachable() {} |
---|
155 | |
---|
156 | virtual string location_description() const = 0; // may return empty string |
---|
157 | virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO |
---|
158 | |
---|
159 | LineAttachedMessage attached_message(const string& message) const { |
---|
160 | string where = location_description(); |
---|
161 | if (where.empty()) return LineAttachedMessage(message, line_number()); |
---|
162 | return LineAttachedMessage(message+" ["+where+"]", line_number()); |
---|
163 | } |
---|
164 | void attach_warning(const string& message) const { |
---|
165 | add_warning(attached_message(message)); |
---|
166 | } |
---|
167 | }; |
---|
168 | |
---|
169 | |
---|
170 | // ---------------------- |
---|
171 | // class Reader |
---|
172 | |
---|
173 | class Reader : public MessageAttachable { |
---|
174 | private: |
---|
175 | istream& in; |
---|
176 | char lineBuffer[MAX_LINE_LENGTH]; |
---|
177 | char lineBuffer2[MAX_LINE_LENGTH]; |
---|
178 | bool readAgain; |
---|
179 | bool eof; |
---|
180 | int lineNo; |
---|
181 | |
---|
182 | string location_description() const OVERRIDE { return ""; } |
---|
183 | size_t line_number() const OVERRIDE { return lineNo; } |
---|
184 | |
---|
185 | void getline() { |
---|
186 | if (!eof) { |
---|
187 | if (in.eof()) eof = true; |
---|
188 | else { |
---|
189 | h2x_assert(in.good()); |
---|
190 | |
---|
191 | in.getline(lineBuffer, MAX_LINE_LENGTH); |
---|
192 | lineNo++; |
---|
193 | |
---|
194 | if (in.eof()) eof = true; |
---|
195 | else if (in.fail()) throw "line too long"; |
---|
196 | |
---|
197 | if (strchr(lineBuffer, '\t')) { |
---|
198 | int o2 = 0; |
---|
199 | |
---|
200 | for (int o = 0; lineBuffer[o]; ++o) { |
---|
201 | if (lineBuffer[o] == '\t') { |
---|
202 | int spaces = TABSIZE - (o2 % TABSIZE); |
---|
203 | while (spaces--) lineBuffer2[o2++] = ' '; |
---|
204 | } |
---|
205 | else { |
---|
206 | lineBuffer2[o2++] = lineBuffer[o]; |
---|
207 | } |
---|
208 | } |
---|
209 | lineBuffer2[o2] = 0; |
---|
210 | strcpy(lineBuffer, lineBuffer2); |
---|
211 | } |
---|
212 | |
---|
213 | char *eol = strchr(lineBuffer, 0)-1; |
---|
214 | while (eol >= lineBuffer && isspace(eol[0])) { |
---|
215 | eol[0] = 0; // trim trailing whitespace |
---|
216 | eol--; |
---|
217 | } |
---|
218 | if (eol > lineBuffer) { |
---|
219 | // now eol points to last character |
---|
220 | if (eol[0] == '-' && isalnum(eol[-1])) { |
---|
221 | attach_warning("manual hyphenation detected"); |
---|
222 | } |
---|
223 | } |
---|
224 | } |
---|
225 | } |
---|
226 | } |
---|
227 | |
---|
228 | public: |
---|
229 | Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); } |
---|
230 | virtual ~Reader() {} |
---|
231 | |
---|
232 | const char *getNext() { |
---|
233 | if (readAgain) readAgain = false; |
---|
234 | else getline(); |
---|
235 | return eof ? NULp : lineBuffer; |
---|
236 | } |
---|
237 | |
---|
238 | void back() { |
---|
239 | h2x_assert(!readAgain); |
---|
240 | readAgain = true; |
---|
241 | } |
---|
242 | |
---|
243 | int getLineNo() const { return lineNo; } |
---|
244 | }; |
---|
245 | |
---|
// kind of a paragraph (see class Ostring)
enum ParagraphType {
    PLAIN_TEXT,
    ENUMERATED, // paragraph started with an enumeration prefix (see EnumerationType)
    ITEM,       // paragraph started with a list bullet ('-' or '*')
};
// kind of enumeration prefix detected in front of a paragraph
enum EnumerationType {
    NONE,        // no enumeration
    DIGITS,      // number followed by '.' or ')'
    ALPHA_UPPER, // uppercase letter followed by '.' or ')'
    ALPHA_LOWER, // lowercase letter followed by '.' or ')'
};
---|
257 | |
---|
258 | class Ostring : public MessageAttachable { |
---|
259 | string content; |
---|
260 | size_t lineNo; // where string came from |
---|
261 | ParagraphType type; |
---|
262 | |
---|
263 | // only valid for type==ENUMERATED: |
---|
264 | EnumerationType etype; |
---|
265 | unsigned number; |
---|
266 | |
---|
267 | public: |
---|
268 | |
---|
269 | Ostring(const string& s, size_t line_no, ParagraphType type_) |
---|
270 | : content(s), |
---|
271 | lineNo(line_no), |
---|
272 | type(type_), |
---|
273 | etype(NONE) |
---|
274 | { |
---|
275 | h2x_assert(type != ENUMERATED); |
---|
276 | } |
---|
277 | Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num) |
---|
278 | : content(s), |
---|
279 | lineNo(line_no), |
---|
280 | type(type_), |
---|
281 | etype(etype_), |
---|
282 | number(num) |
---|
283 | { |
---|
284 | h2x_assert(type == ENUMERATED); |
---|
285 | h2x_assert(etype == DIGITS || etype == ALPHA_UPPER || etype == ALPHA_LOWER); |
---|
286 | h2x_assert(num>0); |
---|
287 | } |
---|
288 | |
---|
289 | // MessageAttachable interface: |
---|
290 | string location_description() const OVERRIDE { return ""; } |
---|
291 | size_t line_number() const OVERRIDE { return get_lineno(); } |
---|
292 | |
---|
293 | operator const string&() const { return content; } |
---|
294 | operator string&() { return content; } |
---|
295 | |
---|
296 | const string& as_string() const { return content; } |
---|
297 | string& as_string() { return content; } |
---|
298 | |
---|
299 | size_t get_lineno() const { return lineNo; } // @@@ replace by line_number()? |
---|
300 | |
---|
301 | const ParagraphType& get_type() const { return type; } |
---|
302 | const EnumerationType& get_enum_type() const { |
---|
303 | h2x_assert(type == ENUMERATED); |
---|
304 | return etype; |
---|
305 | } |
---|
306 | unsigned get_number() const { |
---|
307 | h2x_assert(type == ENUMERATED); |
---|
308 | return number; |
---|
309 | } |
---|
310 | |
---|
311 | // wrapper to make Ostring act like char* |
---|
312 | const char *c_str() const { return content.c_str(); } |
---|
313 | }; |
---|
314 | |
---|
315 | typedef list<Ostring> Ostrings; |
---|
316 | |
---|
317 | #if defined(WARN_MISSING_HELP) |
---|
318 | static void check_TODO(const char *line, const Reader& reader) { |
---|
319 | if (strstr(line, "@@@") || strstr(line, "TODO")) { |
---|
320 | reader.attach_warning(strf("TODO: %s", line)); |
---|
321 | } |
---|
322 | } |
---|
323 | #else |
---|
324 | inline void check_TODO(const char *, const Reader&) { } |
---|
325 | #endif // WARN_MISSING_HELP |
---|
326 | |
---|
327 | // ---------------------------- |
---|
328 | // class Section |
---|
329 | |
---|
330 | class Section FINAL_TYPE : public MessageAttachable { |
---|
331 | SectionType type; |
---|
332 | string name; |
---|
333 | Ostrings content; |
---|
334 | size_t lineno; |
---|
335 | |
---|
336 | string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; } |
---|
337 | |
---|
338 | public: |
---|
339 | Section(string name_, SectionType type_, size_t lineno_) |
---|
340 | : type(type_), |
---|
341 | name(name_), |
---|
342 | lineno(lineno_) |
---|
343 | {} |
---|
344 | virtual ~Section() {} |
---|
345 | |
---|
346 | const Ostrings& Content() const { return content; } |
---|
347 | Ostrings& Content() { return content; } |
---|
348 | SectionType get_type() const { return type; } |
---|
349 | size_t line_number() const OVERRIDE { return lineno; } |
---|
350 | const string& getName() const { return name; } |
---|
351 | void setName(const string& name_) { name = name_; } |
---|
352 | |
---|
353 | void set_line_number(size_t lineNumber) { lineno = lineNumber; } |
---|
354 | }; |
---|
355 | |
---|
356 | typedef list<Section> SectionList; |
---|
357 | |
---|
358 | // -------------------- |
---|
359 | // class Link |
---|
360 | |
---|
class Link {
    // a link target together with the line (of the source) where the link was found

    string target;
    size_t source_lineno;

public:
    Link(const string& target_, size_t source_lineno_)
        : target(target_), source_lineno(source_lineno_)
    {}

    size_t        SourceLineno() const { return source_lineno; }
    const string& Target() const { return target; }
};

typedef list<Link> Links;
---|
376 | |
---|
377 | // ------------------------ |
---|
378 | // class Helpfile |
---|
379 | |
---|
380 | class Helpfile { |
---|
381 | Links uplinks; |
---|
382 | Links references; |
---|
383 | Links auto_references; |
---|
384 | Section title; |
---|
385 | SectionList sections; |
---|
386 | string inputfile; |
---|
387 | |
---|
388 | void check_self_ref(const string& link) { |
---|
389 | size_t slash = inputfile.find('/'); |
---|
390 | if (slash != string::npos) { |
---|
391 | if (inputfile.substr(slash+1) == link) { |
---|
392 | throw string("Invalid link to self"); |
---|
393 | } |
---|
394 | } |
---|
395 | } |
---|
396 | |
---|
397 | public: |
---|
398 | Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {} |
---|
399 | virtual ~Helpfile() {} |
---|
400 | |
---|
401 | void readHelp(istream& in, const string& filename); |
---|
402 | void writeXML(FILE *out, const string& page_name); |
---|
403 | void extractInternalLinks(); |
---|
404 | |
---|
405 | const Section& get_title() const { return title; } |
---|
406 | }; |
---|
407 | |
---|
// Note: tabs are intentionally not handled by these predicates
inline bool isSpace(char c) { return ' ' == c; }
inline bool isWhitespace(char c) {
    // space or linefeed
    switch (c) {
        case ' ':
        case '\n':
            return true;
        default:
            return false;
    }
}

inline bool isEmptyOrComment(const char *s) {
    // true if 's' starts with '#' (at first column) or consists of spaces only
    if (*s == '#') return true;

    while (isSpace(*s)) ++s;
    return !*s;
}
---|
420 | |
---|
421 | inline const char *extractKeyword(const char *line, string& keyword) { |
---|
422 | // returns NULp if no keyword was found |
---|
423 | // otherwise returns position behind keyword and sets value of 'keyword' |
---|
424 | |
---|
425 | const char *space = strchr(line, ' '); |
---|
426 | if (space && space>line) { |
---|
427 | keyword = string(line, 0, space-line); |
---|
428 | return space; |
---|
429 | } |
---|
430 | else if (!space) { // test for keyword w/o content behind |
---|
431 | if (line[0]) { // not empty |
---|
432 | keyword = line; |
---|
433 | return strchr(line, 0); |
---|
434 | } |
---|
435 | } |
---|
436 | return NULp; |
---|
437 | } |
---|
438 | |
---|
inline const char *eatSpace(const char *line) {
    // returns position of first character of 'line' which is no space
    for (; *line == ' '; ++line) {}
    return line;
}
inline const char *eatWhitespace(const char *paragraph) {
    // returns position of first character of 'paragraph' which is neither space nor linefeed
    return paragraph + strspn(paragraph, " \n");
}
---|
449 | |
---|
450 | inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) { |
---|
451 | if (paragraph.length()) { |
---|
452 | if (type == ENUMERATED) { |
---|
453 | sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num)); |
---|
454 | } |
---|
455 | else { |
---|
456 | sec.Content().push_back(Ostring(paragraph, lineNo, type)); |
---|
457 | } |
---|
458 | |
---|
459 | type = PLAIN_TEXT; |
---|
460 | etype = NONE; |
---|
461 | paragraph = ""; |
---|
462 | } |
---|
463 | } |
---|
464 | |
---|
inline const char *firstChar(const char *s) {
    // returns position of first non-space character in 's'
    return s + strspn(s, " ");
}
---|
469 | |
---|
inline bool is_startof_itemlist_element(const char *contentStart) {
    // An item starts with a bullet ('-' or '*'), followed by exactly one
    // whitespace character, which is not followed by a '-'.

    const char bullet = contentStart[0];
    if (bullet != '-' && bullet != '*') return false;

    if (!isspace(contentStart[1])) return false;

    const char behind = contentStart[2];
    return !isspace(behind) && behind != '-';
}
---|
480 | |
---|
481 | #define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums |
---|
482 | |
---|
483 | static EnumerationType startsWithLetter(string& s, unsigned& number) { |
---|
484 | // tests if first line starts with 'letter.' |
---|
485 | // if true then 'letter.' is removed from the string |
---|
486 | // the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..) |
---|
487 | |
---|
488 | size_t off = s.find_first_not_of(" \n"); |
---|
489 | if (off == string::npos) return NONE; |
---|
490 | if (!isalpha(s[off])) return NONE; |
---|
491 | |
---|
492 | size_t astart = off; |
---|
493 | EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER; |
---|
494 | |
---|
495 | number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1; |
---|
496 | ++off; |
---|
497 | |
---|
498 | h2x_assert(number>0 && number<MAX_ALLOWED_ENUM); |
---|
499 | |
---|
500 | if (s[off] != '.' && s[off] != ')') return NONE; |
---|
501 | if (s[off+1] != ' ') return NONE; |
---|
502 | |
---|
503 | // remove 'letter.' from string : |
---|
504 | ++off; |
---|
505 | while (s[off+1] == ' ') ++off; |
---|
506 | s.erase(astart, off-astart+1); |
---|
507 | |
---|
508 | return etype; |
---|
509 | } |
---|
510 | |
---|
511 | static bool startsWithNumber(string& s, unsigned& number) { |
---|
512 | // tests if first line starts with 'number.' |
---|
513 | // if true then 'number.' is removed from the string |
---|
514 | |
---|
515 | size_t off = s.find_first_not_of(" \n"); |
---|
516 | if (off == string::npos) return false; |
---|
517 | if (!isdigit(s[off])) return false; |
---|
518 | |
---|
519 | size_t num_start = off; |
---|
520 | number = 0; |
---|
521 | |
---|
522 | for (; isdigit(s[off]); ++off) { |
---|
523 | number = number*10 + (s[off]-'0'); |
---|
524 | } |
---|
525 | if (number>MAX_ALLOWED_ENUM) return false; |
---|
526 | |
---|
527 | if (s[off] != '.' && s[off] != ')') return false; |
---|
528 | if (s[off+1] != ' ') return false; |
---|
529 | |
---|
530 | // remove 'number.' from string : |
---|
531 | ++off; |
---|
532 | while (s[off+1] == ' ') ++off; |
---|
533 | s.erase(num_start, off-num_start+1); |
---|
534 | |
---|
535 | return true; |
---|
536 | } |
---|
537 | |
---|
538 | static EnumerationType detectLineEnumType(string& line, unsigned& number) { |
---|
539 | if (startsWithNumber(line, number)) return DIGITS; |
---|
540 | return startsWithLetter(line, number); |
---|
541 | } |
---|
542 | |
---|
static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
    // Reads the content of one section from 'reader' and stores it
    // (split into paragraphs) in 'sec'.
    //
    // 'line'        is the text the section starts with (may be empty).
    // 'indentation' is the number of spaces inserted in front of the first
    //               paragraph when parsing is finished (ignored if <= 0).
    //
    // Reading stops at end of input or at the first line starting a new keyword
    // (that line is pushed back into 'reader').
    //
    // Paragraphs are separated by empty lines and comment lines. In addition
    // a new paragraph is started by
    // - every line (if 'sec' is of type SEC_OCCURRENCE),
    // - a line starting an item ('-' or '*') and
    // - a line starting with an enumeration prefix.
    //
    // Throws 'const char *' if an enumeration with number zero is found.

    string paragraph         = line;
    size_t para_start_lineno = reader.getLineNo();

    if (sec.line_number() == NO_LINENUMBER_INFO) { // linenumber is not known yet
        // assume section just started (this happens with TITLE)
        sec.set_line_number(para_start_lineno);
    }

    // state of the paragraph currently collected in 'paragraph':
    ParagraphType   type  = PLAIN_TEXT;
    EnumerationType etype = NONE;
    unsigned        num   = 0;

    // number of the last accepted alphabetic enumeration.
    // (-1 wraps to 0 when incremented and 0 is never detected for letters
    // => the very first alphabetic enumeration is only accepted if it is 'a' or 'A')
    unsigned last_alpha_num = -1;

    h2x_assert(sec.Content().empty());

    while (1) {
        line = reader.getNext();
        if (!line) break;

        if (isEmptyOrComment(line)) {
            // empty lines and comments terminate the current paragraph
            pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
            check_TODO(line, reader);
        }
        else {
            // any line which does not start with a space is considered a keyword
            string      keyword;
            const char *rest = extractKeyword(line, keyword);

            if (rest) { // a new keyword
                reader.back();
                break;
            }

            check_TODO(line, reader);

            string Line = line; // modifiable copy (bullets and enumeration prefixes get removed from it)

            if (sec.get_type() == SEC_OCCURRENCE) {
                // here each line is a paragraph of its own
                pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
            }
            else {
                const char *firstNonWhite = firstChar(line);
                if (is_startof_itemlist_element(firstNonWhite)) {
                    h2x_assert(firstNonWhite != line);

                    pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);

                    Line[firstNonWhite-line] = ' '; // overwrite the bullet
                    type = ITEM; // is reset in call to pushParagraph
                }
                else {
                    unsigned        foundNum;   // only valid if foundEtype != NONE
                    EnumerationType foundEtype = detectLineEnumType(Line, foundNum);

                    if (foundEtype == ALPHA_UPPER || foundEtype == ALPHA_LOWER) {
                        // accept alphabetic enumerations only if they continue
                        // the previous one or (re)start with the first letter
                        if (foundNum == (last_alpha_num+1) || foundNum == 1) {
                            last_alpha_num = foundNum;
                        }
                        else {
#if defined(WARN_IGNORED_ALPHA_ENUMS)
                            add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum"));
#endif
                            foundEtype = NONE;

                            // detectLineEnumType has already removed the prefix from 'Line'
                            // -> fetch the unmodified line again
                            reader.back();
                            Line           = reader.getNext();
                            last_alpha_num = -1;
                        }
                    }

                    if (foundEtype != NONE) {
                        pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);

                        // (state has to be set AFTER pushParagraph, which resets it)
                        type  = ENUMERATED;
                        num   = foundNum;
                        etype = foundEtype;

                        if (!num) {
                            h2x_assert(etype == DIGITS);
                            throw "Enumerations starting with zero are not supported";
                        }
                    }
                }
            }

            // append the line to the current paragraph (or start a new one)
            if (paragraph.length()) {
                paragraph = paragraph+"\n"+Line;
            }
            else {
                paragraph         = string("\n")+Line;
                para_start_lineno = reader.getLineNo();
            }
        }
    }

    pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);

    if (sec.Content().size()>0 && indentation>0) {
        // prefix the first paragraph with a linefeed and 'indentation' spaces
        string spaces;
        spaces.reserve(indentation);
        spaces.append(indentation, ' ');

        string& ostr = sec.Content().front();
        ostr         = string("\n") + spaces + ostr;
    }
}
---|
650 | |
---|
651 | inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) { |
---|
652 | for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) { |
---|
653 | if (ex->Target() == link) { |
---|
654 | if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno()); |
---|
655 | throw strf("Link to '%s' duplicated here.", link.c_str()); |
---|
656 | } |
---|
657 | } |
---|
658 | } |
---|
659 | inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) { |
---|
660 | check_specific_duplicates(link, uplinks, add_warnings); |
---|
661 | check_specific_duplicates(link, references, add_warnings); |
---|
662 | } |
---|
663 | |
---|
664 | static void warnAboutDuplicate(SectionList& sections) { |
---|
665 | set<string> seen; |
---|
666 | SectionList::iterator end = sections.end(); |
---|
667 | for (SectionList::iterator s = sections.begin(); s != end; ++s) { |
---|
668 | const string& sname = s->getName(); |
---|
669 | if (sname == "NOTES") continue; // do not warn about multiple NOTES sections |
---|
670 | |
---|
671 | SectionList::iterator o = s; ++o; |
---|
672 | for (; o != end; ++o) { |
---|
673 | if (sname == o->getName()) { |
---|
674 | o->attach_warning("duplicated SECTION name"); |
---|
675 | if (seen.find(sname) == seen.end()) { |
---|
676 | s->attach_warning("name was first used"); |
---|
677 | seen.insert(sname); |
---|
678 | } |
---|
679 | } |
---|
680 | } |
---|
681 | } |
---|
682 | } |
---|
683 | |
---|
684 | void Helpfile::readHelp(istream& in, const string& filename) { |
---|
685 | if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str())); |
---|
686 | |
---|
687 | Reader read(in); |
---|
688 | |
---|
689 | inputfile = filename; // remember file read (for comment) |
---|
690 | |
---|
691 | const char *line; |
---|
692 | const char *name_only = strrchr(filename.c_str(), '/'); |
---|
693 | |
---|
694 | h2x_assert(name_only); |
---|
695 | ++name_only; |
---|
696 | |
---|
697 | try { |
---|
698 | while (1) { |
---|
699 | line = read.getNext(); |
---|
700 | if (!line) break; |
---|
701 | |
---|
702 | if (isEmptyOrComment(line)) { |
---|
703 | check_TODO(line, read); |
---|
704 | continue; |
---|
705 | } |
---|
706 | |
---|
707 | check_TODO(line, read); |
---|
708 | |
---|
709 | string keyword; |
---|
710 | const char *rest = extractKeyword(line, keyword); |
---|
711 | |
---|
712 | if (rest) { // found a keyword |
---|
713 | if (keyword == "UP") { |
---|
714 | rest = eatSpace(rest); |
---|
715 | if (strlen(rest)) { |
---|
716 | check_duplicates(rest, uplinks, references, true); |
---|
717 | if (strcmp(name_only, rest) == 0) throw "UP link to self"; |
---|
718 | |
---|
719 | uplinks.push_back(Link(rest, read.getLineNo())); |
---|
720 | } |
---|
721 | } |
---|
722 | else if (keyword == "SUB") { |
---|
723 | rest = eatSpace(rest); |
---|
724 | if (strlen(rest)) { |
---|
725 | check_duplicates(rest, uplinks, references, true); |
---|
726 | if (strcmp(name_only, rest) == 0) throw "SUB link to self"; |
---|
727 | |
---|
728 | references.push_back(Link(rest, read.getLineNo())); |
---|
729 | } |
---|
730 | } |
---|
731 | else if (keyword == "TITLE") { |
---|
732 | rest = eatSpace(rest); |
---|
733 | parseSection(title, rest, 0, read); |
---|
734 | |
---|
735 | if (title.Content().empty()) throw "empty TITLE not allowed"; |
---|
736 | |
---|
737 | const string& t = title.Content().front(); |
---|
738 | if (t.find("Standard help file form") != string::npos) { |
---|
739 | throw strf("Illegal title for help file: '%s'", t.c_str()); |
---|
740 | } |
---|
741 | |
---|
742 | const size_t len = t.length(); |
---|
743 | if (len>MAX_TITLE_CHARS) { |
---|
744 | // ignore non-alphanumeric characters at end of string: |
---|
745 | size_t last_alnum_pos = len-1; |
---|
746 | while (!isalnum(t[last_alnum_pos])) { |
---|
747 | --last_alnum_pos; |
---|
748 | } |
---|
749 | ++last_alnum_pos; |
---|
750 | arb_assert(last_alnum_pos<=len); |
---|
751 | |
---|
752 | const size_t ignored = len-last_alnum_pos; |
---|
753 | if ((len-ignored)>MAX_TITLE_CHARS) { |
---|
754 | title.attach_warning(strf("TITLE too verbose (max. %i chars allowed; found %zu%s)", |
---|
755 | MAX_TITLE_CHARS, |
---|
756 | len, |
---|
757 | ignored ? strf("; acceptable trailing chars: %zu", ignored).c_str() : "" |
---|
758 | )); |
---|
759 | } |
---|
760 | } |
---|
761 | } |
---|
762 | else { |
---|
763 | if (keyword == "NOTE") keyword = "NOTES"; |
---|
764 | if (keyword == "EXAMPLE") keyword = "EXAMPLES"; |
---|
765 | if (keyword == "WARNING") keyword = "WARNINGS"; |
---|
766 | |
---|
767 | SectionType stype = SEC_NONE; |
---|
768 | int idx; |
---|
769 | for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) { |
---|
770 | if (knownSections[idx] == keyword) { |
---|
771 | stype = SectionType(idx); |
---|
772 | break; |
---|
773 | } |
---|
774 | } |
---|
775 | |
---|
776 | size_t lineno = read.getLineNo(); |
---|
777 | |
---|
778 | if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str()); |
---|
779 | |
---|
780 | if (stype == SEC_SECTION) { |
---|
781 | string section_name = eatSpace(rest); |
---|
782 | Section sec(section_name, stype, lineno); |
---|
783 | parseSection(sec, "", 0, read); |
---|
784 | sections.push_back(sec); |
---|
785 | } |
---|
786 | else { |
---|
787 | Section sec(keyword, stype, lineno); |
---|
788 | rest = eatSpace(rest); |
---|
789 | parseSection(sec, rest, rest-line, read); |
---|
790 | sections.push_back(sec); |
---|
791 | } |
---|
792 | } |
---|
793 | } |
---|
794 | else { |
---|
795 | throw strf("Unhandled line"); |
---|
796 | } |
---|
797 | } |
---|
798 | |
---|
799 | warnAboutDuplicate(sections); |
---|
800 | } |
---|
801 | catch (string& err) { throw read.attached_message(err); } |
---|
802 | catch (const char *err) { throw read.attached_message(err); } |
---|
803 | } |
---|
804 | |
---|
805 | static bool shouldReflow(const string& s, int& foundIndentation) { |
---|
806 | // foundIndentation is only valid if shouldReflow() returns true |
---|
807 | enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START; |
---|
808 | bool equal_indent = true; |
---|
809 | int lastIndent = -1; |
---|
810 | int thisIndent = 0; |
---|
811 | |
---|
812 | for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) { |
---|
813 | if (*c == '\n') { |
---|
814 | state = START; |
---|
815 | thisIndent = 0; |
---|
816 | } |
---|
817 | else if (isSpace(*c)) { |
---|
818 | if (state == DOT || state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed |
---|
819 | else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces |
---|
820 | else if (state == CHAR) state = SPACE; // now seen 1 space |
---|
821 | } |
---|
822 | else { |
---|
823 | if (state == MULTIPLE) return false; // character after multiple spaces |
---|
824 | if (state == START) { |
---|
825 | if (lastIndent == -1) lastIndent = thisIndent; |
---|
826 | else if (lastIndent != thisIndent) equal_indent = false; |
---|
827 | } |
---|
828 | state = (*c == '.' || *c == ',') ? DOT : CHAR; |
---|
829 | } |
---|
830 | } |
---|
831 | |
---|
832 | if (lastIndent<0) { |
---|
833 | equal_indent = false; |
---|
834 | } |
---|
835 | |
---|
836 | if (equal_indent) { |
---|
837 | foundIndentation = lastIndent-1; |
---|
838 | h2x_assert(foundIndentation >= 0); |
---|
839 | } |
---|
840 | return equal_indent; |
---|
841 | } |
---|
842 | |
---|
843 | static string correctSpaces(const string& text, int change) { |
---|
844 | h2x_assert(text.find('\n') == string::npos); |
---|
845 | |
---|
846 | if (!change) return text; |
---|
847 | |
---|
848 | size_t first = text.find_first_not_of(' '); |
---|
849 | if (first == string::npos) return ""; // empty line |
---|
850 | |
---|
851 | if (change<0) { |
---|
852 | int remove = -change; |
---|
853 | h2x_assert(remove <= int(first)); |
---|
854 | return text.substr(remove); |
---|
855 | } |
---|
856 | |
---|
857 | h2x_assert(change>0); // add spaces |
---|
858 | return string(change, ' ')+text; |
---|
859 | } |
---|
860 | |
---|
861 | static string correctIndentation(const string& text, int change) { |
---|
862 | // removes 'remove' spaces from every line |
---|
863 | |
---|
864 | size_t this_lineend = text.find('\n'); |
---|
865 | string result; |
---|
866 | |
---|
867 | if (this_lineend == string::npos) { |
---|
868 | result = correctSpaces(text, change); |
---|
869 | } |
---|
870 | else { |
---|
871 | result = correctSpaces(text.substr(0, this_lineend), change); |
---|
872 | |
---|
873 | while (this_lineend != string::npos) { |
---|
874 | size_t next_lineend = text.find('\n', this_lineend+1); |
---|
875 | if (next_lineend == string::npos) { // last line |
---|
876 | result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change); |
---|
877 | } |
---|
878 | else { |
---|
879 | result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change); |
---|
880 | } |
---|
881 | this_lineend = next_lineend; |
---|
882 | } |
---|
883 | } |
---|
884 | return result; |
---|
885 | } |
---|
886 | |
---|
inline size_t countSpaces(const string& text) {
    // Returns the number of leading spaces of 'text'.
    // Returns INT_MAX if 'text' is empty or contains only spaces.
    const size_t content_start = text.find_first_not_of(' ');
    return content_start == string::npos ? size_t(INT_MAX) : content_start;
}
---|
892 | |
---|
static size_t scanMinIndentation(const string& text) {
    // Returns the smallest number of leading spaces found in any line of 'text'
    // (lines are separated by '\n').
    // Lines which are empty or contain only spaces are ignored.
    // Returns 0 if 'text' contains no other lines.
    //
    // All lines are examined - including the line in front of the first linefeed.
    // This matches correctIndentation(), which also modifies that line.

    size_t min_indent = INT_MAX;
    size_t line_start = 0;

    while (1) {
        const size_t line_end = text.find('\n', line_start);
        const size_t line_stop = line_end == string::npos ? text.length() : line_end;

        // first non-space at or behind line_start (may be the linefeed itself or lie in a later line)
        const size_t content_start = text.find_first_not_of(' ', line_start);
        if (content_start != string::npos && content_start<line_stop) { // line has content
            min_indent = min(min_indent, content_start-line_start);
        }

        if (line_end == string::npos) break; // was last line
        line_start = line_end+1;
    }

    if (min_indent == INT_MAX) min_indent = 0; // only empty lines
    return min_indent;
}
---|
916 | |
---|
917 | // ----------------------------- |
---|
918 | // class ParagraphTree |
---|
919 | |
---|
920 | class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable { |
---|
921 | ParagraphTree *brother; // has same indentation as this |
---|
922 | ParagraphTree *son; // indentation + 1 |
---|
923 | |
---|
924 | Ostring otext; // text of the Section (containing linefeeds) |
---|
925 | |
---|
926 | bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text) |
---|
927 | int indentation; // the real indentation of the blank (behind removed enumeration) |
---|
928 | |
---|
929 | |
---|
930 | string location_description() const OVERRIDE { return "in paragraph starting here"; } |
---|
931 | size_t line_number() const OVERRIDE { return otext.get_lineno(); } |
---|
932 | |
---|
933 | ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) |
---|
934 | : son(NULp), |
---|
935 | otext(*begin), |
---|
936 | indentation(0) |
---|
937 | { |
---|
938 | h2x_assert(begin != end); |
---|
939 | |
---|
940 | string& text = otext; |
---|
941 | |
---|
942 | reflow = shouldReflow(text, indentation); |
---|
943 | if (!reflow) { |
---|
944 | size_t reststart = text.find('\n', 1); |
---|
945 | |
---|
946 | if (reststart == 0) { |
---|
947 | attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail"); |
---|
948 | } |
---|
949 | |
---|
950 | if (reststart != string::npos) { |
---|
951 | int rest_indent = -1; |
---|
952 | string rest = text.substr(reststart); |
---|
953 | bool rest_reflow = shouldReflow(rest, rest_indent); |
---|
954 | |
---|
955 | if (rest_reflow) { |
---|
956 | int first_indent = countSpaces(text.substr(1)); |
---|
957 | if (get_type() == PLAIN_TEXT) { |
---|
958 | size_t last = text.find_last_not_of(' ', reststart-1); |
---|
959 | bool is_header = last != string::npos && text[last] == ':'; |
---|
960 | |
---|
961 | if (!is_header && rest_indent == (first_indent+8)) { |
---|
962 | #if defined(DEBUG) |
---|
963 | size_t textstart = text.find_first_not_of(" \n"); |
---|
964 | h2x_assert(textstart != string::npos); |
---|
965 | #endif // DEBUG |
---|
966 | |
---|
967 | text = text.substr(0, reststart)+correctIndentation(rest, -8); |
---|
968 | reflow = shouldReflow(text, indentation); |
---|
969 | } |
---|
970 | } |
---|
971 | else { |
---|
972 | int diff = rest_indent-first_indent; |
---|
973 | if (diff>0) { |
---|
974 | text = text.substr(0, reststart)+correctIndentation(rest, -diff); |
---|
975 | reflow = shouldReflow(text, indentation); |
---|
976 | } |
---|
977 | else if (diff<0) { |
---|
978 | // paragraph with more indent on first line (occurs?) |
---|
979 | attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff)); |
---|
980 | } |
---|
981 | } |
---|
982 | } |
---|
983 | } |
---|
984 | } |
---|
985 | |
---|
986 | if (!reflow) { |
---|
987 | indentation = scanMinIndentation(text); |
---|
988 | } |
---|
989 | text = correctIndentation(text, -indentation); |
---|
990 | if (get_type() == ITEM) { |
---|
991 | h2x_assert(indentation >= 2); |
---|
992 | indentation -= 2; |
---|
993 | } |
---|
994 | |
---|
995 | brother = buildParagraphTree(++begin, end); |
---|
996 | } |
---|
997 | |
---|
998 | void brothers_to_sons(ParagraphTree *new_brother); |
---|
999 | |
---|
1000 | public: |
---|
1001 | virtual ~ParagraphTree() { |
---|
1002 | delete brother; |
---|
1003 | delete son; |
---|
1004 | } |
---|
1005 | |
---|
1006 | ParagraphType get_type() const { return otext.get_type(); } |
---|
1007 | |
---|
1008 | bool is_itemlist_member() const { return get_type() == ITEM; } |
---|
1009 | unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; } |
---|
1010 | EnumerationType get_enum_type() const { return otext.get_enum_type(); } |
---|
1011 | |
---|
1012 | const char *readable_type() const { |
---|
1013 | const char *res = NULp; |
---|
1014 | switch (get_type()) { |
---|
1015 | case PLAIN_TEXT: res = "PLAIN_TEXT"; break; |
---|
1016 | case ITEM: res = "ITEM"; break; |
---|
1017 | case ENUMERATED: res = "ENUMERATED"; break; |
---|
1018 | } |
---|
1019 | return res; |
---|
1020 | } |
---|
1021 | |
---|
1022 | size_t countTextNodes() { |
---|
1023 | size_t nodes = 1; // this |
---|
1024 | if (son) nodes += son->countTextNodes(); |
---|
1025 | if (brother) nodes += brother->countTextNodes(); |
---|
1026 | return nodes; |
---|
1027 | } |
---|
1028 | |
---|
1029 | #if defined(DUMP_PARAGRAPHS) |
---|
1030 | void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; } |
---|
1031 | char *masknl(const char *text) { |
---|
1032 | char *result = ARB_strdup(text); |
---|
1033 | for (int i = 0; result[i]; ++i) { |
---|
1034 | if (result[i] == '\n') result[i] = '|'; |
---|
1035 | } |
---|
1036 | return result; |
---|
1037 | } |
---|
1038 | void dump(ostream& out, int indent = 0) { |
---|
1039 | print_indent(out, indent+1); |
---|
1040 | { |
---|
1041 | char *mtext = masknl(otext.as_string().c_str()); |
---|
1042 | out << "text='" << mtext << "'\n"; |
---|
1043 | free(mtext); |
---|
1044 | } |
---|
1045 | |
---|
1046 | print_indent(out, indent+1); |
---|
1047 | out << "type='" << readable_type() << "' "; |
---|
1048 | if (get_type() == ENUMERATED) { |
---|
1049 | out << "enumeration='" << otext.get_number() << "' "; |
---|
1050 | } |
---|
1051 | out << "reflow='" << reflow << "' "; |
---|
1052 | out << "indentation='" << indentation << "'\n"; |
---|
1053 | |
---|
1054 | if (son) { |
---|
1055 | print_indent(out, indent+2); cout << "son:\n"; |
---|
1056 | son->dump(out, indent+2); |
---|
1057 | cout << "\n"; |
---|
1058 | } |
---|
1059 | if (brother) { |
---|
1060 | print_indent(out, indent); cout << "brother:\n"; |
---|
1061 | brother->dump(out, indent); |
---|
1062 | } |
---|
1063 | } |
---|
1064 | #endif // DUMP_PARAGRAPHS |
---|
1065 | |
---|
1066 | private: |
---|
1067 | static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) { |
---|
1068 | if (begin == end) return NULp; |
---|
1069 | return new ParagraphTree(begin, end); |
---|
1070 | } |
---|
1071 | public: |
---|
1072 | static ParagraphTree* buildParagraphTree(const Section& sec) { |
---|
1073 | const Ostrings& txt = sec.Content(); |
---|
1074 | if (txt.empty()) throw "attempt to build an empty ParagraphTree"; |
---|
1075 | return buildParagraphTree(txt.begin(), txt.end()); |
---|
1076 | } |
---|
1077 | |
---|
1078 | bool contains(ParagraphTree *that) { |
---|
1079 | return |
---|
1080 | this == that || |
---|
1081 | (son && son->contains(that)) || |
---|
1082 | (brother && brother->contains(that)); |
---|
1083 | } |
---|
1084 | |
---|
1085 | ParagraphTree *predecessor(ParagraphTree *before_this) { |
---|
1086 | if (brother == before_this) return this; |
---|
1087 | if (!brother) return NULp; |
---|
1088 | return brother->predecessor(before_this); |
---|
1089 | } |
---|
1090 | |
---|
1091 | void append(ParagraphTree *new_brother) { |
---|
1092 | if (!brother) brother = new_brother; |
---|
1093 | else brother->append(new_brother); |
---|
1094 | } |
---|
1095 | |
---|
1096 | bool is_some_brother(const ParagraphTree *other) const { |
---|
1097 | return (other == brother) || (brother && brother->is_some_brother(other)); |
---|
1098 | } |
---|
1099 | |
---|
1100 | ParagraphTree* takeAllInFrontOf(ParagraphTree *after) { |
---|
1101 | ParagraphTree *removed = this; |
---|
1102 | ParagraphTree *after_pred = this; |
---|
1103 | |
---|
1104 | h2x_assert(is_some_brother(after)); |
---|
1105 | |
---|
1106 | while (1) { |
---|
1107 | h2x_assert(after_pred); |
---|
1108 | h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after' |
---|
1109 | |
---|
1110 | if (after_pred->brother == after) { // found after |
---|
1111 | after_pred->brother = NULp; // unlink |
---|
1112 | break; |
---|
1113 | } |
---|
1114 | after_pred = after_pred->brother; |
---|
1115 | } |
---|
1116 | |
---|
1117 | return removed; |
---|
1118 | } |
---|
1119 | |
---|
1120 | ParagraphTree *firstListMember() { |
---|
1121 | switch (get_type()) { |
---|
1122 | case PLAIN_TEXT: break; |
---|
1123 | case ITEM: return this; |
---|
1124 | case ENUMERATED: { |
---|
1125 | if (get_enumeration() == 1) return this; |
---|
1126 | break; |
---|
1127 | } |
---|
1128 | } |
---|
1129 | if (brother) return brother->firstListMember(); |
---|
1130 | return NULp; |
---|
1131 | } |
---|
1132 | |
---|
1133 | ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) { |
---|
1134 | if (indentation<previous.indentation) return NULp; |
---|
1135 | if (indentation == previous.indentation && get_type() == previous.get_type()) { |
---|
1136 | if (get_type() != ENUMERATED) return this; |
---|
1137 | if (get_enumeration() > previous.get_enumeration()) return this; |
---|
1138 | return NULp; |
---|
1139 | } |
---|
1140 | if (!brother) return NULp; |
---|
1141 | return brother->nextListMemberAfter(previous); |
---|
1142 | } |
---|
1143 | ParagraphTree *nextListMember() const { |
---|
1144 | return brother ? brother->nextListMemberAfter(*this) : NULp; |
---|
1145 | } |
---|
1146 | |
---|
1147 | ParagraphTree* firstWithLessIndentThan(int wanted_indentation) { |
---|
1148 | if (indentation < wanted_indentation) return this; |
---|
1149 | if (!brother) return NULp; |
---|
1150 | return brother->firstWithLessIndentThan(wanted_indentation); |
---|
1151 | } |
---|
1152 | |
---|
1153 | void format_indentations(); |
---|
1154 | void format_lists(); |
---|
1155 | |
---|
1156 | private: |
---|
1157 | static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) { |
---|
1158 | Ostrings S; |
---|
1159 | S.push_back(Ostring(Text, beginLineNo, type)); |
---|
1160 | return new ParagraphTree(S.begin(), S.end()); |
---|
1161 | } |
---|
1162 | ParagraphTree *xml_write_list_contents(); |
---|
1163 | ParagraphTree *xml_write_enum_contents(); |
---|
1164 | void xml_write_textblock(); |
---|
1165 | |
---|
1166 | public: |
---|
1167 | void xml_write(); |
---|
1168 | }; |
---|
1169 | |
---|
#if defined(DUMP_PARAGRAPHS)
static void dump_paragraph(ParagraphTree *para) {
    // helper function for use in gdb:
    // dumps 'para' to stdout (starting with indentation 0)
    para->dump(cout, 0);
}
#endif
---|
1176 | |
---|
1177 | void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) { |
---|
1178 | /*! folds down brothers to sons |
---|
1179 | * @param new_brother brother of 'this->brother', will become new brother. |
---|
1180 | * If new_brother == NULp -> make all brothers sons. |
---|
1181 | */ |
---|
1182 | |
---|
1183 | if (new_brother) { |
---|
1184 | h2x_assert(is_some_brother(new_brother)); |
---|
1185 | |
---|
1186 | if (brother != new_brother) { |
---|
1187 | #if defined(DEBUG) |
---|
1188 | if (son) { |
---|
1189 | son->attach_warning("Found unexpected son (in brothers_to_sons)"); |
---|
1190 | brother->attach_warning("while trying to transform paragraphs from here .."); |
---|
1191 | new_brother->attach_warning(".. to here .."); |
---|
1192 | attach_warning(".. into sons of this paragraph."); |
---|
1193 | return; |
---|
1194 | } |
---|
1195 | #endif |
---|
1196 | |
---|
1197 | h2x_assert(!son); |
---|
1198 | h2x_assert(brother); |
---|
1199 | |
---|
1200 | if (!new_brother) { // all brothers -> sons |
---|
1201 | son = brother; |
---|
1202 | brother = NULp; |
---|
1203 | } |
---|
1204 | else { |
---|
1205 | son = brother->takeAllInFrontOf(new_brother); |
---|
1206 | brother = new_brother; |
---|
1207 | } |
---|
1208 | } |
---|
1209 | } |
---|
1210 | else { |
---|
1211 | h2x_assert(!son); |
---|
1212 | son = brother; |
---|
1213 | brother = NULp; |
---|
1214 | } |
---|
1215 | } |
---|
1216 | void ParagraphTree::format_lists() { |
---|
1217 | // reformats tree such that all items/enumerations are brothers |
---|
1218 | ParagraphTree *member = firstListMember(); |
---|
1219 | if (member) { |
---|
1220 | for (ParagraphTree *curr = this; curr != member; curr = curr->brother) { |
---|
1221 | h2x_assert(curr); |
---|
1222 | if (curr->son) curr->son->format_lists(); |
---|
1223 | } |
---|
1224 | |
---|
1225 | for (ParagraphTree *next = member->nextListMember(); |
---|
1226 | next; |
---|
1227 | member = next, next = member->nextListMember()) |
---|
1228 | { |
---|
1229 | member->brothers_to_sons(next); |
---|
1230 | h2x_assert(member->brother == next); |
---|
1231 | |
---|
1232 | if (member->son) member->son->format_lists(); |
---|
1233 | } |
---|
1234 | |
---|
1235 | h2x_assert(!member->son); // member is the last item |
---|
1236 | |
---|
1237 | if (member->brother) { |
---|
1238 | ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1); |
---|
1239 | member->brothers_to_sons(non_member); |
---|
1240 | } |
---|
1241 | |
---|
1242 | if (member->son) member->son->format_lists(); |
---|
1243 | if (member->brother) member->brother->format_lists(); |
---|
1244 | } |
---|
1245 | else { |
---|
1246 | for (ParagraphTree *curr = this; curr; curr = curr->brother) { |
---|
1247 | h2x_assert(curr); |
---|
1248 | if (curr->son) curr->son->format_lists(); |
---|
1249 | } |
---|
1250 | } |
---|
1251 | } |
---|
1252 | |
---|
1253 | void ParagraphTree::format_indentations() { |
---|
1254 | if (brother) { |
---|
1255 | ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1); |
---|
1256 | #if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION) |
---|
1257 | if (same_indent && indentation != same_indent->indentation) { |
---|
1258 | same_indent->attach_warning("indentation is assumed to be same as .."); |
---|
1259 | attach_warning(".. here"); |
---|
1260 | } |
---|
1261 | #endif |
---|
1262 | brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs |
---|
1263 | if (brother) brother->format_indentations(); |
---|
1264 | } |
---|
1265 | |
---|
1266 | if (son) son->format_indentations(); |
---|
1267 | } |
---|
1268 | |
---|
1269 | // ----------------- |
---|
1270 | // LinkType |
---|
1271 | |
---|
enum LinkType {
    // one bit per link type (allows to test for several types using a bit-mask)
    LT_UNKNOWN = 0,
    LT_HTTP    = 1<<0,
    LT_HTTPS   = 1<<1,
    LT_FTP     = 1<<2,
    LT_FILE    = 1<<3,
    LT_EMAIL   = 1<<4,
    LT_HLP     = 1<<5,
    LT_PS      = 1<<6,
    LT_PDF     = 1<<7,
    LT_TICKET  = 1<<8,
};
---|
1284 | |
---|
// Ids of the link types (returned by LinkType2id).
// Index 0 belongs to LT_UNKNOWN; the other entries follow the order of the
// LinkType flags (index = 1 + bit position of the flag).
static const char *link_id[] = {
    "unknown",
    "www",    // "http:"
    "www",    // "https:"
    "www",    // "ftp:"
    "www",    // "file:"
    "email",
    "hlp",
    "ps",
    "pdf",
    "ticket",
};
---|
1297 | |
---|
1298 | static string LinkType2id(LinkType type) { |
---|
1299 | size_t idx = 0; |
---|
1300 | while (type >= 1) { |
---|
1301 | idx++; |
---|
1302 | type = LinkType(type>>1); |
---|
1303 | } |
---|
1304 | arb_assert(idx<ARRAY_ELEMS(link_id)); |
---|
1305 | return link_id[idx]; |
---|
1306 | } |
---|
1307 | |
---|
1308 | inline const char *getExtension(const string& name) { |
---|
1309 | size_t last_dot = name.find_last_of('.'); |
---|
1310 | if (last_dot == string::npos) { |
---|
1311 | return NULp; |
---|
1312 | } |
---|
1313 | return name.c_str()+last_dot+1; |
---|
1314 | } |
---|
1315 | |
---|
1316 | static LinkType detectLinkType(const string& link_target) { |
---|
1317 | LinkType type = LT_UNKNOWN; |
---|
1318 | const char *ext = getExtension(link_target); |
---|
1319 | |
---|
1320 | if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP; |
---|
1321 | else if (link_target.find("http://") == 0) type = LT_HTTP; |
---|
1322 | else if (link_target.find("https://") == 0) type = LT_HTTPS; |
---|
1323 | else if (link_target.find("ftp://") == 0) type = LT_FTP; |
---|
1324 | else if (link_target.find("file://") == 0) type = LT_FILE; |
---|
1325 | else if (link_target.find('@') != string::npos) type = LT_EMAIL; |
---|
1326 | else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS; |
---|
1327 | else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF; |
---|
1328 | else if (link_target[0] == '#') type = LT_TICKET; |
---|
1329 | |
---|
1330 | return type; |
---|
1331 | } |
---|
1332 | |
---|
1333 | // -------------------------------------------------------------------------------- |
---|
1334 | |
---|
1335 | |
---|
1336 | |
---|
static string locate_helpfile(const string& helpname) {
    // Searches for 'helpname' in the known helpfile locations (relative to the current directory).
    // Returns the name of the first existing candidate (including the location prefix)
    // or an empty string if 'helpname' was found nowhere.

    static const char *const location[] = { "source/", "genhelp/" };
    const size_t locations = sizeof(location)/sizeof(location[0]);

    for (size_t l = 0; l<locations; ++l) {
        const string candidate = string(location[l])+helpname;

        struct stat info;
        if (stat(candidate.c_str(), &info) == 0) { // candidate exists
            return candidate;
        }
    }
    return "";
}
---|
1353 | |
---|
1354 | static string locate_document(const string& docname) { |
---|
1355 | // search for 'docname' or 'docname.gz' in various helpfile locations |
---|
1356 | |
---|
1357 | string located = locate_helpfile(docname); |
---|
1358 | if (located.empty()) { |
---|
1359 | located = locate_helpfile(docname+".gz"); |
---|
1360 | } |
---|
1361 | return located; |
---|
1362 | } |
---|
1363 | |
---|
1364 | static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) { |
---|
1365 | if (type == LT_UNKNOWN) { |
---|
1366 | string msg = string("Unknown link type (dest='")+dest+"')"; |
---|
1367 | throw LineAttachedMessage(msg, source_line); |
---|
1368 | } |
---|
1369 | |
---|
1370 | link.add_attribute("dest", dest); |
---|
1371 | link.add_attribute("type", LinkType2id(type)); |
---|
1372 | link.add_attribute("source_line", source_line); |
---|
1373 | |
---|
1374 | if (type&(LT_HLP|LT_PDF|LT_PS)) { // other links (www, email) cannot be checked for existence here |
---|
1375 | string fullhelp = ((type<_HLP) ? locate_helpfile : locate_document)(dest); |
---|
1376 | if (fullhelp.empty()) { |
---|
1377 | link.add_attribute("missing", "1"); |
---|
1378 | string deadlink = strf("Dead link to '%s'", dest.c_str()); |
---|
1379 | #if defined(DEVEL_RELEASE) |
---|
1380 | throw LineAttachedMessage(deadlink, source_line); |
---|
1381 | #else // !defined(DEVEL_RELEASE) |
---|
1382 | add_warning(deadlink, source_line); |
---|
1383 | #endif |
---|
1384 | } |
---|
1385 | } |
---|
1386 | } |
---|
1387 | |
---|
1388 | static void print_XML_Text_expanding_links(const string& text, size_t lineNo) { |
---|
1389 | size_t found = text.find("LINK{", 0); |
---|
1390 | if (found != string::npos) { |
---|
1391 | size_t inside_link = found+5; |
---|
1392 | size_t close = text.find('}', inside_link); |
---|
1393 | |
---|
1394 | if (close == string::npos) throw "unclosed 'LINK{}'"; |
---|
1395 | |
---|
1396 | string link_target = text.substr(inside_link, close-inside_link); |
---|
1397 | LinkType type = detectLinkType(link_target); |
---|
1398 | string dest = link_target; |
---|
1399 | |
---|
1400 | XML_Text(text.substr(0, found)); |
---|
1401 | |
---|
1402 | { |
---|
1403 | XML_Tag link("LINK"); |
---|
1404 | link.set_on_extra_line(false); |
---|
1405 | add_link_attributes(link, type, dest, lineNo); |
---|
1406 | } |
---|
1407 | |
---|
1408 | print_XML_Text_expanding_links(text.substr(close+1), lineNo); |
---|
1409 | } |
---|
1410 | else { |
---|
1411 | XML_Text t(text); |
---|
1412 | } |
---|
1413 | } |
---|
1414 | |
---|
static string autolink_ticket_references(const string& text) {
    // Returns a copy of 'text' in which every ticket reference (a '#' directly
    // followed by one or more digits, e.g. '#123') is wrapped into 'LINK{...}'.
    // A '#' which is not followed by a digit is copied unchanged.
    //
    // NOTE(review): a '#<digits>' located inside an already existing 'LINK{...}'
    // gets wrapped as well - confirm that such input is not expected.

    string linked;
    size_t copied = 0; // start of the part of 'text' not yet copied into 'linked'
    size_t search = 0; // position where the search for the next '#' starts

    while (1) {
        const size_t hashpos = text.find('#', search);
        if (hashpos == string::npos) break;

        // find end of digits behind '#'
        // (isdigit requires a value representable as unsigned char -> cast avoids
        // undefined behavior for non-ASCII characters)
        size_t digits_end = hashpos+1;
        while (digits_end<text.length() && isdigit(static_cast<unsigned char>(text[digits_end]))) {
            ++digits_end;
        }

        if (digits_end == hashpos+1) { // char after '#' is no digit => not a ticketref
            search = digits_end;
            continue;
        }

        linked.append(text, copied, hashpos-copied); // text in front of the reference
        linked += "LINK{";
        linked.append(text, hashpos, digits_end-hashpos); // '#' + digits
        linked += '}';

        copied = search = digits_end;
    }

    linked.append(text, copied, string::npos); // rest behind the last reference
    return linked;
}
---|
1438 | |
---|
1439 | inline void print_XML_Text(const string& text, size_t lineNo) { |
---|
1440 | string autolinkedText = autolink_ticket_references(text); |
---|
1441 | print_XML_Text_expanding_links(autolinkedText, lineNo); |
---|
1442 | } |
---|
1443 | |
---|
1444 | void ParagraphTree::xml_write_textblock() { |
---|
1445 | XML_Tag textblock("T"); |
---|
1446 | textblock.add_attribute("reflow", reflow ? "1" : "0"); |
---|
1447 | |
---|
1448 | { |
---|
1449 | string usedText; |
---|
1450 | const string& text = otext; |
---|
1451 | if (reflow) { |
---|
1452 | usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level); |
---|
1453 | } |
---|
1454 | else { |
---|
1455 | usedText = text; |
---|
1456 | } |
---|
1457 | print_XML_Text(usedText, otext.get_lineno()); |
---|
1458 | } |
---|
1459 | } |
---|
1460 | |
---|
1461 | ParagraphTree *ParagraphTree::xml_write_list_contents() { |
---|
1462 | h2x_assert(is_itemlist_member()); |
---|
1463 | #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) |
---|
1464 | if (!reflow) attach_warning("ITEM not reflown (check output)"); |
---|
1465 | #endif |
---|
1466 | { |
---|
1467 | XML_Tag entry("ENTRY"); |
---|
1468 | entry.add_attribute("item", "1"); |
---|
1469 | xml_write_textblock(); |
---|
1470 | if (son) son->xml_write(); |
---|
1471 | } |
---|
1472 | if (brother && brother->is_itemlist_member()) { |
---|
1473 | return brother->xml_write_list_contents(); |
---|
1474 | } |
---|
1475 | return brother; |
---|
1476 | } |
---|
1477 | ParagraphTree *ParagraphTree::xml_write_enum_contents() { |
---|
1478 | h2x_assert(get_enumeration()); |
---|
1479 | #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) |
---|
1480 | if (!reflow) attach_warning("ENUMERATED not reflown (check output)"); |
---|
1481 | #endif |
---|
1482 | { |
---|
1483 | XML_Tag entry("ENTRY"); |
---|
1484 | switch (get_enum_type()) { |
---|
1485 | case DIGITS: |
---|
1486 | entry.add_attribute("enumerated", strf("%i", get_enumeration())); |
---|
1487 | break; |
---|
1488 | case ALPHA_UPPER: |
---|
1489 | entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration())); |
---|
1490 | break; |
---|
1491 | case ALPHA_LOWER: |
---|
1492 | entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration())); |
---|
1493 | break; |
---|
1494 | default: |
---|
1495 | h2x_assert(0); |
---|
1496 | break; |
---|
1497 | } |
---|
1498 | xml_write_textblock(); |
---|
1499 | if (son) son->xml_write(); |
---|
1500 | } |
---|
1501 | if (brother && brother->get_enumeration()) { |
---|
1502 | int diff = brother->get_enumeration()-get_enumeration(); |
---|
1503 | if (diff != 1) { |
---|
1504 | attach_warning("Non-consecutive enumeration detected between here.."); |
---|
1505 | brother->attach_warning(".. and here"); |
---|
1506 | } |
---|
1507 | return brother->xml_write_enum_contents(); |
---|
1508 | } |
---|
1509 | return brother; |
---|
1510 | } |
---|
1511 | |
---|
void ParagraphTree::xml_write() {
    // Writes this paragraph, its sons and all following brothers as XML:
    // - an enumerated paragraph opens an 'ENUM' tag and writes all entries of the enumeration into it,
    // - an item opens a 'LIST' tag and writes all entries of the list into it,
    // - any other paragraph is written into a 'P' tag (text first, then its sons).
    // Afterwards writing continues with the first paragraph not written yet.
    //
    // Errors thrown as string or const char* are rethrown via attached_message().
    try {
        ParagraphTree *next = NULp; // first brother not handled yet
        if (get_enumeration()) {
            XML_Tag enu("ENUM");
            if (get_enumeration() != 1) {
                attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration()));
            }
            next = xml_write_enum_contents();
#if defined(WARN_LONESOME_ENUM_ELEMENTS)
            if (next == brother) attach_warning("Suspicious single-element-ENUM");
#endif
        }
        else if (is_itemlist_member()) {
            XML_Tag list("LIST");
            next = xml_write_list_contents();
#if defined(WARN_LONESOME_LIST_ELEMENTS)
            if (next == brother) attach_warning("Suspicious single-element-LIST");
#endif
        }
        else {
            {
                XML_Tag para("P"); // scope ends before brothers get written
                xml_write_textblock();
                if (son) son->xml_write();
            }
            next = brother;
        }
        if (next) next->xml_write();
    }
    catch (string& err) { throw attached_message(err); }
    catch (const char *err) { throw attached_message(err); }
}
---|
1545 | |
---|
1546 | static void create_top_links(const Links& links, const char *tag) { |
---|
1547 | for (Links::const_iterator s = links.begin(); s != links.end(); ++s) { |
---|
1548 | XML_Tag link(tag); |
---|
1549 | add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno()); |
---|
1550 | } |
---|
1551 | } |
---|
1552 | |
---|
inline string remove_LF_and_indentation(string paragraph) {
    // Joins all lines of 'paragraph' into one single line:
    // remove linefeeds + spaces behind linefeed (=indentation)
    //
    // - spaces in front of a linefeed are removed
    // - a linefeed (plus following spaces) is replaced by one single space
    // - trailing spaces are removed from the result
    //
    // Works on (and returns) the by-value copy of the passed string.
    size_t pos = 0; // start position for the search of the next LF
    while (1) {
        size_t lf = paragraph.find('\n', pos);
        if (lf == string::npos) break; // all LFs handled

        // eliminate spaces before the LF:
        if (lf>0 && paragraph[lf-1] == ' ') { // LF is preceded by space(s)
            size_t sp = lf-1;
            while (sp>=1 && paragraph[sp-1] == ' ') --sp; // position to 1st space
            arb_assert(sp<lf);
            paragraph.erase(sp, lf-sp);
            lf = sp; // LF moved to the position of the 1st erased space
        }
        arb_assert(paragraph[lf] == '\n');

        size_t ns = paragraph.find(' ', lf); // next space
        if (ns != lf+1) { // no space after LF
            paragraph[lf] = ' '; // -> replace LF by single space
            pos = lf+1;
        }
        else {
            size_t as = paragraph.find_first_not_of(' ', ns); // pos after consecutive space(s)
            size_t ls = as == string::npos ? ns : as-1; // last consecutive space
            paragraph.erase(lf, ls-lf); // keep one space (between concatenated line contents)
            // 'pos' is not advanced here: the search restarts at the old position
        }
    }
    // remove trailing spaces:
    size_t ls = paragraph.find_last_not_of(' ');
    if (ls == string::npos) { // only spaces
        paragraph.clear();
    }
    else {
        ++ls; // position behind last non-space
        paragraph.erase(ls, paragraph.length()-ls);
    }
    return paragraph;
}
---|
1592 | |
---|
1593 | void Helpfile::writeXML(FILE *out, const string& page_name) { |
---|
1594 | XML_Document xml("PAGE", "arb_help.dtd", out); |
---|
1595 | |
---|
1596 | xml.skip_empty_tags = true; |
---|
1597 | xml.indentation_per_level = 2; |
---|
1598 | |
---|
1599 | xml.getRoot().add_attribute("name", page_name); |
---|
1600 | #if defined(DEBUG) |
---|
1601 | xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version |
---|
1602 | #else |
---|
1603 | xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version |
---|
1604 | #endif // DEBUG |
---|
1605 | |
---|
1606 | xml.getRoot().add_attribute("source", inputfile.c_str()); |
---|
1607 | |
---|
1608 | { |
---|
1609 | XML_Comment(string("automatically generated from ../")+inputfile+' '); |
---|
1610 | } |
---|
1611 | |
---|
1612 | create_top_links(uplinks, "UP"); |
---|
1613 | create_top_links(references, "SUB"); |
---|
1614 | create_top_links(auto_references, "SUB"); |
---|
1615 | |
---|
1616 | try { |
---|
1617 | string titleText, subtitleText; |
---|
1618 | |
---|
1619 | const Ostrings& T = title.Content(); |
---|
1620 | Ostrings::const_iterator s = T.begin(); |
---|
1621 | |
---|
1622 | if (s != T.end()) titleText = *s++; |
---|
1623 | |
---|
1624 | bool subtitleAdded = false; // @@@ not needed! (use !subtitleText.empty()) |
---|
1625 | for (; s != T.end(); ++s) { |
---|
1626 | if (s->get_type() != PLAIN_TEXT) { |
---|
1627 | throw s->attached_message("wrong paragraph type (plain text expected)"); |
---|
1628 | } |
---|
1629 | string text = s->as_string(); |
---|
1630 | if (!text.empty()) { // ignore empty lines |
---|
1631 | text = eatWhitespace(text.c_str()); |
---|
1632 | if (!text.empty()) { |
---|
1633 | if (subtitleAdded) throw s->attached_message("only one subtitle accepted"); |
---|
1634 | |
---|
1635 | text = remove_LF_and_indentation(text); |
---|
1636 | |
---|
1637 | if (text.length()>MAX_SUBTITLE_CHARS) { |
---|
1638 | s->attach_warning(strf("subtitle too verbose (max. %i chars allowed; found %zu)", MAX_SUBTITLE_CHARS, text.length())); |
---|
1639 | } |
---|
1640 | subtitleText = text; |
---|
1641 | subtitleAdded = true; // accept only one line |
---|
1642 | } |
---|
1643 | } |
---|
1644 | } |
---|
1645 | |
---|
1646 | { |
---|
1647 | XML_Tag title_tag("TITLE"); { XML_Text text(titleText); } |
---|
1648 | } |
---|
1649 | if (!subtitleText.empty()) { |
---|
1650 | XML_Tag title_tag("SUBTITLE"); { XML_Text text(subtitleText); } |
---|
1651 | } |
---|
1652 | |
---|
1653 | } |
---|
1654 | catch (string& err) { throw title.attached_message(err); } |
---|
1655 | catch (const char *err) { throw title.attached_message(err); } |
---|
1656 | |
---|
1657 | for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { |
---|
1658 | try { |
---|
1659 | XML_Tag section_tag("SECTION"); |
---|
1660 | section_tag.add_attribute("name", sec->getName()); |
---|
1661 | |
---|
1662 | ParagraphTree *ptree = ParagraphTree::buildParagraphTree(*sec); |
---|
1663 | |
---|
1664 | #if defined(DEBUG) |
---|
1665 | size_t textnodes = ptree->countTextNodes(); |
---|
1666 | #endif |
---|
1667 | #if defined(DUMP_PARAGRAPHS) |
---|
1668 | cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n"; |
---|
1669 | ptree->dump(cout); |
---|
1670 | cout << "----------------------------------------\n"; |
---|
1671 | #endif |
---|
1672 | |
---|
1673 | ptree->format_lists(); |
---|
1674 | |
---|
1675 | #if defined(DUMP_PARAGRAPHS) |
---|
1676 | cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n"; |
---|
1677 | ptree->dump(cout); |
---|
1678 | cout << "----------------------------------------\n"; |
---|
1679 | #endif |
---|
1680 | #if defined(DEBUG) |
---|
1681 | size_t textnodes2 = ptree->countTextNodes(); |
---|
1682 | h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error |
---|
1683 | #endif |
---|
1684 | |
---|
1685 | ptree->format_indentations(); |
---|
1686 | |
---|
1687 | #if defined(DUMP_PARAGRAPHS) |
---|
1688 | cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n"; |
---|
1689 | ptree->dump(cout); |
---|
1690 | cout << "----------------------------------------\n"; |
---|
1691 | #endif |
---|
1692 | #if defined(DEBUG) |
---|
1693 | size_t textnodes3 = ptree->countTextNodes(); |
---|
1694 | h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error |
---|
1695 | #endif |
---|
1696 | |
---|
1697 | ptree->xml_write(); |
---|
1698 | |
---|
1699 | delete ptree; |
---|
1700 | } |
---|
1701 | catch (string& err) { throw sec->attached_message(err); } |
---|
1702 | catch (const char *err) { throw sec->attached_message(err); } |
---|
1703 | } |
---|
1704 | } |
---|
1705 | |
---|
1706 | void Helpfile::extractInternalLinks() { |
---|
1707 | for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { |
---|
1708 | try { |
---|
1709 | const Ostrings& s = sec->Content(); |
---|
1710 | |
---|
1711 | for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) { |
---|
1712 | const string& line = *li; |
---|
1713 | size_t start = 0; |
---|
1714 | |
---|
1715 | while (1) { |
---|
1716 | size_t found = line.find("LINK{", start); |
---|
1717 | if (found == string::npos) break; |
---|
1718 | found += 5; |
---|
1719 | size_t close = line.find('}', found); |
---|
1720 | if (close == string::npos) break; |
---|
1721 | |
---|
1722 | string link_target = line.substr(found, close-found); |
---|
1723 | |
---|
1724 | if (link_target.find("http://") == string::npos && |
---|
1725 | link_target.find("https://")== string::npos && |
---|
1726 | link_target.find("ftp://") == string::npos && |
---|
1727 | link_target.find("file://") == string::npos && |
---|
1728 | link_target.find('@') == string::npos) |
---|
1729 | { |
---|
1730 | check_self_ref(link_target); |
---|
1731 | |
---|
1732 | try { |
---|
1733 | check_specific_duplicates(link_target, references, false); // check only sublinks here |
---|
1734 | check_specific_duplicates(link_target, uplinks, false); // check only uplinks here |
---|
1735 | check_specific_duplicates(link_target, auto_references, false); // check only sublinks here |
---|
1736 | |
---|
1737 | // only auto-add inline reference if none of the above checks has thrown |
---|
1738 | auto_references.push_back(Link(link_target, sec->line_number())); |
---|
1739 | } |
---|
1740 | catch (string& err) { |
---|
1741 | ; // silently ignore inlined |
---|
1742 | } |
---|
1743 | } |
---|
1744 | start = close+1; |
---|
1745 | } |
---|
1746 | } |
---|
1747 | } |
---|
1748 | catch (string& err) { |
---|
1749 | throw sec->attached_message("'"+err+"' while scanning LINK{}"); |
---|
1750 | } |
---|
1751 | } |
---|
1752 | } |
---|
1753 | |
---|
1754 | static void show_err(const string& err, size_t lineno, const string& helpfile) { |
---|
1755 | if (err.find(helpfile+':') != string::npos) { |
---|
1756 | cerr << err; |
---|
1757 | } |
---|
1758 | else if (lineno == NO_LINENUMBER_INFO) { |
---|
1759 | cerr << helpfile << ":1: [in unknown line] " << err; |
---|
1760 | } |
---|
1761 | else { |
---|
1762 | cerr << helpfile << ":" << lineno << ": " << err; |
---|
1763 | } |
---|
1764 | cerr << '\n'; |
---|
1765 | } |
---|
1766 | inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) { |
---|
1767 | show_err(line_err.Message(), line_err.Lineno(), helpfile); |
---|
1768 | } |
---|
1769 | inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) { |
---|
1770 | show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile); |
---|
1771 | } |
---|
1772 | inline void show_warnings(const string& helpfile) { |
---|
1773 | for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) { |
---|
1774 | show_warning(*wi, helpfile); |
---|
1775 | } |
---|
1776 | } |
---|
1777 | static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) { |
---|
1778 | show_err(error, helpfile); |
---|
1779 | show_warnings(helpfile); |
---|
1780 | } |
---|
1781 | |
---|
1782 | int ARB_main(int argc, char *argv[]) { |
---|
1783 | if (argc != 3) { |
---|
1784 | cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n"; |
---|
1785 | return EXIT_FAILURE; |
---|
1786 | } |
---|
1787 | |
---|
1788 | Helpfile help; |
---|
1789 | string arb_help; |
---|
1790 | |
---|
1791 | try { |
---|
1792 | try { |
---|
1793 | arb_help = argv[1]; |
---|
1794 | string xml_output = argv[2]; |
---|
1795 | |
---|
1796 | { |
---|
1797 | ifstream in(arb_help.c_str()); |
---|
1798 | help.readHelp(in, arb_help); |
---|
1799 | } |
---|
1800 | |
---|
1801 | help.extractInternalLinks(); |
---|
1802 | |
---|
1803 | { |
---|
1804 | FILE *out = std::fopen(xml_output.c_str(), "wt"); |
---|
1805 | if (!out) throw string("Can't open '")+xml_output+'\''; |
---|
1806 | |
---|
1807 | try { |
---|
1808 | // arb_help contains 'source/name.hlp' |
---|
1809 | size_t slash = arb_help.find('/'); |
---|
1810 | size_t dot = arb_help.find_last_of('.'); |
---|
1811 | |
---|
1812 | if (slash == string::npos || dot == string::npos) { |
---|
1813 | throw string("parameter <ARB helpfile> has to be in format 'source/name.hlp' (not '"+arb_help+"')"); |
---|
1814 | } |
---|
1815 | |
---|
1816 | string page_name(arb_help, slash+1, dot-slash-1); |
---|
1817 | help.writeXML(out, page_name); |
---|
1818 | fclose(out); |
---|
1819 | } |
---|
1820 | catch (...) { |
---|
1821 | fclose(out); |
---|
1822 | remove(xml_output.c_str()); |
---|
1823 | throw; |
---|
1824 | } |
---|
1825 | } |
---|
1826 | |
---|
1827 | show_warnings(arb_help); |
---|
1828 | |
---|
1829 | return EXIT_SUCCESS; |
---|
1830 | } |
---|
1831 | catch (string& err) { throw unattached_message(err); } |
---|
1832 | catch (const char * err) { throw unattached_message(err); } |
---|
1833 | catch (LineAttachedMessage& err) { throw; } |
---|
1834 | catch (...) { throw unattached_message("unknown exception in arb_help2xml"); } |
---|
1835 | } |
---|
1836 | catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); } |
---|
1837 | catch (...) { h2x_assert(0); } |
---|
1838 | |
---|
1839 | return EXIT_FAILURE; |
---|
1840 | } |
---|
1841 | |
---|
1842 | // -------------------------------------------------------------------------------- |
---|
1843 | |
---|
1844 | #ifdef UNIT_TESTS |
---|
1845 | #include <test_unit.h> |
---|
1846 | #include <arb_msg.h> |
---|
1847 | #include <arb_file.h> |
---|
1848 | |
---|
1849 | // Hint: you may set ONLY_DO_UNITTEST = 1 to speed up code/test-cycle |
---|
1850 | // see ./Makefile@ONLY_DO_UNITTEST |
---|
1851 | |
---|
1852 | #define TEST_REMOVE_LF_AND_INDENTATION(i,want) TEST_EXPECT_EQUAL(remove_LF_and_indentation(i).c_str(), want) |
---|
1853 | #define TEST_REMOVE_LF_AND_INDENTATION__BROKEN(i,want,got) TEST_EXPECT_EQUAL__BROKEN(remove_LF_and_indentation(i).c_str(), want, got) |
---|
1854 | |
---|
1855 | void TEST_remove_LF_and_indentation() { |
---|
1856 | TEST_REMOVE_LF_AND_INDENTATION("", |
---|
1857 | ""); |
---|
1858 | |
---|
1859 | TEST_REMOVE_LF_AND_INDENTATION(" \n \n \n ", |
---|
1860 | ""); |
---|
1861 | TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline", |
---|
1862 | "hello Newline"); |
---|
1863 | TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline\n 1\n2 \n 3 \n4\n5\n 6 \n 7 \n 8\n", |
---|
1864 | "hello Newline 1 2 3 4 5 6 7 8"); |
---|
1865 | |
---|
1866 | TEST_REMOVE_LF_AND_INDENTATION("Visualization of Three-dimensional\n structure of small subunit (16S) rRNA", |
---|
1867 | "Visualization of Three-dimensional structure of small subunit (16S) rRNA"); |
---|
1868 | } |
---|
1869 | |
---|
1870 | static arb_test::match_expectation help_file_compiles(const char *helpfile, const char *expected_title, const char *expected_error_part) { |
---|
1871 | using namespace arb_test; |
---|
1872 | expectation_group expected; |
---|
1873 | |
---|
1874 | ifstream in(helpfile); |
---|
1875 | |
---|
1876 | LineAttachedMessage *error = NULp; |
---|
1877 | |
---|
1878 | Helpfile help; |
---|
1879 | try { |
---|
1880 | help.readHelp(in, helpfile); |
---|
1881 | help.extractInternalLinks(); |
---|
1882 | |
---|
1883 | FILE *devnul = fopen("/dev/null", "wt"); |
---|
1884 | if (!devnul) throw unattached_message("can't write to null device"); |
---|
1885 | help.writeXML(devnul, "dummy"); |
---|
1886 | fclose(devnul); |
---|
1887 | } |
---|
1888 | catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); } |
---|
1889 | catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); } |
---|
1890 | |
---|
1891 | if (expected_error_part) { |
---|
1892 | expected.add(that(error).does_differ_from_NULL()); |
---|
1893 | if (error) expected.add(that(error->Message()).does_contain(expected_error_part)); |
---|
1894 | } |
---|
1895 | else { |
---|
1896 | expected.add(that(error).is_equal_to_NULL()); |
---|
1897 | if (!error) { |
---|
1898 | Section title = help.get_title(); |
---|
1899 | const Ostrings& title_strings = title.Content(); |
---|
1900 | |
---|
1901 | expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title)); |
---|
1902 | expected.add(that(title_strings.size()).is_equal_to(1)); |
---|
1903 | } |
---|
1904 | else { |
---|
1905 | show_error_and_warnings(*error, helpfile); |
---|
1906 | } |
---|
1907 | } |
---|
1908 | |
---|
1909 | delete error; |
---|
1910 | |
---|
1911 | return all().ofgroup(expected); |
---|
1912 | } |
---|
1913 | |
---|
1914 | #define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULp)) |
---|
1915 | #define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULp,expError)) |
---|
1916 | |
---|
1917 | void TEST_hlp2xml_conversion() { |
---|
1918 | TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE")); |
---|
1919 | |
---|
1920 | HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp |
---|
1921 | |
---|
1922 | HELP_FILE_COMPILES("source/markbyref.hlp", "Mark by reference"); // source/markbyref.hlp |
---|
1923 | HELP_FILE_COMPILES("source/ad_align.hlp", "Alignment Administration"); // source/ad_align.hlp |
---|
1924 | HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights and licenses"); // genhelp/copyright.hlp |
---|
1925 | |
---|
1926 | // @@@ add test for helpfile with subtitle |
---|
1927 | |
---|
1928 | HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file |
---|
1929 | } |
---|
1930 | TEST_PUBLISH(TEST_hlp2xml_conversion); |
---|
1931 | |
---|
1932 | |
---|
1933 | // #define TEST_AUTO_UPDATE // uncomment to update expected xml // @@@ comment-out! |
---|
1934 | |
---|
1935 | void TEST_hlp2xml_output() { |
---|
1936 | string tested_helpfile[] = { |
---|
1937 | "unittest" |
---|
1938 | }; |
---|
1939 | |
---|
1940 | string HELP_SOURCE = "../../HELP_SOURCE/"; |
---|
1941 | string LIB = "../../lib/"; |
---|
1942 | string EXPECTED = "help/"; |
---|
1943 | |
---|
1944 | for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) { |
---|
1945 | string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml"; |
---|
1946 | string html = LIB + "help_html/" + tested_helpfile[i] + ".html"; |
---|
1947 | string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp"; |
---|
1948 | |
---|
1949 | string xml_expected = EXPECTED + tested_helpfile[i] + ".xml"; |
---|
1950 | string html_expected = EXPECTED + tested_helpfile[i] + ".html"; |
---|
1951 | string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp"; |
---|
1952 | |
---|
1953 | |
---|
1954 | #if defined(TEST_AUTO_UPDATE) |
---|
1955 | # if defined(NDEBUG) |
---|
1956 | # error please use auto-update only in DEBUG mode |
---|
1957 | # endif |
---|
1958 | TEST_COPY_FILE(xml.c_str(), xml_expected.c_str()); |
---|
1959 | TEST_COPY_FILE(html.c_str(), html_expected.c_str()); |
---|
1960 | TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str()); |
---|
1961 | |
---|
1962 | #else // !defined(TEST_AUTO_UPDATE) |
---|
1963 | |
---|
1964 | # if defined(DEBUG) |
---|
1965 | int expected_xml_difflines = 0; |
---|
1966 | int expected_hlp_difflines = 0; |
---|
1967 | # else // !defined(DEBUG) |
---|
1968 | int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning |
---|
1969 | int expected_hlp_difflines = 2; // resulting warning in helpfile |
---|
1970 | # endif |
---|
1971 | TEST_EXPECT_TEXTFILE_DIFFLINES(xml.c_str(), xml_expected.c_str(), expected_xml_difflines); |
---|
1972 | TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html.c_str(), html_expected.c_str(), 0); // html contains the update-date |
---|
1973 | TEST_EXPECT_TEXTFILE_DIFFLINES(hlp.c_str(), hlp_expected.c_str(), expected_hlp_difflines); |
---|
1974 | #endif |
---|
1975 | } |
---|
1976 | } |
---|
1977 | |
---|
1978 | |
---|
1979 | #if defined(PROTECT_HELP_VS_CHANGES) |
---|
1980 | void TEST_protect_help_vs_changes() { // should normally be disabled |
---|
1981 | // fails if help changes compared to another checkout |
---|
1982 | // or just updates the diff w/o failing (if you comment out the last line) |
---|
1983 | // |
---|
1984 | // if the patch is hugo and you load it into xemacs |
---|
1985 | // you might want to (turn-on-lazy-shot) |
---|
1986 | // |
---|
1987 | // patch-pointer: ../UNIT_TESTER/run/help_changes.patch |
---|
1988 | |
---|
1989 | bool do_help = true; |
---|
1990 | bool do_html = true; |
---|
1991 | |
---|
1992 | const char *ref_WC = "ARB.help.ref"; |
---|
1993 | |
---|
1994 | // ---------------------------------------- config above |
---|
1995 | |
---|
1996 | string this_base = "../.."; |
---|
1997 | string ref_base = this_base+"/../"+ref_WC; |
---|
1998 | string to_help = "/lib/help"; |
---|
1999 | string to_html = "/lib/help_html"; |
---|
2000 | string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help; |
---|
2001 | string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html; |
---|
2002 | |
---|
2003 | string update_cmd; |
---|
2004 | |
---|
2005 | if (do_help) { |
---|
2006 | if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")"; |
---|
2007 | else update_cmd = diff_help; |
---|
2008 | } |
---|
2009 | else if (do_html) update_cmd = diff_html; |
---|
2010 | |
---|
2011 | string patch = "help_changes.patch"; |
---|
2012 | update_cmd += " >"+patch+" ||true"; |
---|
2013 | |
---|
2014 | string fail_on_change_cmd = "test \"`cat "+patch+" | grep -v '^Common subdirectories' | wc -l`\" = \"0\" || ( echo \"Error: Help changed\"; false)"; |
---|
2015 | |
---|
2016 | TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str())); |
---|
2017 | TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit |
---|
2018 | } |
---|
2019 | #endif |
---|
2020 | |
---|
2021 | #endif // UNIT_TESTS |
---|