| 1 | // ==================================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : arb_help2xml.cxx // |
|---|
| 4 | // Purpose : Converts old ARB help format to XML // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in October 2001 // |
|---|
| 7 | // Copyright Department of Microbiology (Technical University Munich) // |
|---|
| 8 | // // |
|---|
| 9 | // Visit our web site at: http://www.arb-home.de/ // |
|---|
| 10 | // // |
|---|
| 11 | // ==================================================================== // |
|---|
| 12 | |
|---|
| 13 | #include <xml.hxx> |
|---|
| 14 | #include <arb_defs.h> |
|---|
| 15 | #include <arb_diff.h> |
|---|
| 16 | #include <static_assert.h> |
|---|
| 17 | |
|---|
| 18 | #include <list> |
|---|
| 19 | #include <set> |
|---|
| 20 | #include <iostream> |
|---|
| 21 | #include <fstream> |
|---|
| 22 | |
|---|
| 23 | #include <cstdlib> |
|---|
| 24 | #include <cstdarg> |
|---|
| 25 | #include <cstring> |
|---|
| 26 | #include <climits> |
|---|
| 27 | |
|---|
| 28 | #include <unistd.h> |
|---|
| 29 | #include <sys/stat.h> |
|---|
| 30 | |
|---|
| 31 | using namespace std; |
|---|
| 32 | |
|---|
| 33 | #define h2x_assert(bed) arb_assert(bed) |
|---|
| 34 | |
|---|
| 35 | // Limit the length of the TITLE/SUBTITLE of helppages. |
|---|
| 36 | // - TITLE has to fit into UP/SUB subwindows of arb internal help window |
|---|
| 37 | // - SUBTITLE has to fit into default help-textsubwindow width |
|---|
| 38 | #define MAX_TITLE_CHARS 42 |
|---|
| 39 | #define MAX_SUBTITLE_CHARS 75 |
|---|
| 40 | |
|---|
| 41 | #if defined(DEBUG) |
|---|
| 42 | #define WARN_FORMATTING_PROBLEMS |
|---|
| 43 | #define WARN_MISSING_HELP |
|---|
| 44 | // #define DUMP_PARAGRAPHS |
|---|
| 45 | // #define PROTECT_HELP_VS_CHANGES |
|---|
| 46 | #endif // DEBUG |
|---|
| 47 | |
|---|
| 48 | |
|---|
| 49 | #if defined(WARN_FORMATTING_PROBLEMS) |
|---|
| 50 | |
|---|
| 51 | #define WARN_FIXED_LAYOUT_LIST_ELEMENTS |
|---|
| 52 | #define WARN_LONESOME_ENUM_ELEMENTS |
|---|
| 53 | |
|---|
| 54 | // warnings below are useless for production and shall be disabled in SVN |
|---|
| 55 | // #define WARN_LONESOME_LIST_ELEMENTS |
|---|
| 56 | // #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION |
|---|
| 57 | // #define WARN_IGNORED_ALPHA_ENUMS |
|---|
| 58 | |
|---|
| 59 | #endif |
|---|
| 60 | |
|---|
| 61 | |
|---|
| 62 | #define MAX_LINE_LENGTH 200 // maximum length of lines in input stream |
|---|
| 63 | #define TABSIZE 8 |
|---|
| 64 | |
|---|
| 65 | static const char *knownSections[] = { |
|---|
| 66 | "OCCURRENCE", |
|---|
| 67 | "DESCRIPTION", |
|---|
| 68 | "NOTES", |
|---|
| 69 | "EXAMPLES", |
|---|
| 70 | "WARNINGS", |
|---|
| 71 | "BUGS", |
|---|
| 72 | "SECTION", |
|---|
| 73 | }; |
|---|
| 74 | |
|---|
| 75 | enum SectionType { |
|---|
| 76 | SEC_OCCURRENCE, |
|---|
| 77 | SEC_DESCRIPTION, |
|---|
| 78 | SEC_NOTES, |
|---|
| 79 | SEC_EXAMPLES, |
|---|
| 80 | SEC_WARNINGS, |
|---|
| 81 | SEC_BUGS, |
|---|
| 82 | SEC_SECTION, |
|---|
| 83 | |
|---|
| 84 | KNOWN_SECTION_TYPES, |
|---|
| 85 | SEC_NONE, |
|---|
| 86 | SEC_FAKE, |
|---|
| 87 | }; |
|---|
| 88 | |
|---|
| 89 | STATIC_ASSERT(ARRAY_ELEMS(knownSections) == KNOWN_SECTION_TYPES); |
|---|
| 90 | |
|---|
| 91 | __ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) { |
|---|
| 92 | static size_t buf_size = 256; |
|---|
| 93 | static char *buffer = new char[buf_size]; |
|---|
| 94 | |
|---|
| 95 | size_t length; |
|---|
| 96 | while (1) { |
|---|
| 97 | if (!buffer) { |
|---|
| 98 | h2x_assert(buffer); // to stop when debugging |
|---|
| 99 | throw string("out of memory"); |
|---|
| 100 | } |
|---|
| 101 | |
|---|
| 102 | length = vsnprintf(buffer, buf_size, format, argPtr); |
|---|
| 103 | if (length < buf_size) break; // string fits into current buffer |
|---|
| 104 | |
|---|
| 105 | // otherwise resize buffer : |
|---|
| 106 | buf_size += buf_size/2; |
|---|
| 107 | delete [] buffer; |
|---|
| 108 | buffer = new char[buf_size]; |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | return string(buffer, length); |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | __ATTR__FORMAT(1) static string strf(const char *format, ...) { |
|---|
| 115 | va_list argPtr; |
|---|
| 116 | va_start(argPtr, format); |
|---|
| 117 | string result = vstrf(format, argPtr); |
|---|
| 118 | va_end(argPtr); |
|---|
| 119 | |
|---|
| 120 | return result; |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | // ----------------------------- |
|---|
| 124 | // warnings and errors |
|---|
| 125 | |
|---|
// A message text together with the number of the input line it refers to.
class LineAttachedMessage {
    string message; // the text
    size_t lineno;  // line the text is attached to

public:
    LineAttachedMessage(const string& message_, size_t lineno_)
        : message(message_), lineno(lineno_)
    {}

    // read-only accessors
    const string& Message() const { return message; }
    size_t        Lineno()  const { return lineno; }
};
|---|
| 139 | |
|---|
| 140 | const size_t NO_LINENUMBER_INFO = -1U; |
|---|
| 141 | |
|---|
| 142 | LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); } |
|---|
| 143 | |
|---|
| 144 | |
|---|
| 145 | static list<LineAttachedMessage> warnings; |
|---|
| 146 | inline void add_warning(const LineAttachedMessage& laMsg) { |
|---|
| 147 | warnings.push_back(laMsg); |
|---|
| 148 | } |
|---|
| 149 | inline void add_warning(const string& warning, size_t lineno) { |
|---|
| 150 | add_warning(LineAttachedMessage(warning, lineno)); |
|---|
| 151 | } |
|---|
| 152 | |
|---|
| 153 | struct MessageAttachable { |
|---|
| 154 | virtual ~MessageAttachable() {} |
|---|
| 155 | |
|---|
| 156 | virtual string location_description() const = 0; // may return empty string |
|---|
| 157 | virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO |
|---|
| 158 | |
|---|
| 159 | LineAttachedMessage attached_message(const string& message) const { |
|---|
| 160 | string where = location_description(); |
|---|
| 161 | if (where.empty()) return LineAttachedMessage(message, line_number()); |
|---|
| 162 | return LineAttachedMessage(message+" ["+where+"]", line_number()); |
|---|
| 163 | } |
|---|
| 164 | void attach_warning(const string& message) const { |
|---|
| 165 | add_warning(attached_message(message)); |
|---|
| 166 | } |
|---|
| 167 | }; |
|---|
| 168 | |
|---|
| 169 | |
|---|
| 170 | // ---------------------- |
|---|
| 171 | // class Reader |
|---|
| 172 | |
|---|
| 173 | class Reader : public MessageAttachable { |
|---|
| 174 | private: |
|---|
| 175 | istream& in; |
|---|
| 176 | char lineBuffer[MAX_LINE_LENGTH]; |
|---|
| 177 | char lineBuffer2[MAX_LINE_LENGTH]; |
|---|
| 178 | bool readAgain; |
|---|
| 179 | bool eof; |
|---|
| 180 | int lineNo; |
|---|
| 181 | |
|---|
| 182 | string location_description() const OVERRIDE { return ""; } |
|---|
| 183 | size_t line_number() const OVERRIDE { return lineNo; } |
|---|
| 184 | |
|---|
| 185 | void getline() { |
|---|
| 186 | if (!eof) { |
|---|
| 187 | if (in.eof()) eof = true; |
|---|
| 188 | else { |
|---|
| 189 | h2x_assert(in.good()); |
|---|
| 190 | |
|---|
| 191 | in.getline(lineBuffer, MAX_LINE_LENGTH); |
|---|
| 192 | lineNo++; |
|---|
| 193 | |
|---|
| 194 | if (in.eof()) eof = true; |
|---|
| 195 | else if (in.fail()) throw "line too long"; |
|---|
| 196 | |
|---|
| 197 | if (strchr(lineBuffer, '\t')) { |
|---|
| 198 | int o2 = 0; |
|---|
| 199 | |
|---|
| 200 | for (int o = 0; lineBuffer[o]; ++o) { |
|---|
| 201 | if (lineBuffer[o] == '\t') { |
|---|
| 202 | int spaces = TABSIZE - (o2 % TABSIZE); |
|---|
| 203 | while (spaces--) lineBuffer2[o2++] = ' '; |
|---|
| 204 | } |
|---|
| 205 | else { |
|---|
| 206 | lineBuffer2[o2++] = lineBuffer[o]; |
|---|
| 207 | } |
|---|
| 208 | } |
|---|
| 209 | lineBuffer2[o2] = 0; |
|---|
| 210 | strcpy(lineBuffer, lineBuffer2); |
|---|
| 211 | } |
|---|
| 212 | |
|---|
| 213 | char *eol = strchr(lineBuffer, 0)-1; |
|---|
| 214 | while (eol >= lineBuffer && isspace(eol[0])) { |
|---|
| 215 | eol[0] = 0; // trim trailing whitespace |
|---|
| 216 | eol--; |
|---|
| 217 | } |
|---|
| 218 | if (eol > lineBuffer) { |
|---|
| 219 | // now eol points to last character |
|---|
| 220 | if (eol[0] == '-' && isalnum(eol[-1])) { |
|---|
| 221 | attach_warning("manual hyphenation detected"); |
|---|
| 222 | } |
|---|
| 223 | } |
|---|
| 224 | } |
|---|
| 225 | } |
|---|
| 226 | } |
|---|
| 227 | |
|---|
| 228 | public: |
|---|
| 229 | Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); } |
|---|
| 230 | virtual ~Reader() {} |
|---|
| 231 | |
|---|
| 232 | const char *getNext() { |
|---|
| 233 | if (readAgain) readAgain = false; |
|---|
| 234 | else getline(); |
|---|
| 235 | return eof ? NULp : lineBuffer; |
|---|
| 236 | } |
|---|
| 237 | |
|---|
| 238 | void back() { |
|---|
| 239 | h2x_assert(!readAgain); |
|---|
| 240 | readAgain = true; |
|---|
| 241 | } |
|---|
| 242 | |
|---|
| 243 | int getLineNo() const { return lineNo; } |
|---|
| 244 | }; |
|---|
| 245 | |
|---|
// Kind of a paragraph stored inside a section.
enum ParagraphType {
    PLAIN_TEXT,
    ENUMERATED, // element of an enumeration (see EnumerationType)
    ITEM,       // element of an item list
};

// Numbering scheme of an ENUMERATED paragraph.
enum EnumerationType {
    NONE,
    DIGITS,      // 1. 2. 3.
    ALPHA_UPPER, // A. B. C.
    ALPHA_LOWER, // a. b. c.
};
|---|
| 257 | |
|---|
| 258 | class Ostring : public MessageAttachable { |
|---|
| 259 | string content; |
|---|
| 260 | size_t lineNo; // where string came from |
|---|
| 261 | ParagraphType type; |
|---|
| 262 | |
|---|
| 263 | unsigned preformatted_width; // 0 = use default width, otherwise contains custom width from control comment. |
|---|
| 264 | |
|---|
| 265 | // only valid for type == ENUMERATED: |
|---|
| 266 | EnumerationType etype; |
|---|
| 267 | unsigned number; |
|---|
| 268 | |
|---|
| 269 | static unsigned current_preformatted_width; // same meaning as 'preformatted_width' |
|---|
| 270 | static unsigned current_preformatted_blocks; // automatically set 'current_preformatted_width' to zero (after this number of blocks were generated). |
|---|
| 271 | |
|---|
| 272 | void check_auto_unpreformat() { |
|---|
| 273 | if (current_preformatted_blocks && !--current_preformatted_blocks) { // count down to zero.. |
|---|
| 274 | current_preformatted_width = 0; // .. then switch off preformatted section |
|---|
| 275 | } |
|---|
| 276 | } |
|---|
| 277 | |
|---|
| 278 | public: |
|---|
| 279 | |
|---|
| 280 | Ostring(const string& s, size_t line_no, ParagraphType type_) |
|---|
| 281 | : content(s), |
|---|
| 282 | lineNo(line_no), |
|---|
| 283 | type(type_), |
|---|
| 284 | preformatted_width(current_preformatted_width), |
|---|
| 285 | etype(NONE) |
|---|
| 286 | { |
|---|
| 287 | h2x_assert(type != ENUMERATED); |
|---|
| 288 | check_auto_unpreformat(); |
|---|
| 289 | } |
|---|
| 290 | Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num) |
|---|
| 291 | : content(s), |
|---|
| 292 | lineNo(line_no), |
|---|
| 293 | type(type_), |
|---|
| 294 | preformatted_width(current_preformatted_width), |
|---|
| 295 | etype(etype_), |
|---|
| 296 | number(num) |
|---|
| 297 | { |
|---|
| 298 | h2x_assert(type == ENUMERATED); |
|---|
| 299 | h2x_assert(etype == DIGITS || etype == ALPHA_UPPER || etype == ALPHA_LOWER); |
|---|
| 300 | h2x_assert(num>0); |
|---|
| 301 | check_auto_unpreformat(); |
|---|
| 302 | } |
|---|
| 303 | |
|---|
| 304 | static void set_current_preformatted_width(unsigned allowed) { |
|---|
| 305 | current_preformatted_width = allowed; |
|---|
| 306 | } |
|---|
| 307 | static void set_preformatted_blocks_wanted(unsigned preformatted_blocks) { |
|---|
| 308 | h2x_assert(!current_preformatted_blocks); |
|---|
| 309 | current_preformatted_blocks = preformatted_blocks; |
|---|
| 310 | } |
|---|
| 311 | |
|---|
| 312 | // MessageAttachable interface: |
|---|
| 313 | string location_description() const OVERRIDE { return ""; } |
|---|
| 314 | size_t line_number() const OVERRIDE { return get_lineno(); } |
|---|
| 315 | |
|---|
| 316 | operator const string&() const { return content; } |
|---|
| 317 | operator string&() { return content; } |
|---|
| 318 | |
|---|
| 319 | const string& as_string() const { return content; } |
|---|
| 320 | string& as_string() { return content; } |
|---|
| 321 | |
|---|
| 322 | size_t get_lineno() const { return lineNo; } // @@@ replace by line_number()? |
|---|
| 323 | |
|---|
| 324 | const ParagraphType& get_type() const { return type; } |
|---|
| 325 | const EnumerationType& get_enum_type() const { |
|---|
| 326 | h2x_assert(type == ENUMERATED); |
|---|
| 327 | return etype; |
|---|
| 328 | } |
|---|
| 329 | unsigned get_number() const { |
|---|
| 330 | h2x_assert(type == ENUMERATED); |
|---|
| 331 | return number; |
|---|
| 332 | } |
|---|
| 333 | unsigned get_preformatted_width() const { |
|---|
| 334 | // returns > 0 for text inside PREFORMATTED control comments. |
|---|
| 335 | return preformatted_width; |
|---|
| 336 | } |
|---|
| 337 | |
|---|
| 338 | // wrapper to make Ostring act like char* |
|---|
| 339 | const char *c_str() const { return content.c_str(); } |
|---|
| 340 | }; |
|---|
| 341 | unsigned Ostring::current_preformatted_width = 0; |
|---|
| 342 | unsigned Ostring::current_preformatted_blocks = 0; |
|---|
| 343 | |
|---|
| 344 | typedef list<Ostring> Ostrings; |
|---|
| 345 | |
|---|
| 346 | #if defined(WARN_MISSING_HELP) |
|---|
| 347 | static void check_TODO(const char *line, const Reader& reader) { |
|---|
| 348 | if (strstr(line, "@@@") || strstr(line, "TODO")) { |
|---|
| 349 | reader.attach_warning(strf("TODO: %s", line)); |
|---|
| 350 | } |
|---|
| 351 | } |
|---|
| 352 | #else |
|---|
| 353 | inline void check_TODO(const char *, const Reader&) { } |
|---|
| 354 | #endif // WARN_MISSING_HELP |
|---|
| 355 | |
|---|
| 356 | // ---------------------------- |
|---|
| 357 | // class Section |
|---|
| 358 | |
|---|
| 359 | class Section FINAL_TYPE : public MessageAttachable { |
|---|
| 360 | SectionType type; |
|---|
| 361 | string name; |
|---|
| 362 | Ostrings content; |
|---|
| 363 | size_t lineno; |
|---|
| 364 | |
|---|
| 365 | string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; } |
|---|
| 366 | |
|---|
| 367 | public: |
|---|
| 368 | Section(string name_, SectionType type_, size_t lineno_) |
|---|
| 369 | : type(type_), |
|---|
| 370 | name(name_), |
|---|
| 371 | lineno(lineno_) |
|---|
| 372 | {} |
|---|
| 373 | virtual ~Section() {} |
|---|
| 374 | |
|---|
| 375 | const Ostrings& Content() const { return content; } |
|---|
| 376 | Ostrings& Content() { return content; } |
|---|
| 377 | SectionType get_type() const { return type; } |
|---|
| 378 | size_t line_number() const OVERRIDE { return lineno; } |
|---|
| 379 | const string& getName() const { return name; } |
|---|
| 380 | void setName(const string& name_) { name = name_; } |
|---|
| 381 | |
|---|
| 382 | void set_line_number(size_t lineNumber) { lineno = lineNumber; } |
|---|
| 383 | }; |
|---|
| 384 | |
|---|
| 385 | typedef list<Section> SectionList; |
|---|
| 386 | |
|---|
| 387 | // -------------------- |
|---|
| 388 | // class Link |
|---|
| 389 | |
|---|
// A link to another help page together with the line of the help file containing the link.
class Link {
    string target;        // the page linked to
    size_t source_lineno; // line where the link was found

public:
    Link(const string& target_, size_t source_lineno_)
        : target(target_), source_lineno(source_lineno_)
    {}

    const string& Target()       const { return target; }
    size_t        SourceLineno() const { return source_lineno; }
};

typedef list<Link> Links;
|---|
| 405 | |
|---|
| 406 | // ------------------------ |
|---|
| 407 | // class Helpfile |
|---|
| 408 | |
|---|
| 409 | class Helpfile { |
|---|
| 410 | Links uplinks; |
|---|
| 411 | Links references; |
|---|
| 412 | Links auto_references; |
|---|
| 413 | Section title; |
|---|
| 414 | SectionList sections; |
|---|
| 415 | string inputfile; |
|---|
| 416 | |
|---|
| 417 | void check_self_ref(const string& link) { |
|---|
| 418 | size_t slash = inputfile.find('/'); |
|---|
| 419 | if (slash != string::npos) { |
|---|
| 420 | if (inputfile.substr(slash+1) == link) { |
|---|
| 421 | throw string("Invalid link to self"); |
|---|
| 422 | } |
|---|
| 423 | } |
|---|
| 424 | } |
|---|
| 425 | |
|---|
| 426 | public: |
|---|
| 427 | Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {} |
|---|
| 428 | virtual ~Helpfile() {} |
|---|
| 429 | |
|---|
| 430 | void readHelp(istream& in, const string& filename); |
|---|
| 431 | void writeXML(FILE *out, const string& page_name); |
|---|
| 432 | void extractInternalLinks(); |
|---|
| 433 | |
|---|
| 434 | const Section& get_title() const { return title; } |
|---|
| 435 | }; |
|---|
| 436 | |
|---|
// true for the blank character only (tabs are NOT regarded as space)
inline bool isSpace(char c) { return ' ' == c; }
// true for blank and newline
inline bool isWhitespace(char c) {
    switch (c) {
        case ' ':
        case '\n':
            return true;
        default:
            return false;
    }
}
|---|
| 439 | |
|---|
// A line is a comment if its very first character is '#'.
inline bool isComment(const char *s) {
    return *s == '#';
}
|---|
// true if 's' is a comment (starts with '#'), is empty or consists of blanks only
inline bool isEmptyOrComment(const char *s) {
    if (s[0] == '#') return true;

    const size_t leadingBlanks = strspn(s, " ");
    return s[leadingBlanks] == 0; // nothing but blanks found
}
|---|
| 452 | |
|---|
| 453 | static void checkControlComment(const char *line) { |
|---|
| 454 | const int DEFAULT_WIDTH = 91; // has to match value in to_help.xsl@preformatted-default-width |
|---|
| 455 | |
|---|
| 456 | if (isComment(line)) { |
|---|
| 457 | // Note: currently all valid control comments start with the same keyword |
|---|
| 458 | // Please document control comments in help.readme@Control |
|---|
| 459 | const char *KEYWORD = "PREFORMATTED "; |
|---|
| 460 | const char *found = strstr(line, KEYWORD); |
|---|
| 461 | if (found) { |
|---|
| 462 | const char *rest = found + strlen(KEYWORD); |
|---|
| 463 | if (strcmp(rest, "RESET") == 0) { |
|---|
| 464 | Ostring::set_current_preformatted_width(0); |
|---|
| 465 | } |
|---|
| 466 | #define WLEN 6 |
|---|
| 467 | else if (strncmp(rest, "WIDTH ", WLEN) == 0) { |
|---|
| 468 | const char *rest2 = rest + WLEN; |
|---|
| 469 | int width = atoi(rest2); |
|---|
| 470 | if (width<1) { |
|---|
| 471 | if (strncmp(rest2, "DEFAULT", 7) == 0) { |
|---|
| 472 | width = DEFAULT_WIDTH; |
|---|
| 473 | } |
|---|
| 474 | else { |
|---|
| 475 | throw strf("invalid width %i in control comment '%s'", width, line); |
|---|
| 476 | } |
|---|
| 477 | } |
|---|
| 478 | Ostring::set_current_preformatted_width(width); |
|---|
| 479 | } |
|---|
| 480 | #undef WLEN |
|---|
| 481 | else if (strcmp(rest, "1") == 0) { // @@@ allow higher numbers for more lines? |
|---|
| 482 | Ostring::set_current_preformatted_width(DEFAULT_WIDTH); |
|---|
| 483 | Ostring::set_preformatted_blocks_wanted(1); |
|---|
| 484 | } |
|---|
| 485 | else { |
|---|
| 486 | throw strf("invalid control comment '%s' (while parsing at '%s')", line, rest); |
|---|
| 487 | } |
|---|
| 488 | } |
|---|
| 489 | } |
|---|
| 490 | } |
|---|
| 491 | |
|---|
| 492 | inline const char *extractKeyword(const char *line, string& keyword) { |
|---|
| 493 | // returns NULp if no keyword was found |
|---|
| 494 | // otherwise returns position behind keyword and sets value of 'keyword' |
|---|
| 495 | |
|---|
| 496 | const char *space = strchr(line, ' '); |
|---|
| 497 | if (space && space>line) { |
|---|
| 498 | keyword = string(line, 0, space-line); |
|---|
| 499 | return space; |
|---|
| 500 | } |
|---|
| 501 | else if (!space) { // test for keyword w/o content behind |
|---|
| 502 | if (line[0]) { // not empty |
|---|
| 503 | keyword = line; |
|---|
| 504 | return strchr(line, 0); |
|---|
| 505 | } |
|---|
| 506 | } |
|---|
| 507 | return NULp; |
|---|
| 508 | } |
|---|
| 509 | |
|---|
// Returns the position of the first character of 'line' that is not a blank.
inline const char *eatSpace(const char *line) {
    return line + strspn(line, " ");
}
|---|
// Returns the position of the first character of 'paragraph' that is neither blank nor newline
// (i.e. skips over leading blanks and empty lines).
inline const char *eatWhitespace(const char *paragraph) {
    return paragraph + strspn(paragraph, " \n");
}
|---|
| 520 | |
|---|
| 521 | inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) { |
|---|
| 522 | if (paragraph.length()) { |
|---|
| 523 | if (type == ENUMERATED) { |
|---|
| 524 | sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num)); |
|---|
| 525 | } |
|---|
| 526 | else { |
|---|
| 527 | sec.Content().push_back(Ostring(paragraph, lineNo, type)); |
|---|
| 528 | } |
|---|
| 529 | |
|---|
| 530 | type = PLAIN_TEXT; |
|---|
| 531 | etype = NONE; |
|---|
| 532 | paragraph = ""; |
|---|
| 533 | } |
|---|
| 534 | } |
|---|
| 535 | |
|---|
// Returns the position of the first non-blank character of 's'
// (which is the terminating zero if 's' contains blanks only).
inline const char *firstChar(const char *s) {
    return s + strspn(s, " ");
}
|---|
| 540 | |
|---|
// Tests whether 'contentStart' (= first non-blank character of a line) starts an
// element of an item list, i.e. looks like "- text" or "* text".
//
// The bullet has to be followed by exactly one whitespace character;
// the character after that may neither be whitespace nor '-'.
inline bool is_startof_itemlist_element(const char *contentStart) {
    const char bullet = contentStart[0];
    if (bullet != '-' && bullet != '*') return false;

    // casts: isspace() is undefined for negative char values (e.g. non-ASCII bytes)
    if (!isspace(static_cast<unsigned char>(contentStart[1]))) return false; // also stops at end of string

    const char first = contentStart[2];
    return !(isspace(static_cast<unsigned char>(first)) || first == '-');
}
|---|
| 551 | |
|---|
| 552 | #define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums |
|---|
| 553 | |
|---|
| 554 | static EnumerationType startsWithLetter(string& s, unsigned& number) { |
|---|
| 555 | // tests if first line starts with 'letter.' |
|---|
| 556 | // if true then 'letter.' is removed from the string |
|---|
| 557 | // the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..) |
|---|
| 558 | |
|---|
| 559 | size_t off = s.find_first_not_of(" \n"); |
|---|
| 560 | if (off == string::npos) return NONE; |
|---|
| 561 | if (!isalpha(s[off])) return NONE; |
|---|
| 562 | |
|---|
| 563 | size_t astart = off; |
|---|
| 564 | EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER; |
|---|
| 565 | |
|---|
| 566 | number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1; |
|---|
| 567 | ++off; |
|---|
| 568 | |
|---|
| 569 | h2x_assert(number>0 && number<MAX_ALLOWED_ENUM); |
|---|
| 570 | |
|---|
| 571 | if (s[off] != '.' && s[off] != ')') return NONE; |
|---|
| 572 | if (s[off+1] != ' ') return NONE; |
|---|
| 573 | |
|---|
| 574 | // remove 'letter.' from string : |
|---|
| 575 | ++off; |
|---|
| 576 | while (s[off+1] == ' ') ++off; |
|---|
| 577 | s.erase(astart, off-astart+1); |
|---|
| 578 | |
|---|
| 579 | return etype; |
|---|
| 580 | } |
|---|
| 581 | |
|---|
| 582 | static bool startsWithNumber(string& s, unsigned& number) { |
|---|
| 583 | // tests if first line starts with 'number.' |
|---|
| 584 | // if true then 'number.' is removed from the string |
|---|
| 585 | |
|---|
| 586 | size_t off = s.find_first_not_of(" \n"); |
|---|
| 587 | if (off == string::npos) return false; |
|---|
| 588 | if (!isdigit(s[off])) return false; |
|---|
| 589 | |
|---|
| 590 | size_t num_start = off; |
|---|
| 591 | number = 0; |
|---|
| 592 | |
|---|
| 593 | for (; isdigit(s[off]); ++off) { |
|---|
| 594 | number = number*10 + (s[off]-'0'); |
|---|
| 595 | } |
|---|
| 596 | if (number>MAX_ALLOWED_ENUM) return false; |
|---|
| 597 | |
|---|
| 598 | if (s[off] != '.' && s[off] != ')') return false; |
|---|
| 599 | if (s[off+1] != ' ') return false; |
|---|
| 600 | |
|---|
| 601 | // remove 'number.' from string : |
|---|
| 602 | ++off; |
|---|
| 603 | while (s[off+1] == ' ') ++off; |
|---|
| 604 | s.erase(num_start, off-num_start+1); |
|---|
| 605 | |
|---|
| 606 | return true; |
|---|
| 607 | } |
|---|
| 608 | |
|---|
| 609 | static EnumerationType detectLineEnumType(string& line, unsigned& number) { |
|---|
| 610 | if (startsWithNumber(line, number)) return DIGITS; |
|---|
| 611 | return startsWithLetter(line, number); |
|---|
| 612 | } |
|---|
| 613 | |
|---|
| 614 | static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) { |
|---|
| 615 | string paragraph = line; |
|---|
| 616 | size_t para_start_lineno = reader.getLineNo(); |
|---|
| 617 | |
|---|
| 618 | if (sec.line_number() == NO_LINENUMBER_INFO) { // linenumber is not known yet |
|---|
| 619 | // assume section just started (this happens with TITLE) |
|---|
| 620 | sec.set_line_number(para_start_lineno); |
|---|
| 621 | } |
|---|
| 622 | |
|---|
| 623 | ParagraphType type = PLAIN_TEXT; |
|---|
| 624 | EnumerationType etype = NONE; |
|---|
| 625 | unsigned num = 0; |
|---|
| 626 | |
|---|
| 627 | unsigned last_alpha_num = -1; |
|---|
| 628 | |
|---|
| 629 | h2x_assert(sec.Content().empty()); |
|---|
| 630 | |
|---|
| 631 | while (1) { |
|---|
| 632 | line = reader.getNext(); |
|---|
| 633 | if (!line) break; |
|---|
| 634 | |
|---|
| 635 | if (isEmptyOrComment(line)) { |
|---|
| 636 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
|---|
| 637 | checkControlComment(line); |
|---|
| 638 | check_TODO(line, reader); |
|---|
| 639 | } |
|---|
| 640 | else { |
|---|
| 641 | string keyword; |
|---|
| 642 | const char *rest = extractKeyword(line, keyword); |
|---|
| 643 | |
|---|
| 644 | if (rest) { // a new keyword |
|---|
| 645 | reader.back(); |
|---|
| 646 | break; |
|---|
| 647 | } |
|---|
| 648 | |
|---|
| 649 | check_TODO(line, reader); |
|---|
| 650 | |
|---|
| 651 | string Line = line; |
|---|
| 652 | |
|---|
| 653 | if (sec.get_type() == SEC_OCCURRENCE) { |
|---|
| 654 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
|---|
| 655 | } |
|---|
| 656 | else { |
|---|
| 657 | const char *firstNonWhite = firstChar(line); |
|---|
| 658 | if (is_startof_itemlist_element(firstNonWhite)) { |
|---|
| 659 | h2x_assert(firstNonWhite != line); |
|---|
| 660 | |
|---|
| 661 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
|---|
| 662 | |
|---|
| 663 | Line[firstNonWhite-line] = ' '; |
|---|
| 664 | type = ITEM; // is reset in call to pushParagraph |
|---|
| 665 | } |
|---|
| 666 | else { |
|---|
| 667 | unsigned foundNum; |
|---|
| 668 | EnumerationType foundEtype = detectLineEnumType(Line, foundNum); |
|---|
| 669 | |
|---|
| 670 | if (foundEtype == ALPHA_UPPER || foundEtype == ALPHA_LOWER) { |
|---|
| 671 | if (foundNum == (last_alpha_num+1) || foundNum == 1) { |
|---|
| 672 | last_alpha_num = foundNum; |
|---|
| 673 | } |
|---|
| 674 | else { |
|---|
| 675 | #if defined(WARN_IGNORED_ALPHA_ENUMS) |
|---|
| 676 | add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum")); |
|---|
| 677 | #endif |
|---|
| 678 | foundEtype = NONE; |
|---|
| 679 | |
|---|
| 680 | reader.back(); |
|---|
| 681 | Line = reader.getNext(); |
|---|
| 682 | last_alpha_num = -1; |
|---|
| 683 | } |
|---|
| 684 | } |
|---|
| 685 | |
|---|
| 686 | if (foundEtype != NONE) { |
|---|
| 687 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
|---|
| 688 | |
|---|
| 689 | type = ENUMERATED; |
|---|
| 690 | num = foundNum; |
|---|
| 691 | etype = foundEtype; |
|---|
| 692 | |
|---|
| 693 | if (!num) { |
|---|
| 694 | h2x_assert(etype == DIGITS); |
|---|
| 695 | throw "Enumerations starting with zero are not supported"; |
|---|
| 696 | } |
|---|
| 697 | } |
|---|
| 698 | } |
|---|
| 699 | } |
|---|
| 700 | |
|---|
| 701 | if (paragraph.length()) { |
|---|
| 702 | paragraph = paragraph+"\n"+Line; |
|---|
| 703 | } |
|---|
| 704 | else { |
|---|
| 705 | paragraph = string("\n")+Line; |
|---|
| 706 | para_start_lineno = reader.getLineNo(); |
|---|
| 707 | } |
|---|
| 708 | } |
|---|
| 709 | } |
|---|
| 710 | |
|---|
| 711 | pushParagraph(sec, paragraph, para_start_lineno, type, etype, num); |
|---|
| 712 | |
|---|
| 713 | if (sec.Content().size()>0 && indentation>0) { |
|---|
| 714 | string spaces; |
|---|
| 715 | spaces.reserve(indentation); |
|---|
| 716 | spaces.append(indentation, ' '); |
|---|
| 717 | |
|---|
| 718 | string& ostr = sec.Content().front(); |
|---|
| 719 | ostr = string("\n") + spaces + ostr; |
|---|
| 720 | } |
|---|
| 721 | } |
|---|
| 722 | |
|---|
| 723 | inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) { |
|---|
| 724 | for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) { |
|---|
| 725 | if (ex->Target() == link) { |
|---|
| 726 | if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno()); |
|---|
| 727 | throw strf("Link to '%s' duplicated here.", link.c_str()); |
|---|
| 728 | } |
|---|
| 729 | } |
|---|
| 730 | } |
|---|
| 731 | inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) { |
|---|
| 732 | check_specific_duplicates(link, uplinks, add_warnings); |
|---|
| 733 | check_specific_duplicates(link, references, add_warnings); |
|---|
| 734 | } |
|---|
| 735 | |
|---|
| 736 | static void warnAboutDuplicate(SectionList& sections) { |
|---|
| 737 | set<string> seen; |
|---|
| 738 | SectionList::iterator end = sections.end(); |
|---|
| 739 | for (SectionList::iterator s = sections.begin(); s != end; ++s) { |
|---|
| 740 | const string& sname = s->getName(); |
|---|
| 741 | if (sname == "NOTES") continue; // do not warn about multiple NOTES sections |
|---|
| 742 | |
|---|
| 743 | SectionList::iterator o = s; ++o; |
|---|
| 744 | for (; o != end; ++o) { |
|---|
| 745 | if (sname == o->getName()) { |
|---|
| 746 | o->attach_warning("duplicated SECTION name"); |
|---|
| 747 | if (seen.find(sname) == seen.end()) { |
|---|
| 748 | s->attach_warning("name was first used"); |
|---|
| 749 | seen.insert(sname); |
|---|
| 750 | } |
|---|
| 751 | } |
|---|
| 752 | } |
|---|
| 753 | } |
|---|
| 754 | } |
|---|
| 755 | |
|---|
| 756 | void Helpfile::readHelp(istream& in, const string& filename) { |
|---|
| 757 | if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str())); |
|---|
| 758 | |
|---|
| 759 | Reader read(in); |
|---|
| 760 | |
|---|
| 761 | inputfile = filename; // remember file read (for comment) |
|---|
| 762 | |
|---|
| 763 | const char *line; |
|---|
| 764 | const char *name_only = strrchr(filename.c_str(), '/'); |
|---|
| 765 | |
|---|
| 766 | h2x_assert(name_only); |
|---|
| 767 | ++name_only; |
|---|
| 768 | |
|---|
| 769 | try { |
|---|
| 770 | while (1) { |
|---|
| 771 | line = read.getNext(); |
|---|
| 772 | if (!line) break; |
|---|
| 773 | |
|---|
| 774 | if (isEmptyOrComment(line)) { |
|---|
| 775 | checkControlComment(line); |
|---|
| 776 | check_TODO(line, read); |
|---|
| 777 | continue; |
|---|
| 778 | } |
|---|
| 779 | |
|---|
| 780 | check_TODO(line, read); |
|---|
| 781 | |
|---|
| 782 | string keyword; |
|---|
| 783 | const char *rest = extractKeyword(line, keyword); |
|---|
| 784 | |
|---|
| 785 | if (rest) { // found a keyword |
|---|
| 786 | if (keyword == "UP") { |
|---|
| 787 | rest = eatSpace(rest); |
|---|
| 788 | if (strlen(rest)) { |
|---|
| 789 | check_duplicates(rest, uplinks, references, true); |
|---|
| 790 | if (strcmp(name_only, rest) == 0) throw "UP link to self"; |
|---|
| 791 | |
|---|
| 792 | uplinks.push_back(Link(rest, read.getLineNo())); |
|---|
| 793 | } |
|---|
| 794 | } |
|---|
| 795 | else if (keyword == "SUB") { |
|---|
| 796 | rest = eatSpace(rest); |
|---|
| 797 | if (strlen(rest)) { |
|---|
| 798 | check_duplicates(rest, uplinks, references, true); |
|---|
| 799 | if (strcmp(name_only, rest) == 0) throw "SUB link to self"; |
|---|
| 800 | |
|---|
| 801 | references.push_back(Link(rest, read.getLineNo())); |
|---|
| 802 | } |
|---|
| 803 | } |
|---|
| 804 | else if (keyword == "TITLE") { |
|---|
| 805 | rest = eatSpace(rest); |
|---|
| 806 | parseSection(title, rest, 0, read); |
|---|
| 807 | |
|---|
| 808 | if (title.Content().empty()) throw "empty TITLE not allowed"; |
|---|
| 809 | |
|---|
| 810 | const string& t = title.Content().front(); |
|---|
| 811 | if (t.find("Standard help file form") != string::npos) { |
|---|
| 812 | throw strf("Illegal title for help file: '%s'", t.c_str()); |
|---|
| 813 | } |
|---|
| 814 | |
|---|
| 815 | const size_t len = t.length(); |
|---|
| 816 | if (len>MAX_TITLE_CHARS) { |
|---|
| 817 | // ignore non-alphanumeric characters at end of string: |
|---|
| 818 | size_t last_alnum_pos = len-1; |
|---|
| 819 | while (!isalnum(t[last_alnum_pos])) { |
|---|
| 820 | --last_alnum_pos; |
|---|
| 821 | } |
|---|
| 822 | ++last_alnum_pos; |
|---|
| 823 | arb_assert(last_alnum_pos<=len); |
|---|
| 824 | |
|---|
| 825 | const size_t ignored = len-last_alnum_pos; |
|---|
| 826 | if ((len-ignored)>MAX_TITLE_CHARS) { |
|---|
| 827 | title.attach_warning(strf("TITLE too verbose (max. %i chars allowed; found %zu%s)", |
|---|
| 828 | MAX_TITLE_CHARS, |
|---|
| 829 | len, |
|---|
| 830 | ignored ? strf("; acceptable trailing chars: %zu", ignored).c_str() : "" |
|---|
| 831 | )); |
|---|
| 832 | } |
|---|
| 833 | } |
|---|
| 834 | } |
|---|
| 835 | else { |
|---|
| 836 | if (keyword == "NOTE") keyword = "NOTES"; |
|---|
| 837 | if (keyword == "EXAMPLE") keyword = "EXAMPLES"; |
|---|
| 838 | if (keyword == "WARNING") keyword = "WARNINGS"; |
|---|
| 839 | |
|---|
| 840 | SectionType stype = SEC_NONE; |
|---|
| 841 | int idx; |
|---|
| 842 | for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) { |
|---|
| 843 | if (knownSections[idx] == keyword) { |
|---|
| 844 | stype = SectionType(idx); |
|---|
| 845 | break; |
|---|
| 846 | } |
|---|
| 847 | } |
|---|
| 848 | |
|---|
| 849 | size_t lineno = read.getLineNo(); |
|---|
| 850 | |
|---|
| 851 | if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str()); |
|---|
| 852 | |
|---|
| 853 | if (stype == SEC_SECTION) { |
|---|
| 854 | string section_name = eatSpace(rest); |
|---|
| 855 | Section sec(section_name, stype, lineno); |
|---|
| 856 | parseSection(sec, "", 0, read); |
|---|
| 857 | sections.push_back(sec); |
|---|
| 858 | } |
|---|
| 859 | else { |
|---|
| 860 | Section sec(keyword, stype, lineno); |
|---|
| 861 | rest = eatSpace(rest); |
|---|
| 862 | parseSection(sec, rest, rest-line, read); |
|---|
| 863 | sections.push_back(sec); |
|---|
| 864 | } |
|---|
| 865 | } |
|---|
| 866 | } |
|---|
| 867 | else { |
|---|
| 868 | throw strf("Unhandled line"); |
|---|
| 869 | } |
|---|
| 870 | } |
|---|
| 871 | |
|---|
| 872 | warnAboutDuplicate(sections); |
|---|
| 873 | } |
|---|
| 874 | catch (string& err) { throw read.attached_message(err); } |
|---|
| 875 | catch (const char *err) { throw read.attached_message(err); } |
|---|
| 876 | } |
|---|
| 877 | |
|---|
| 878 | static bool shouldReflow(const string& s, int& foundIndentation) { |
|---|
| 879 | // foundIndentation is only valid if shouldReflow() returns true |
|---|
| 880 | enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START; |
|---|
| 881 | bool equal_indent = true; |
|---|
| 882 | int lastIndent = -1; |
|---|
| 883 | int thisIndent = 0; |
|---|
| 884 | |
|---|
| 885 | for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) { |
|---|
| 886 | if (*c == '\n') { |
|---|
| 887 | state = START; |
|---|
| 888 | thisIndent = 0; |
|---|
| 889 | } |
|---|
| 890 | else if (isSpace(*c)) { |
|---|
| 891 | if (state == DOT || state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed |
|---|
| 892 | else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces |
|---|
| 893 | else if (state == CHAR) state = SPACE; // now seen 1 space |
|---|
| 894 | } |
|---|
| 895 | else { |
|---|
| 896 | if (state == MULTIPLE) return false; // character after multiple spaces |
|---|
| 897 | if (state == START) { |
|---|
| 898 | if (lastIndent == -1) lastIndent = thisIndent; |
|---|
| 899 | else if (lastIndent != thisIndent) equal_indent = false; |
|---|
| 900 | } |
|---|
| 901 | state = (*c == '.' || *c == ',') ? DOT : CHAR; |
|---|
| 902 | } |
|---|
| 903 | } |
|---|
| 904 | |
|---|
| 905 | if (lastIndent<0) { |
|---|
| 906 | equal_indent = false; |
|---|
| 907 | } |
|---|
| 908 | |
|---|
| 909 | if (equal_indent) { |
|---|
| 910 | foundIndentation = lastIndent-1; |
|---|
| 911 | h2x_assert(foundIndentation >= 0); |
|---|
| 912 | } |
|---|
| 913 | return equal_indent; |
|---|
| 914 | } |
|---|
| 915 | |
|---|
| 916 | static string correctSpaces(const string& text, int change) { |
|---|
| 917 | h2x_assert(text.find('\n') == string::npos); |
|---|
| 918 | |
|---|
| 919 | if (!change) return text; |
|---|
| 920 | |
|---|
| 921 | size_t first = text.find_first_not_of(' '); |
|---|
| 922 | if (first == string::npos) return ""; // empty line |
|---|
| 923 | |
|---|
| 924 | if (change<0) { |
|---|
| 925 | int remove = -change; |
|---|
| 926 | h2x_assert(remove <= int(first)); |
|---|
| 927 | return text.substr(remove); |
|---|
| 928 | } |
|---|
| 929 | |
|---|
| 930 | h2x_assert(change>0); // add spaces |
|---|
| 931 | return string(change, ' ')+text; |
|---|
| 932 | } |
|---|
| 933 | |
|---|
| 934 | static string correctIndentation(const string& text, int change) { |
|---|
| 935 | // removes 'remove' spaces from every line |
|---|
| 936 | |
|---|
| 937 | size_t this_lineend = text.find('\n'); |
|---|
| 938 | string result; |
|---|
| 939 | |
|---|
| 940 | if (this_lineend == string::npos) { |
|---|
| 941 | result = correctSpaces(text, change); |
|---|
| 942 | } |
|---|
| 943 | else { |
|---|
| 944 | result = correctSpaces(text.substr(0, this_lineend), change); |
|---|
| 945 | |
|---|
| 946 | while (this_lineend != string::npos) { |
|---|
| 947 | size_t next_lineend = text.find('\n', this_lineend+1); |
|---|
| 948 | if (next_lineend == string::npos) { // last line |
|---|
| 949 | result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change); |
|---|
| 950 | } |
|---|
| 951 | else { |
|---|
| 952 | result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change); |
|---|
| 953 | } |
|---|
| 954 | this_lineend = next_lineend; |
|---|
| 955 | } |
|---|
| 956 | } |
|---|
| 957 | return result; |
|---|
| 958 | } |
|---|
| 959 | |
|---|
inline size_t countSpaces(const string& text) {
    // Returns the number of leading spaces of 'text'.
    // Returns INT_MAX if 'text' is empty or consists of spaces only.

    const size_t length = text.length();
    size_t       blanks = 0;

    while (blanks<length && text[blanks] == ' ') ++blanks;

    return blanks == length ? INT_MAX : blanks; // all spaces -> empty line
}
|---|
| 965 | |
|---|
| 966 | static size_t scanMinIndentation(const string& text) { |
|---|
| 967 | size_t this_lineend = text.find('\n'); |
|---|
| 968 | size_t min_indent = INT_MAX; |
|---|
| 969 | |
|---|
| 970 | if (this_lineend == string::npos) { |
|---|
| 971 | min_indent = countSpaces(text); |
|---|
| 972 | } |
|---|
| 973 | else { |
|---|
| 974 | while (this_lineend != string::npos) { |
|---|
| 975 | size_t next_lineend = text.find('\n', this_lineend+1); |
|---|
| 976 | if (next_lineend == string::npos) { |
|---|
| 977 | min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1))); |
|---|
| 978 | } |
|---|
| 979 | else { |
|---|
| 980 | min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1))); |
|---|
| 981 | } |
|---|
| 982 | this_lineend = next_lineend; |
|---|
| 983 | } |
|---|
| 984 | } |
|---|
| 985 | |
|---|
| 986 | if (min_indent == INT_MAX) min_indent = 0; // only empty lines |
|---|
| 987 | return min_indent; |
|---|
| 988 | } |
|---|
| 989 | |
|---|
| 990 | // ----------------------------- |
|---|
| 991 | // class ParagraphTree |
|---|
| 992 | |
|---|
| 993 | class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable { |
|---|
| 994 | ParagraphTree *brother; // has same indentation as this |
|---|
| 995 | ParagraphTree *son; // indentation + 1 |
|---|
| 996 | |
|---|
| 997 | Ostring otext; // text of the Section (containing linefeeds) |
|---|
| 998 | |
|---|
| 999 | bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text) |
|---|
| 1000 | int indentation; // the real indentation of the blank (behind removed enumeration) |
|---|
| 1001 | |
|---|
| 1002 | string location_description() const OVERRIDE { return "in paragraph starting here"; } |
|---|
| 1003 | size_t line_number() const OVERRIDE { return otext.get_lineno(); } |
|---|
| 1004 | |
|---|
| 1005 | ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) |
|---|
| 1006 | : son(NULp), |
|---|
| 1007 | otext(*begin), |
|---|
| 1008 | indentation(0) |
|---|
| 1009 | { |
|---|
| 1010 | h2x_assert(begin != end); |
|---|
| 1011 | |
|---|
| 1012 | string& text = otext; |
|---|
| 1013 | reflow = otext.get_preformatted_width()==0 && shouldReflow(text, indentation); |
|---|
| 1014 | if (!reflow) { |
|---|
| 1015 | size_t reststart = text.find('\n', 1); |
|---|
| 1016 | |
|---|
| 1017 | if (reststart == 0) { |
|---|
| 1018 | attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail"); |
|---|
| 1019 | } |
|---|
| 1020 | |
|---|
| 1021 | if (reststart != string::npos) { |
|---|
| 1022 | int rest_indent = -1; |
|---|
| 1023 | string rest = text.substr(reststart); |
|---|
| 1024 | bool rest_reflow = shouldReflow(rest, rest_indent); |
|---|
| 1025 | |
|---|
| 1026 | if (rest_reflow) { |
|---|
| 1027 | int first_indent = countSpaces(text.substr(1)); |
|---|
| 1028 | if (get_type() == PLAIN_TEXT) { |
|---|
| 1029 | size_t last = text.find_last_not_of(' ', reststart-1); |
|---|
| 1030 | bool is_header = last != string::npos && text[last] == ':'; |
|---|
| 1031 | |
|---|
| 1032 | if (!is_header && rest_indent == (first_indent+8)) { |
|---|
| 1033 | #if defined(DEBUG) |
|---|
| 1034 | size_t textstart = text.find_first_not_of(" \n"); |
|---|
| 1035 | h2x_assert(textstart != string::npos); |
|---|
| 1036 | #endif // DEBUG |
|---|
| 1037 | |
|---|
| 1038 | text = text.substr(0, reststart)+correctIndentation(rest, -8); |
|---|
| 1039 | reflow = shouldReflow(text, indentation); |
|---|
| 1040 | } |
|---|
| 1041 | } |
|---|
| 1042 | else { |
|---|
| 1043 | int diff = rest_indent-first_indent; |
|---|
| 1044 | if (diff>0) { |
|---|
| 1045 | text = text.substr(0, reststart)+correctIndentation(rest, -diff); |
|---|
| 1046 | reflow = shouldReflow(text, indentation); |
|---|
| 1047 | } |
|---|
| 1048 | else if (diff<0) { |
|---|
| 1049 | // paragraph with more indent on first line (occurs?) |
|---|
| 1050 | attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff)); |
|---|
| 1051 | } |
|---|
| 1052 | } |
|---|
| 1053 | } |
|---|
| 1054 | } |
|---|
| 1055 | } |
|---|
| 1056 | |
|---|
| 1057 | if (!reflow) { |
|---|
| 1058 | indentation = scanMinIndentation(text); |
|---|
| 1059 | } |
|---|
| 1060 | text = correctIndentation(text, -indentation); |
|---|
| 1061 | if (get_type() == ITEM) { |
|---|
| 1062 | h2x_assert(indentation >= 2); |
|---|
| 1063 | indentation -= 2; |
|---|
| 1064 | } |
|---|
| 1065 | |
|---|
| 1066 | brother = buildParagraphTree(++begin, end); |
|---|
| 1067 | } |
|---|
| 1068 | |
|---|
| 1069 | void brothers_to_sons(ParagraphTree *new_brother); |
|---|
| 1070 | |
|---|
| 1071 | unsigned get_preformatted_width() const { |
|---|
| 1072 | return otext.get_preformatted_width(); |
|---|
| 1073 | } |
|---|
| 1074 | |
|---|
| 1075 | public: |
|---|
| 1076 | virtual ~ParagraphTree() { |
|---|
| 1077 | delete brother; |
|---|
| 1078 | delete son; |
|---|
| 1079 | } |
|---|
| 1080 | |
|---|
| 1081 | ParagraphType get_type() const { return otext.get_type(); } |
|---|
| 1082 | |
|---|
| 1083 | bool is_itemlist_member() const { return get_type() == ITEM; } |
|---|
| 1084 | unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; } |
|---|
| 1085 | EnumerationType get_enum_type() const { return otext.get_enum_type(); } |
|---|
| 1086 | |
|---|
| 1087 | const char *readable_type() const { |
|---|
| 1088 | const char *res = NULp; |
|---|
| 1089 | switch (get_type()) { |
|---|
| 1090 | case PLAIN_TEXT: res = "PLAIN_TEXT"; break; |
|---|
| 1091 | case ITEM: res = "ITEM"; break; |
|---|
| 1092 | case ENUMERATED: res = "ENUMERATED"; break; |
|---|
| 1093 | } |
|---|
| 1094 | return res; |
|---|
| 1095 | } |
|---|
| 1096 | |
|---|
| 1097 | size_t countTextNodes() { |
|---|
| 1098 | size_t nodes = 1; // this |
|---|
| 1099 | if (son) nodes += son->countTextNodes(); |
|---|
| 1100 | if (brother) nodes += brother->countTextNodes(); |
|---|
| 1101 | return nodes; |
|---|
| 1102 | } |
|---|
| 1103 | |
|---|
| 1104 | #if defined(DUMP_PARAGRAPHS) |
|---|
| 1105 | void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; } |
|---|
| 1106 | char *masknl(const char *text) { |
|---|
| 1107 | char *result = ARB_strdup(text); |
|---|
| 1108 | for (int i = 0; result[i]; ++i) { |
|---|
| 1109 | if (result[i] == '\n') result[i] = '|'; |
|---|
| 1110 | } |
|---|
| 1111 | return result; |
|---|
| 1112 | } |
|---|
| 1113 | void dump(ostream& out, int indent = 0) { |
|---|
| 1114 | print_indent(out, indent+1); |
|---|
| 1115 | { |
|---|
| 1116 | char *mtext = masknl(otext.as_string().c_str()); |
|---|
| 1117 | out << "text='" << mtext << "'\n"; |
|---|
| 1118 | free(mtext); |
|---|
| 1119 | } |
|---|
| 1120 | |
|---|
| 1121 | print_indent(out, indent+1); |
|---|
| 1122 | out << "type='" << readable_type() << "' "; |
|---|
| 1123 | if (get_type() == ENUMERATED) { |
|---|
| 1124 | out << "enumeration='" << otext.get_number() << "' "; |
|---|
| 1125 | } |
|---|
| 1126 | out << "reflow='" << reflow << "' "; |
|---|
| 1127 | out << "indentation='" << indentation << "'\n"; |
|---|
| 1128 | |
|---|
| 1129 | if (son) { |
|---|
| 1130 | print_indent(out, indent+2); cout << "son:\n"; |
|---|
| 1131 | son->dump(out, indent+2); |
|---|
| 1132 | cout << "\n"; |
|---|
| 1133 | } |
|---|
| 1134 | if (brother) { |
|---|
| 1135 | print_indent(out, indent); cout << "brother:\n"; |
|---|
| 1136 | brother->dump(out, indent); |
|---|
| 1137 | } |
|---|
| 1138 | } |
|---|
| 1139 | #endif // DUMP_PARAGRAPHS |
|---|
| 1140 | |
|---|
| 1141 | private: |
|---|
| 1142 | static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) { |
|---|
| 1143 | if (begin == end) return NULp; |
|---|
| 1144 | return new ParagraphTree(begin, end); |
|---|
| 1145 | } |
|---|
| 1146 | public: |
|---|
| 1147 | static ParagraphTree* buildParagraphTree(const Section& sec) { |
|---|
| 1148 | const Ostrings& txt = sec.Content(); |
|---|
| 1149 | if (txt.empty()) throw "attempt to build an empty ParagraphTree"; |
|---|
| 1150 | return buildParagraphTree(txt.begin(), txt.end()); |
|---|
| 1151 | } |
|---|
| 1152 | |
|---|
| 1153 | bool contains(ParagraphTree *that) { |
|---|
| 1154 | return |
|---|
| 1155 | this == that || |
|---|
| 1156 | (son && son->contains(that)) || |
|---|
| 1157 | (brother && brother->contains(that)); |
|---|
| 1158 | } |
|---|
| 1159 | |
|---|
| 1160 | ParagraphTree *predecessor(ParagraphTree *before_this) { |
|---|
| 1161 | if (brother == before_this) return this; |
|---|
| 1162 | if (!brother) return NULp; |
|---|
| 1163 | return brother->predecessor(before_this); |
|---|
| 1164 | } |
|---|
| 1165 | |
|---|
| 1166 | void append(ParagraphTree *new_brother) { |
|---|
| 1167 | if (!brother) brother = new_brother; |
|---|
| 1168 | else brother->append(new_brother); |
|---|
| 1169 | } |
|---|
| 1170 | |
|---|
| 1171 | bool is_some_brother(const ParagraphTree *other) const { |
|---|
| 1172 | return (other == brother) || (brother && brother->is_some_brother(other)); |
|---|
| 1173 | } |
|---|
| 1174 | |
|---|
| 1175 | ParagraphTree* takeAllInFrontOf(ParagraphTree *after) { |
|---|
| 1176 | ParagraphTree *removed = this; |
|---|
| 1177 | ParagraphTree *after_pred = this; |
|---|
| 1178 | |
|---|
| 1179 | h2x_assert(is_some_brother(after)); |
|---|
| 1180 | |
|---|
| 1181 | while (1) { |
|---|
| 1182 | h2x_assert(after_pred); |
|---|
| 1183 | h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after' |
|---|
| 1184 | |
|---|
| 1185 | if (after_pred->brother == after) { // found after |
|---|
| 1186 | after_pred->brother = NULp; // unlink |
|---|
| 1187 | break; |
|---|
| 1188 | } |
|---|
| 1189 | after_pred = after_pred->brother; |
|---|
| 1190 | } |
|---|
| 1191 | |
|---|
| 1192 | return removed; |
|---|
| 1193 | } |
|---|
| 1194 | |
|---|
| 1195 | ParagraphTree *firstListMember() { |
|---|
| 1196 | switch (get_type()) { |
|---|
| 1197 | case PLAIN_TEXT: break; |
|---|
| 1198 | case ITEM: return this; |
|---|
| 1199 | case ENUMERATED: { |
|---|
| 1200 | if (get_enumeration() == 1) return this; |
|---|
| 1201 | break; |
|---|
| 1202 | } |
|---|
| 1203 | } |
|---|
| 1204 | if (brother) return brother->firstListMember(); |
|---|
| 1205 | return NULp; |
|---|
| 1206 | } |
|---|
| 1207 | |
|---|
| 1208 | ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) { |
|---|
| 1209 | if (indentation<previous.indentation) return NULp; |
|---|
| 1210 | if (indentation == previous.indentation && get_type() == previous.get_type()) { |
|---|
| 1211 | if (get_type() != ENUMERATED) return this; |
|---|
| 1212 | if (get_enumeration() > previous.get_enumeration()) return this; |
|---|
| 1213 | return NULp; |
|---|
| 1214 | } |
|---|
| 1215 | if (!brother) return NULp; |
|---|
| 1216 | return brother->nextListMemberAfter(previous); |
|---|
| 1217 | } |
|---|
| 1218 | ParagraphTree *nextListMember() const { |
|---|
| 1219 | return brother ? brother->nextListMemberAfter(*this) : NULp; |
|---|
| 1220 | } |
|---|
| 1221 | |
|---|
| 1222 | ParagraphTree* firstWithLessIndentThan(int wanted_indentation) { |
|---|
| 1223 | if (indentation < wanted_indentation) return this; |
|---|
| 1224 | if (!brother) return NULp; |
|---|
| 1225 | return brother->firstWithLessIndentThan(wanted_indentation); |
|---|
| 1226 | } |
|---|
| 1227 | |
|---|
| 1228 | void format_indentations(); |
|---|
| 1229 | void format_lists(); |
|---|
| 1230 | |
|---|
| 1231 | private: |
|---|
| 1232 | static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) { |
|---|
| 1233 | Ostrings S; |
|---|
| 1234 | S.push_back(Ostring(Text, beginLineNo, type)); |
|---|
| 1235 | return new ParagraphTree(S.begin(), S.end()); |
|---|
| 1236 | } |
|---|
| 1237 | ParagraphTree *xml_write_list_contents(); |
|---|
| 1238 | ParagraphTree *xml_write_enum_contents(); |
|---|
| 1239 | void xml_write_textblock(); |
|---|
| 1240 | |
|---|
| 1241 | public: |
|---|
| 1242 | void xml_write(); |
|---|
| 1243 | }; |
|---|
| 1244 | |
|---|
| 1245 | #if defined(DUMP_PARAGRAPHS) |
|---|
| 1246 | static void dump_paragraph(ParagraphTree *para) { |
|---|
| 1247 | // helper function for use in gdb |
|---|
| 1248 | para->dump(cout, 0); |
|---|
| 1249 | } |
|---|
| 1250 | #endif |
|---|
| 1251 | |
|---|
| 1252 | void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) { |
|---|
| 1253 | /*! folds down brothers to sons |
|---|
| 1254 | * @param new_brother brother of 'this->brother', will become new brother. |
|---|
| 1255 | * If new_brother == NULp -> make all brothers sons. |
|---|
| 1256 | */ |
|---|
| 1257 | |
|---|
| 1258 | if (new_brother) { |
|---|
| 1259 | h2x_assert(is_some_brother(new_brother)); |
|---|
| 1260 | |
|---|
| 1261 | if (brother != new_brother) { |
|---|
| 1262 | #if defined(DEBUG) |
|---|
| 1263 | if (son) { |
|---|
| 1264 | son->attach_warning("Found unexpected son (in brothers_to_sons)"); |
|---|
| 1265 | brother->attach_warning("while trying to transform paragraphs from here .."); |
|---|
| 1266 | new_brother->attach_warning(".. to here .."); |
|---|
| 1267 | attach_warning(".. into sons of this paragraph."); |
|---|
| 1268 | return; |
|---|
| 1269 | } |
|---|
| 1270 | #endif |
|---|
| 1271 | |
|---|
| 1272 | h2x_assert(!son); |
|---|
| 1273 | h2x_assert(brother); |
|---|
| 1274 | |
|---|
| 1275 | if (!new_brother) { // all brothers -> sons |
|---|
| 1276 | son = brother; |
|---|
| 1277 | brother = NULp; |
|---|
| 1278 | } |
|---|
| 1279 | else { |
|---|
| 1280 | son = brother->takeAllInFrontOf(new_brother); |
|---|
| 1281 | brother = new_brother; |
|---|
| 1282 | } |
|---|
| 1283 | } |
|---|
| 1284 | } |
|---|
| 1285 | else { |
|---|
| 1286 | h2x_assert(!son); |
|---|
| 1287 | son = brother; |
|---|
| 1288 | brother = NULp; |
|---|
| 1289 | } |
|---|
| 1290 | } |
|---|
| 1291 | void ParagraphTree::format_lists() { |
|---|
| 1292 | // reformats tree such that all items/enumerations are brothers |
|---|
| 1293 | ParagraphTree *member = firstListMember(); |
|---|
| 1294 | if (member) { |
|---|
| 1295 | for (ParagraphTree *curr = this; curr != member; curr = curr->brother) { |
|---|
| 1296 | h2x_assert(curr); |
|---|
| 1297 | if (curr->son) curr->son->format_lists(); |
|---|
| 1298 | } |
|---|
| 1299 | |
|---|
| 1300 | for (ParagraphTree *next = member->nextListMember(); |
|---|
| 1301 | next; |
|---|
| 1302 | member = next, next = member->nextListMember()) |
|---|
| 1303 | { |
|---|
| 1304 | member->brothers_to_sons(next); |
|---|
| 1305 | h2x_assert(member->brother == next); |
|---|
| 1306 | |
|---|
| 1307 | if (member->son) member->son->format_lists(); |
|---|
| 1308 | } |
|---|
| 1309 | |
|---|
| 1310 | h2x_assert(!member->son); // member is the last item |
|---|
| 1311 | |
|---|
| 1312 | if (member->brother) { |
|---|
| 1313 | ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1); |
|---|
| 1314 | member->brothers_to_sons(non_member); |
|---|
| 1315 | } |
|---|
| 1316 | |
|---|
| 1317 | if (member->son) member->son->format_lists(); |
|---|
| 1318 | if (member->brother) member->brother->format_lists(); |
|---|
| 1319 | } |
|---|
| 1320 | else { |
|---|
| 1321 | for (ParagraphTree *curr = this; curr; curr = curr->brother) { |
|---|
| 1322 | h2x_assert(curr); |
|---|
| 1323 | if (curr->son) curr->son->format_lists(); |
|---|
| 1324 | } |
|---|
| 1325 | } |
|---|
| 1326 | } |
|---|
| 1327 | |
|---|
| 1328 | void ParagraphTree::format_indentations() { |
|---|
| 1329 | if (brother) { |
|---|
| 1330 | ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1); |
|---|
| 1331 | #if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION) |
|---|
| 1332 | if (same_indent && indentation != same_indent->indentation) { |
|---|
| 1333 | same_indent->attach_warning("indentation is assumed to be same as .."); |
|---|
| 1334 | attach_warning(".. here"); |
|---|
| 1335 | } |
|---|
| 1336 | #endif |
|---|
| 1337 | brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs |
|---|
| 1338 | if (brother) brother->format_indentations(); |
|---|
| 1339 | } |
|---|
| 1340 | |
|---|
| 1341 | if (son) son->format_indentations(); |
|---|
| 1342 | } |
|---|
| 1343 | |
|---|
| 1344 | // ----------------- |
|---|
| 1345 | // LinkType |
|---|
| 1346 | |
|---|
// Kinds of link targets. Every known type uses its own bit,
// i.e. several types may be or-ed together to test for any of them.
enum LinkType {
    LT_UNKNOWN = 0,

    LT_HTTP   = 1<<0,
    LT_HTTPS  = 1<<1,
    LT_FTP    = 1<<2,
    LT_FILE   = 1<<3,
    LT_EMAIL  = 1<<4,
    LT_HLP    = 1<<5,
    LT_PS     = 1<<6,
    LT_PDF    = 1<<7,
    LT_TICKET = 1<<8,
};
|---|
| 1359 | |
|---|
// Type ids of links: index 0 is used for LT_UNKNOWN,
// index N+1 for the LinkType using bit N.
static const char *link_id[] = {
    "unknown", // LT_UNKNOWN
    "www",     // LT_HTTP   ("http:")
    "www",     // LT_HTTPS  ("https:")
    "www",     // LT_FTP    ("ftp:")
    "www",     // LT_FILE   ("file:")
    "email",   // LT_EMAIL
    "hlp",     // LT_HLP
    "ps",      // LT_PS
    "pdf",     // LT_PDF
    "ticket",  // LT_TICKET
};
|---|
| 1372 | |
|---|
| 1373 | static string LinkType2id(LinkType type) { |
|---|
| 1374 | size_t idx = 0; |
|---|
| 1375 | while (type >= 1) { |
|---|
| 1376 | idx++; |
|---|
| 1377 | type = LinkType(type>>1); |
|---|
| 1378 | } |
|---|
| 1379 | arb_assert(idx<ARRAY_ELEMS(link_id)); |
|---|
| 1380 | return link_id[idx]; |
|---|
| 1381 | } |
|---|
| 1382 | |
|---|
| 1383 | inline const char *getExtension(const string& name) { |
|---|
| 1384 | size_t last_dot = name.find_last_of('.'); |
|---|
| 1385 | if (last_dot == string::npos) { |
|---|
| 1386 | return NULp; |
|---|
| 1387 | } |
|---|
| 1388 | return name.c_str()+last_dot+1; |
|---|
| 1389 | } |
|---|
| 1390 | |
|---|
| 1391 | static LinkType detectLinkType(const string& link_target) { |
|---|
| 1392 | LinkType type = LT_UNKNOWN; |
|---|
| 1393 | const char *ext = getExtension(link_target); |
|---|
| 1394 | |
|---|
| 1395 | if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP; |
|---|
| 1396 | else if (link_target.find("http://") == 0) type = LT_HTTP; |
|---|
| 1397 | else if (link_target.find("https://") == 0) type = LT_HTTPS; |
|---|
| 1398 | else if (link_target.find("ftp://") == 0) type = LT_FTP; |
|---|
| 1399 | else if (link_target.find("file://") == 0) type = LT_FILE; |
|---|
| 1400 | else if (link_target.find('@') != string::npos) type = LT_EMAIL; |
|---|
| 1401 | else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS; |
|---|
| 1402 | else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF; |
|---|
| 1403 | else if (link_target[0] == '#') type = LT_TICKET; |
|---|
| 1404 | |
|---|
| 1405 | return type; |
|---|
| 1406 | } |
|---|
| 1407 | |
|---|
| 1408 | // -------------------------------------------------------------------------------- |
|---|
| 1409 | |
|---|
| 1410 | |
|---|
| 1411 | |
|---|
static string locate_helpfile(const string& helpname) {
    // Searches 'helpname' in the helpfile locations ("source/" first, then "genhelp/").
    // Returns the first name for which stat() succeeds, "" if there is none.

    static const char * const location[] = { "source/", "genhelp/" };
    const size_t              locations  = sizeof(location)/sizeof(location[0]);

    for (size_t l = 0; l<locations; ++l) {
        const string candidate = string(location[l])+helpname;
        struct stat  info;

        if (stat(candidate.c_str(), &info) == 0) {
            return candidate;
        }
    }
    return "";
}
|---|
| 1428 | |
|---|
| 1429 | static string locate_document(const string& docname) { |
|---|
| 1430 | // search for 'docname' or 'docname.gz' in various helpfile locations |
|---|
| 1431 | |
|---|
| 1432 | string located = locate_helpfile(docname); |
|---|
| 1433 | if (located.empty()) { |
|---|
| 1434 | located = locate_helpfile(docname+".gz"); |
|---|
| 1435 | } |
|---|
| 1436 | return located; |
|---|
| 1437 | } |
|---|
| 1438 | |
|---|
| 1439 | static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) { |
|---|
| 1440 | if (type == LT_UNKNOWN) { |
|---|
| 1441 | string msg = string("Unknown link type (dest='")+dest+"')"; |
|---|
| 1442 | throw LineAttachedMessage(msg, source_line); |
|---|
| 1443 | } |
|---|
| 1444 | |
|---|
| 1445 | link.add_attribute("dest", dest); |
|---|
| 1446 | link.add_attribute("type", LinkType2id(type)); |
|---|
| 1447 | link.add_attribute("source_line", source_line); |
|---|
| 1448 | |
|---|
| 1449 | if (type&(LT_HLP|LT_PDF|LT_PS)) { // other links (www, email) cannot be checked for existence here |
|---|
| 1450 | string fullhelp = ((type<_HLP) ? locate_helpfile : locate_document)(dest); |
|---|
| 1451 | if (fullhelp.empty()) { |
|---|
| 1452 | link.add_attribute("missing", "1"); |
|---|
| 1453 | string deadlink = strf("Dead link to '%s'", dest.c_str()); |
|---|
| 1454 | #if 1 |
|---|
| 1455 | throw LineAttachedMessage(deadlink, source_line); |
|---|
| 1456 | #else |
|---|
| 1457 | add_warning(deadlink, source_line); |
|---|
| 1458 | #endif |
|---|
| 1459 | } |
|---|
| 1460 | } |
|---|
| 1461 | } |
|---|
| 1462 | |
|---|
| 1463 | static void print_XML_Text_expanding_links(const string& text, size_t lineNo) { |
|---|
| 1464 | size_t found = text.find("LINK{", 0); |
|---|
| 1465 | if (found != string::npos) { |
|---|
| 1466 | size_t inside_link = found+5; |
|---|
| 1467 | size_t close = text.find('}', inside_link); |
|---|
| 1468 | |
|---|
| 1469 | if (close == string::npos) throw "unclosed 'LINK{}'"; |
|---|
| 1470 | |
|---|
| 1471 | string link_target = text.substr(inside_link, close-inside_link); |
|---|
| 1472 | LinkType type = detectLinkType(link_target); |
|---|
| 1473 | string dest = link_target; |
|---|
| 1474 | |
|---|
| 1475 | XML_Text(text.substr(0, found)); |
|---|
| 1476 | |
|---|
| 1477 | { |
|---|
| 1478 | XML_Tag link("LINK"); |
|---|
| 1479 | link.set_on_extra_line(false); |
|---|
| 1480 | add_link_attributes(link, type, dest, lineNo); |
|---|
| 1481 | } |
|---|
| 1482 | |
|---|
| 1483 | print_XML_Text_expanding_links(text.substr(close+1), lineNo); |
|---|
| 1484 | } |
|---|
| 1485 | else { |
|---|
| 1486 | XML_Text t(text); |
|---|
| 1487 | } |
|---|
| 1488 | } |
|---|
| 1489 | |
|---|
static string autolink_ticket_references(const string& text) {
    // Returns a copy of 'text' where each ticket reference (i.e. a '#' directly
    // followed by at least one digit) is wrapped into 'LINK{...}'.
    // A '#' which is not followed by a digit is copied unchanged.
    //
    // 'text' is scanned once from left to right (no recursion, no repeated copying of the rest).

    string result;
    size_t copied = 0; // start of the part of 'text' not yet added to 'result'
    size_t search = 0; // where to search for the next '#'

    while (1) {
        const size_t hashpos = text.find('#', search);
        if (hashpos == string::npos) break;

        // isdigit() is only defined for values representable as unsigned char
        // -> cast needed for non-ASCII characters
        size_t digits_end = hashpos+1;
        while (digits_end<text.length() && isdigit(static_cast<unsigned char>(text[digits_end]))) {
            ++digits_end;
        }

        if (digits_end == hashpos+1) { // char after '#' is no digit => not a ticketref
            search = digits_end;
            continue;
        }

        result.append(text, copied, hashpos-copied);
        result += "LINK{";
        result.append(text, hashpos, digits_end-hashpos);
        result += "}";

        copied = search = digits_end;
    }

    result.append(text, copied, string::npos);
    return result;
}
|---|
| 1513 | |
|---|
| 1514 | inline void print_XML_Text(const string& text, size_t lineNo) { |
|---|
| 1515 | string autolinkedText = autolink_ticket_references(text); |
|---|
| 1516 | print_XML_Text_expanding_links(autolinkedText, lineNo); |
|---|
| 1517 | } |
|---|
| 1518 | |
|---|
| 1519 | void ParagraphTree::xml_write_textblock() { |
|---|
| 1520 | XML_Tag textblock("T"); |
|---|
| 1521 | textblock.add_attribute("reflow", reflow ? "1" : "0"); |
|---|
| 1522 | if (!reflow) { |
|---|
| 1523 | unsigned width = get_preformatted_width(); |
|---|
| 1524 | if (width>0) { |
|---|
| 1525 | textblock.add_attribute("width", strf("%i", width)); |
|---|
| 1526 | } |
|---|
| 1527 | } |
|---|
| 1528 | |
|---|
| 1529 | { |
|---|
| 1530 | string usedText; |
|---|
| 1531 | const string& text = otext; |
|---|
| 1532 | if (reflow) { |
|---|
| 1533 | usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level); |
|---|
| 1534 | } |
|---|
| 1535 | else { |
|---|
| 1536 | usedText = text; |
|---|
| 1537 | } |
|---|
| 1538 | print_XML_Text(usedText, otext.get_lineno()); |
|---|
| 1539 | } |
|---|
| 1540 | } |
|---|
| 1541 | |
|---|
| 1542 | ParagraphTree *ParagraphTree::xml_write_list_contents() { |
|---|
| 1543 | h2x_assert(is_itemlist_member()); |
|---|
| 1544 | #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) |
|---|
| 1545 | if (!reflow) attach_warning("ITEM not reflown (check output)"); |
|---|
| 1546 | #endif |
|---|
| 1547 | { |
|---|
| 1548 | XML_Tag entry("ENTRY"); |
|---|
| 1549 | entry.add_attribute("item", "1"); |
|---|
| 1550 | xml_write_textblock(); |
|---|
| 1551 | if (son) son->xml_write(); |
|---|
| 1552 | } |
|---|
| 1553 | if (brother && brother->is_itemlist_member()) { |
|---|
| 1554 | return brother->xml_write_list_contents(); |
|---|
| 1555 | } |
|---|
| 1556 | return brother; |
|---|
| 1557 | } |
|---|
| 1558 | ParagraphTree *ParagraphTree::xml_write_enum_contents() { |
|---|
| 1559 | h2x_assert(get_enumeration()); |
|---|
| 1560 | #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS) |
|---|
| 1561 | if (!reflow) attach_warning("ENUMERATED not reflown (check output)"); |
|---|
| 1562 | #endif |
|---|
| 1563 | { |
|---|
| 1564 | XML_Tag entry("ENTRY"); |
|---|
| 1565 | switch (get_enum_type()) { |
|---|
| 1566 | case DIGITS: |
|---|
| 1567 | entry.add_attribute("enumerated", strf("%i", get_enumeration())); |
|---|
| 1568 | break; |
|---|
| 1569 | case ALPHA_UPPER: |
|---|
| 1570 | entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration())); |
|---|
| 1571 | break; |
|---|
| 1572 | case ALPHA_LOWER: |
|---|
| 1573 | entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration())); |
|---|
| 1574 | break; |
|---|
| 1575 | default: |
|---|
| 1576 | h2x_assert(0); |
|---|
| 1577 | break; |
|---|
| 1578 | } |
|---|
| 1579 | xml_write_textblock(); |
|---|
| 1580 | if (son) son->xml_write(); |
|---|
| 1581 | } |
|---|
| 1582 | if (brother && brother->get_enumeration()) { |
|---|
| 1583 | int diff = brother->get_enumeration()-get_enumeration(); |
|---|
| 1584 | if (diff != 1) { |
|---|
| 1585 | attach_warning("Non-consecutive enumeration detected between here.."); |
|---|
| 1586 | brother->attach_warning(".. and here"); |
|---|
| 1587 | } |
|---|
| 1588 | return brother->xml_write_enum_contents(); |
|---|
| 1589 | } |
|---|
| 1590 | return brother; |
|---|
| 1591 | } |
|---|
| 1592 | |
|---|
| 1593 | void ParagraphTree::xml_write() { |
|---|
| 1594 | try { |
|---|
| 1595 | ParagraphTree *next = NULp; |
|---|
| 1596 | if (get_enumeration()) { |
|---|
| 1597 | XML_Tag enu("ENUM"); |
|---|
| 1598 | if (get_enumeration() != 1) { |
|---|
| 1599 | attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration())); |
|---|
| 1600 | } |
|---|
| 1601 | next = xml_write_enum_contents(); |
|---|
| 1602 | #if defined(WARN_LONESOME_ENUM_ELEMENTS) |
|---|
| 1603 | if (next == brother) attach_warning("Suspicious single-element-ENUM"); |
|---|
| 1604 | #endif |
|---|
| 1605 | } |
|---|
| 1606 | else if (is_itemlist_member()) { |
|---|
| 1607 | XML_Tag list("LIST"); |
|---|
| 1608 | next = xml_write_list_contents(); |
|---|
| 1609 | #if defined(WARN_LONESOME_LIST_ELEMENTS) |
|---|
| 1610 | if (next == brother) attach_warning("Suspicious single-element-LIST"); |
|---|
| 1611 | #endif |
|---|
| 1612 | } |
|---|
| 1613 | else { |
|---|
| 1614 | { |
|---|
| 1615 | XML_Tag para("P"); |
|---|
| 1616 | xml_write_textblock(); |
|---|
| 1617 | if (son) son->xml_write(); |
|---|
| 1618 | } |
|---|
| 1619 | next = brother; |
|---|
| 1620 | } |
|---|
| 1621 | if (next) next->xml_write(); |
|---|
| 1622 | } |
|---|
| 1623 | catch (string& err) { throw attached_message(err); } |
|---|
| 1624 | catch (const char *err) { throw attached_message(err); } |
|---|
| 1625 | } |
|---|
| 1626 | |
|---|
| 1627 | static void create_top_links(const Links& links, const char *tag) { |
|---|
| 1628 | for (Links::const_iterator s = links.begin(); s != links.end(); ++s) { |
|---|
| 1629 | XML_Tag link(tag); |
|---|
| 1630 | add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno()); |
|---|
| 1631 | } |
|---|
| 1632 | } |
|---|
| 1633 | |
|---|
inline string remove_LF_and_indentation(string paragraph) {
    // Joins the lines of 'paragraph':
    // each linefeed is removed together with the spaces directly in front of it and
    // the spaces directly behind it (=indentation); one single space is left in place.
    // Finally trailing spaces are removed from the result.

    size_t searchFrom = 0;
    for (;;) {
        size_t lfPos = paragraph.find('\n', searchFrom);
        if (lfPos == string::npos) break; // no LF left

        // drop the run of spaces directly in front of the LF:
        if (lfPos>0 && paragraph[lfPos-1] == ' ') {
            size_t firstSpace = lfPos-1;
            while (firstSpace>0 && paragraph[firstSpace-1] == ' ') --firstSpace;
            arb_assert(firstSpace<lfPos);
            paragraph.erase(firstSpace, lfPos-firstSpace);
            lfPos = firstSpace;
        }
        arb_assert(paragraph[lfPos] == '\n');

        bool spaceFollowsLF = paragraph.find(' ', lfPos) == lfPos+1;
        if (spaceFollowsLF) {
            // remove the LF and all but the last of the spaces behind it
            size_t behindSpaces = paragraph.find_first_not_of(' ', lfPos+1);
            size_t lastSpace    = behindSpaces == string::npos ? lfPos+1 : behindSpaces-1;
            paragraph.erase(lfPos, lastSpace-lfPos);
            // note: search position is not advanced (the handled LF no longer exists)
        }
        else {
            paragraph[lfPos] = ' '; // the LF itself becomes the separating space
            searchFrom       = lfPos+1;
        }
    }

    // cut off trailing spaces:
    size_t lastKept = paragraph.find_last_not_of(' ');
    if (lastKept == string::npos) {
        paragraph.clear(); // nothing but spaces
    }
    else {
        paragraph.erase(lastKept+1);
    }
    return paragraph;
}
|---|
| 1673 | |
|---|
| 1674 | void Helpfile::writeXML(FILE *out, const string& page_name) { |
|---|
| 1675 | XML_Document xml("PAGE", "arb_help.dtd", out); |
|---|
| 1676 | |
|---|
| 1677 | xml.skip_empty_tags = true; |
|---|
| 1678 | xml.indentation_per_level = 2; |
|---|
| 1679 | |
|---|
| 1680 | xml.getRoot().add_attribute("name", page_name); |
|---|
| 1681 | #if defined(DEBUG) |
|---|
| 1682 | xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version |
|---|
| 1683 | #else |
|---|
| 1684 | xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version |
|---|
| 1685 | #endif // DEBUG |
|---|
| 1686 | |
|---|
| 1687 | xml.getRoot().add_attribute("source", inputfile.c_str()); |
|---|
| 1688 | |
|---|
| 1689 | { |
|---|
| 1690 | XML_Comment(string("automatically generated from ../")+inputfile+' '); |
|---|
| 1691 | } |
|---|
| 1692 | |
|---|
| 1693 | create_top_links(uplinks, "UP"); |
|---|
| 1694 | create_top_links(references, "SUB"); |
|---|
| 1695 | create_top_links(auto_references, "SUB"); |
|---|
| 1696 | |
|---|
| 1697 | try { |
|---|
| 1698 | string titleText, subtitleText; |
|---|
| 1699 | |
|---|
| 1700 | const Ostrings& T = title.Content(); |
|---|
| 1701 | Ostrings::const_iterator s = T.begin(); |
|---|
| 1702 | |
|---|
| 1703 | if (s != T.end()) titleText = *s++; |
|---|
| 1704 | |
|---|
| 1705 | bool subtitleAdded = false; // @@@ not needed! (use !subtitleText.empty()) |
|---|
| 1706 | for (; s != T.end(); ++s) { |
|---|
| 1707 | if (s->get_type() != PLAIN_TEXT) { |
|---|
| 1708 | throw s->attached_message("wrong paragraph type (plain text expected)"); |
|---|
| 1709 | } |
|---|
| 1710 | string text = s->as_string(); |
|---|
| 1711 | if (!text.empty()) { // ignore empty lines |
|---|
| 1712 | text = eatWhitespace(text.c_str()); |
|---|
| 1713 | if (!text.empty()) { |
|---|
| 1714 | if (subtitleAdded) throw s->attached_message("only one subtitle accepted"); |
|---|
| 1715 | |
|---|
| 1716 | text = remove_LF_and_indentation(text); |
|---|
| 1717 | |
|---|
| 1718 | if (text.length()>MAX_SUBTITLE_CHARS) { |
|---|
| 1719 | s->attach_warning(strf("subtitle too verbose (max. %i chars allowed; found %zu)", MAX_SUBTITLE_CHARS, text.length())); |
|---|
| 1720 | } |
|---|
| 1721 | subtitleText = text; |
|---|
| 1722 | subtitleAdded = true; // accept only one line |
|---|
| 1723 | } |
|---|
| 1724 | } |
|---|
| 1725 | } |
|---|
| 1726 | |
|---|
| 1727 | { |
|---|
| 1728 | XML_Tag title_tag("TITLE"); { XML_Text text(titleText); } |
|---|
| 1729 | } |
|---|
| 1730 | if (!subtitleText.empty()) { |
|---|
| 1731 | XML_Tag title_tag("SUBTITLE"); { XML_Text text(subtitleText); } |
|---|
| 1732 | } |
|---|
| 1733 | |
|---|
| 1734 | } |
|---|
| 1735 | catch (string& err) { throw title.attached_message(err); } |
|---|
| 1736 | catch (const char *err) { throw title.attached_message(err); } |
|---|
| 1737 | |
|---|
| 1738 | for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { |
|---|
| 1739 | try { |
|---|
| 1740 | XML_Tag section_tag("SECTION"); |
|---|
| 1741 | section_tag.add_attribute("name", sec->getName()); |
|---|
| 1742 | |
|---|
| 1743 | ParagraphTree *ptree = ParagraphTree::buildParagraphTree(*sec); |
|---|
| 1744 | |
|---|
| 1745 | #if defined(DEBUG) |
|---|
| 1746 | size_t textnodes = ptree->countTextNodes(); |
|---|
| 1747 | #endif |
|---|
| 1748 | #if defined(DUMP_PARAGRAPHS) |
|---|
| 1749 | cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n"; |
|---|
| 1750 | ptree->dump(cout); |
|---|
| 1751 | cout << "----------------------------------------\n"; |
|---|
| 1752 | #endif |
|---|
| 1753 | |
|---|
| 1754 | ptree->format_lists(); |
|---|
| 1755 | |
|---|
| 1756 | #if defined(DUMP_PARAGRAPHS) |
|---|
| 1757 | cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n"; |
|---|
| 1758 | ptree->dump(cout); |
|---|
| 1759 | cout << "----------------------------------------\n"; |
|---|
| 1760 | #endif |
|---|
| 1761 | #if defined(DEBUG) |
|---|
| 1762 | size_t textnodes2 = ptree->countTextNodes(); |
|---|
| 1763 | h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error |
|---|
| 1764 | #endif |
|---|
| 1765 | |
|---|
| 1766 | ptree->format_indentations(); |
|---|
| 1767 | |
|---|
| 1768 | #if defined(DUMP_PARAGRAPHS) |
|---|
| 1769 | cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n"; |
|---|
| 1770 | ptree->dump(cout); |
|---|
| 1771 | cout << "----------------------------------------\n"; |
|---|
| 1772 | #endif |
|---|
| 1773 | #if defined(DEBUG) |
|---|
| 1774 | size_t textnodes3 = ptree->countTextNodes(); |
|---|
| 1775 | h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error |
|---|
| 1776 | #endif |
|---|
| 1777 | |
|---|
| 1778 | ptree->xml_write(); |
|---|
| 1779 | |
|---|
| 1780 | delete ptree; |
|---|
| 1781 | } |
|---|
| 1782 | catch (string& err) { throw sec->attached_message(err); } |
|---|
| 1783 | catch (const char *err) { throw sec->attached_message(err); } |
|---|
| 1784 | } |
|---|
| 1785 | } |
|---|
| 1786 | |
|---|
| 1787 | void Helpfile::extractInternalLinks() { |
|---|
| 1788 | for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) { |
|---|
| 1789 | try { |
|---|
| 1790 | const Ostrings& s = sec->Content(); |
|---|
| 1791 | |
|---|
| 1792 | for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) { |
|---|
| 1793 | const string& line = *li; |
|---|
| 1794 | size_t start = 0; |
|---|
| 1795 | |
|---|
| 1796 | while (1) { |
|---|
| 1797 | size_t found = line.find("LINK{", start); |
|---|
| 1798 | if (found == string::npos) break; |
|---|
| 1799 | found += 5; |
|---|
| 1800 | size_t close = line.find('}', found); |
|---|
| 1801 | if (close == string::npos) break; |
|---|
| 1802 | |
|---|
| 1803 | string link_target = line.substr(found, close-found); |
|---|
| 1804 | |
|---|
| 1805 | if (link_target.find("http://") == string::npos && |
|---|
| 1806 | link_target.find("https://")== string::npos && |
|---|
| 1807 | link_target.find("ftp://") == string::npos && |
|---|
| 1808 | link_target.find("file://") == string::npos && |
|---|
| 1809 | link_target.find('@') == string::npos) |
|---|
| 1810 | { |
|---|
| 1811 | check_self_ref(link_target); |
|---|
| 1812 | |
|---|
| 1813 | try { |
|---|
| 1814 | check_specific_duplicates(link_target, references, false); // check only sublinks here |
|---|
| 1815 | check_specific_duplicates(link_target, uplinks, false); // check only uplinks here |
|---|
| 1816 | check_specific_duplicates(link_target, auto_references, false); // check only sublinks here |
|---|
| 1817 | |
|---|
| 1818 | // only auto-add inline reference if none of the above checks has thrown |
|---|
| 1819 | auto_references.push_back(Link(link_target, li->line_number())); |
|---|
| 1820 | } |
|---|
| 1821 | catch (string& err) { |
|---|
| 1822 | ; // silently ignore inlined |
|---|
| 1823 | } |
|---|
| 1824 | } |
|---|
| 1825 | start = close+1; |
|---|
| 1826 | } |
|---|
| 1827 | } |
|---|
| 1828 | } |
|---|
| 1829 | catch (string& err) { |
|---|
| 1830 | throw sec->attached_message("'"+err+"' while scanning LINK{}"); |
|---|
| 1831 | } |
|---|
| 1832 | } |
|---|
| 1833 | } |
|---|
| 1834 | |
|---|
| 1835 | static void show_err(const string& err, size_t lineno, const string& helpfile) { |
|---|
| 1836 | if (err.find(helpfile+':') != string::npos) { |
|---|
| 1837 | cerr << err; |
|---|
| 1838 | } |
|---|
| 1839 | else if (lineno == NO_LINENUMBER_INFO) { |
|---|
| 1840 | cerr << helpfile << ":1: [in unknown line] " << err; |
|---|
| 1841 | } |
|---|
| 1842 | else { |
|---|
| 1843 | cerr << helpfile << ":" << lineno << ": " << err; |
|---|
| 1844 | } |
|---|
| 1845 | cerr << '\n'; |
|---|
| 1846 | } |
|---|
| 1847 | inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) { |
|---|
| 1848 | show_err(line_err.Message(), line_err.Lineno(), helpfile); |
|---|
| 1849 | } |
|---|
| 1850 | inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) { |
|---|
| 1851 | show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile); |
|---|
| 1852 | } |
|---|
| 1853 | inline void show_warnings(const string& helpfile) { |
|---|
| 1854 | for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) { |
|---|
| 1855 | show_warning(*wi, helpfile); |
|---|
| 1856 | } |
|---|
| 1857 | } |
|---|
| 1858 | static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) { |
|---|
| 1859 | show_err(error, helpfile); |
|---|
| 1860 | show_warnings(helpfile); |
|---|
| 1861 | } |
|---|
| 1862 | |
|---|
| 1863 | int ARB_main(int argc, char *argv[]) { |
|---|
| 1864 | if (argc != 3) { |
|---|
| 1865 | cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n"; |
|---|
| 1866 | return EXIT_FAILURE; |
|---|
| 1867 | } |
|---|
| 1868 | |
|---|
| 1869 | Helpfile help; |
|---|
| 1870 | string arb_help; |
|---|
| 1871 | |
|---|
| 1872 | try { |
|---|
| 1873 | try { |
|---|
| 1874 | arb_help = argv[1]; |
|---|
| 1875 | string xml_output = argv[2]; |
|---|
| 1876 | |
|---|
| 1877 | { |
|---|
| 1878 | ifstream in(arb_help.c_str()); |
|---|
| 1879 | help.readHelp(in, arb_help); |
|---|
| 1880 | } |
|---|
| 1881 | |
|---|
| 1882 | help.extractInternalLinks(); |
|---|
| 1883 | |
|---|
| 1884 | { |
|---|
| 1885 | FILE *out = std::fopen(xml_output.c_str(), "wt"); |
|---|
| 1886 | if (!out) throw string("Can't open '")+xml_output+'\''; |
|---|
| 1887 | |
|---|
| 1888 | try { |
|---|
| 1889 | // arb_help contains 'source/name.hlp' |
|---|
| 1890 | size_t slash = arb_help.find('/'); |
|---|
| 1891 | size_t dot = arb_help.find_last_of('.'); |
|---|
| 1892 | |
|---|
| 1893 | if (slash == string::npos || dot == string::npos) { |
|---|
| 1894 | throw string("parameter <ARB helpfile> has to be in format 'source/name.hlp' (not '"+arb_help+"')"); |
|---|
| 1895 | } |
|---|
| 1896 | |
|---|
| 1897 | string page_name(arb_help, slash+1, dot-slash-1); |
|---|
| 1898 | help.writeXML(out, page_name); |
|---|
| 1899 | fclose(out); |
|---|
| 1900 | } |
|---|
| 1901 | catch (...) { |
|---|
| 1902 | fclose(out); |
|---|
| 1903 | remove(xml_output.c_str()); |
|---|
| 1904 | throw; |
|---|
| 1905 | } |
|---|
| 1906 | } |
|---|
| 1907 | |
|---|
| 1908 | show_warnings(arb_help); |
|---|
| 1909 | |
|---|
| 1910 | return EXIT_SUCCESS; |
|---|
| 1911 | } |
|---|
| 1912 | catch (string& err) { throw unattached_message(err); } |
|---|
| 1913 | catch (const char * err) { throw unattached_message(err); } |
|---|
| 1914 | catch (LineAttachedMessage& err) { throw; } |
|---|
| 1915 | catch (...) { throw unattached_message("unknown exception in arb_help2xml"); } |
|---|
| 1916 | } |
|---|
| 1917 | catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); } |
|---|
| 1918 | catch (...) { h2x_assert(0); } |
|---|
| 1919 | |
|---|
| 1920 | return EXIT_FAILURE; |
|---|
| 1921 | } |
|---|
| 1922 | |
|---|
| 1923 | // -------------------------------------------------------------------------------- |
|---|
| 1924 | |
|---|
| 1925 | #ifdef UNIT_TESTS |
|---|
| 1926 | #include <test_unit.h> |
|---|
| 1927 | #include <arb_msg.h> |
|---|
| 1928 | #include <arb_file.h> |
|---|
| 1929 | |
|---|
| 1930 | // Hint: you may set ONLY_DO_UNITTEST = 1 to speed up code/test-cycle |
|---|
| 1931 | // see ./Makefile@ONLY_DO_UNITTEST |
|---|
| 1932 | |
|---|
// expect that remove_LF_and_indentation(input) results in 'expected'
// (the __BROKEN variant documents a known wrong result 'got')
#define TEST_REMOVE_LF_AND_INDENTATION(input,expected)             TEST_EXPECT_EQUAL(remove_LF_and_indentation(input).c_str(), expected)
#define TEST_REMOVE_LF_AND_INDENTATION__BROKEN(input,expected,got) TEST_EXPECT_EQUAL__BROKEN(remove_LF_and_indentation(input).c_str(), expected, got)
|---|
| 1935 | |
|---|
| 1936 | void TEST_remove_LF_and_indentation() { |
|---|
| 1937 | TEST_REMOVE_LF_AND_INDENTATION("", |
|---|
| 1938 | ""); |
|---|
| 1939 | |
|---|
| 1940 | TEST_REMOVE_LF_AND_INDENTATION(" \n \n \n ", |
|---|
| 1941 | ""); |
|---|
| 1942 | TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline", |
|---|
| 1943 | "hello Newline"); |
|---|
| 1944 | TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline\n 1\n2 \n 3 \n4\n5\n 6 \n 7 \n 8\n", |
|---|
| 1945 | "hello Newline 1 2 3 4 5 6 7 8"); |
|---|
| 1946 | |
|---|
| 1947 | TEST_REMOVE_LF_AND_INDENTATION("Visualization of Three-dimensional\n structure of small subunit (16S) rRNA", |
|---|
| 1948 | "Visualization of Three-dimensional structure of small subunit (16S) rRNA"); |
|---|
| 1949 | } |
|---|
| 1950 | |
|---|
| 1951 | static arb_test::match_expectation help_file_compiles(const char *helpfile, const char *expected_title, const char *expected_error_part) { |
|---|
| 1952 | using namespace arb_test; |
|---|
| 1953 | expectation_group expected; |
|---|
| 1954 | |
|---|
| 1955 | ifstream in(helpfile); |
|---|
| 1956 | |
|---|
| 1957 | LineAttachedMessage *error = NULp; |
|---|
| 1958 | |
|---|
| 1959 | Helpfile help; |
|---|
| 1960 | try { |
|---|
| 1961 | help.readHelp(in, helpfile); |
|---|
| 1962 | help.extractInternalLinks(); |
|---|
| 1963 | |
|---|
| 1964 | FILE *devnul = fopen("/dev/null", "wt"); |
|---|
| 1965 | if (!devnul) throw unattached_message("can't write to null device"); |
|---|
| 1966 | help.writeXML(devnul, "dummy"); |
|---|
| 1967 | fclose(devnul); |
|---|
| 1968 | } |
|---|
| 1969 | catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); } |
|---|
| 1970 | catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); } |
|---|
| 1971 | |
|---|
| 1972 | if (expected_error_part) { |
|---|
| 1973 | expected.add(that(error).does_differ_from_NULL()); |
|---|
| 1974 | if (error) expected.add(that(error->Message()).does_contain(expected_error_part)); |
|---|
| 1975 | } |
|---|
| 1976 | else { |
|---|
| 1977 | expected.add(that(error).is_equal_to_NULL()); |
|---|
| 1978 | if (!error) { |
|---|
| 1979 | Section title = help.get_title(); |
|---|
| 1980 | const Ostrings& title_strings = title.Content(); |
|---|
| 1981 | |
|---|
| 1982 | expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title)); |
|---|
| 1983 | expected.add(that(title_strings.size()).is_equal_to(1)); |
|---|
| 1984 | } |
|---|
| 1985 | else { |
|---|
| 1986 | show_error_and_warnings(*error, helpfile); |
|---|
| 1987 | } |
|---|
| 1988 | } |
|---|
| 1989 | |
|---|
| 1990 | delete error; |
|---|
| 1991 | |
|---|
| 1992 | return all().ofgroup(expected); |
|---|
| 1993 | } |
|---|
| 1994 | |
|---|
// expect successful conversion (with given title) resp. a conversion error (containing given text)
#define HELP_FILE_COMPILES(helpfile,title)          TEST_EXPECTATION(help_file_compiles(helpfile,title,NULp))
#define HELP_FILE_COMPILE_ERROR(helpfile,errorPart) TEST_EXPECTATION(help_file_compiles(helpfile,NULp,errorPart))
|---|
| 1997 | |
|---|
| 1998 | void TEST_hlp2xml_conversion() { |
|---|
| 1999 | TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE")); |
|---|
| 2000 | |
|---|
| 2001 | HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp |
|---|
| 2002 | |
|---|
| 2003 | HELP_FILE_COMPILES("source/markbyref.hlp", "Mark by reference"); // source/markbyref.hlp |
|---|
| 2004 | HELP_FILE_COMPILES("source/ad_align.hlp", "Alignment Administration"); // source/ad_align.hlp |
|---|
| 2005 | HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights and licenses"); // genhelp/copyright.hlp |
|---|
| 2006 | |
|---|
| 2007 | // @@@ add test for helpfile with subtitle |
|---|
| 2008 | |
|---|
| 2009 | HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file |
|---|
| 2010 | } |
|---|
| 2011 | TEST_PUBLISH(TEST_hlp2xml_conversion); |
|---|
| 2012 | |
|---|
| 2013 | |
|---|
| 2014 | // #define TEST_AUTO_UPDATE // uncomment to update expected xml |
|---|
| 2015 | |
|---|
| 2016 | void TEST_hlp2xml_output() { |
|---|
| 2017 | string tested_helpfile[] = { |
|---|
| 2018 | "unittest" |
|---|
| 2019 | }; |
|---|
| 2020 | |
|---|
| 2021 | string HELP_SOURCE = "../../HELP_SOURCE/"; |
|---|
| 2022 | string LIB = "../../lib/"; |
|---|
| 2023 | string EXPECTED = "help/"; |
|---|
| 2024 | |
|---|
| 2025 | for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) { |
|---|
| 2026 | string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml"; |
|---|
| 2027 | string html = LIB + "help_html/" + tested_helpfile[i] + ".html"; |
|---|
| 2028 | string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp"; |
|---|
| 2029 | |
|---|
| 2030 | string xml_expected = EXPECTED + tested_helpfile[i] + ".xml"; |
|---|
| 2031 | string html_expected = EXPECTED + tested_helpfile[i] + ".html"; |
|---|
| 2032 | string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp"; |
|---|
| 2033 | |
|---|
| 2034 | |
|---|
| 2035 | #if defined(TEST_AUTO_UPDATE) |
|---|
| 2036 | # if defined(NDEBUG) |
|---|
| 2037 | # error please use auto-update only in DEBUG mode |
|---|
| 2038 | # endif |
|---|
| 2039 | TEST_COPY_FILE(xml.c_str(), xml_expected.c_str()); |
|---|
| 2040 | TEST_COPY_FILE(html.c_str(), html_expected.c_str()); |
|---|
| 2041 | TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str()); |
|---|
| 2042 | |
|---|
| 2043 | #else // !defined(TEST_AUTO_UPDATE) |
|---|
| 2044 | |
|---|
| 2045 | # if defined(DEBUG) |
|---|
| 2046 | int expected_xml_difflines = 0; |
|---|
| 2047 | int expected_hlp_difflines = 0; |
|---|
| 2048 | # else // !defined(DEBUG) |
|---|
| 2049 | int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning |
|---|
| 2050 | int expected_hlp_difflines = 2; // resulting warning in helpfile |
|---|
| 2051 | # endif |
|---|
| 2052 | TEST_EXPECT_TEXTFILE_DIFFLINES(xml.c_str(), xml_expected.c_str(), expected_xml_difflines); |
|---|
| 2053 | TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html.c_str(), html_expected.c_str(), 0); // html contains the update-date |
|---|
| 2054 | TEST_EXPECT_TEXTFILE_DIFFLINES(hlp.c_str(), hlp_expected.c_str(), expected_hlp_difflines); |
|---|
| 2055 | #endif |
|---|
| 2056 | } |
|---|
| 2057 | } |
|---|
| 2058 | |
|---|
| 2059 | |
|---|
| 2060 | #if defined(PROTECT_HELP_VS_CHANGES) |
|---|
| 2061 | void TEST_protect_help_vs_changes() { // should normally be disabled |
|---|
| 2062 | // fails if help changes compared to another checkout |
|---|
| 2063 | // or just updates the diff w/o failing (if you comment out the last line) |
|---|
| 2064 | // |
|---|
| 2065 | // if the patch is hugo and you load it into xemacs |
|---|
| 2066 | // you might want to (turn-on-lazy-shot) |
|---|
| 2067 | // |
|---|
| 2068 | // patch-pointer: ../UNIT_TESTER/run/help_changes.patch |
|---|
| 2069 | |
|---|
| 2070 | bool do_help = true; |
|---|
| 2071 | bool do_html = true; |
|---|
| 2072 | |
|---|
| 2073 | const char *ref_WC = "ARB.help.ref"; |
|---|
| 2074 | |
|---|
| 2075 | // ---------------------------------------- config above |
|---|
| 2076 | |
|---|
| 2077 | string this_base = "../.."; |
|---|
| 2078 | string ref_base = this_base+"/../"+ref_WC; |
|---|
| 2079 | string to_help = "/lib/help"; |
|---|
| 2080 | string to_html = "/lib/help_html"; |
|---|
| 2081 | string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help; |
|---|
| 2082 | string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html; |
|---|
| 2083 | |
|---|
| 2084 | string update_cmd; |
|---|
| 2085 | |
|---|
| 2086 | if (do_help) { |
|---|
| 2087 | if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")"; |
|---|
| 2088 | else update_cmd = diff_help; |
|---|
| 2089 | } |
|---|
| 2090 | else if (do_html) update_cmd = diff_html; |
|---|
| 2091 | |
|---|
| 2092 | string patch = "help_changes.patch"; |
|---|
| 2093 | update_cmd += " >"+patch+" ||true"; |
|---|
| 2094 | |
|---|
| 2095 | string fail_on_change_cmd = "test \"`cat "+patch+" | grep -v '^Common subdirectories' | wc -l`\" = \"0\" || ( echo \"Error: Help changed\"; false)"; |
|---|
| 2096 | |
|---|
| 2097 | TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str())); |
|---|
| 2098 | TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit |
|---|
| 2099 | } |
|---|
| 2100 | #endif |
|---|
| 2101 | |
|---|
| 2102 | #endif // UNIT_TESTS |
|---|