Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

arb_help2xml.cxx

Visit:

Last change on this file was 18694, checked in by westram, 4 years ago
fix expected unittest result.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 58.4 KB

Line
1	// ==================================================================== //
2	// //
3	// File : arb_help2xml.cxx //
4	// Purpose : Converts old ARB help format to XML //
5	// //
6	// Coded by Ralf Westram (coder@reallysoft.de) in October 2001 //
7	// Copyright Department of Microbiology (Technical University Munich) //
8	// //
9	// Visit our web site at: http://www.arb-home.de/ //
10	// //
11	// ==================================================================== //
12
13	#include <xml.hxx>
14	#include <arb_defs.h>
15	#include <arb_diff.h>
16	#include <static_assert.h>
17
18	#include <list>
19	#include <set>
20	#include <iostream>
21	#include <fstream>
22
23	#include <cstdlib>
24	#include <cstdarg>
25	#include <cstring>
26	#include <climits>
27
28	#include <unistd.h>
29	#include <sys/stat.h>
30
31	using namespace std;
32
33	#define h2x_assert(bed) arb_assert(bed)
34
35	#if defined(DEBUG)
36	#define WARN_FORMATTING_PROBLEMS
37	#define WARN_MISSING_HELP
38	// #define DUMP_PARAGRAPHS
39	// #define PROTECT_HELP_VS_CHANGES
40	#endif // DEBUG
41
42
43	#if defined(WARN_FORMATTING_PROBLEMS)
44
45	#define WARN_FIXED_LAYOUT_LIST_ELEMENTS
46	#define WARN_LONESOME_ENUM_ELEMENTS
47
48	// warnings below are useless for production and shall be disabled in SVN
49	// #define WARN_LONESOME_LIST_ELEMENTS
50	// #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION
51	// #define WARN_IGNORED_ALPHA_ENUMS
52
53	#endif
54
55
56	#define MAX_LINE_LENGTH 200 // maximum length of lines in input stream
57	#define TABSIZE 8
58
// Keywords that introduce the known section types.
// The order has to match the first entries of enum SectionType
// (readHelp() converts an index into this array into a SectionType).
static const char *knownSections[] = {
    "OCCURRENCE",
    "DESCRIPTION",
    "NOTES",
    "EXAMPLES",
    "WARNINGS",
    "BUGS",
    "SECTION",
};

enum SectionType {
    SEC_OCCURRENCE,
    SEC_DESCRIPTION,
    SEC_NOTES,
    SEC_EXAMPLES,
    SEC_WARNINGS,
    SEC_BUGS,
    SEC_SECTION,

    KNOWN_SECTION_TYPES, // == number of entries in knownSections (see STATIC_ASSERT below)
    SEC_NONE,            // used by readHelp() as "no section type detected yet"
    SEC_FAKE,            // used for the TITLE pseudo-section of Helpfile
};

STATIC_ASSERT(ARRAY_ELEMS(knownSections) == KNOWN_SECTION_TYPES);
84
85	__ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) {
86	static size_t buf_size = 256;
87	static char *buffer = new char[buf_size];
88
89	size_t length;
90	while (1) {
91	if (!buffer) {
92	h2x_assert(buffer); // to stop when debugging
93	throw string("out of memory");
94	}
95
96	length = vsnprintf(buffer, buf_size, format, argPtr);
97	if (length < buf_size) break; // string fits into current buffer
98
99	// otherwise resize buffer :
100	buf_size += buf_size/2;
101	delete [] buffer;
102	buffer = new char[buf_size];
103	}
104
105	return string(buffer, length);
106	}
107
108	__ATTR__FORMAT(1) static string strf(const char *format, ...) {
109	va_list argPtr;
110	va_start(argPtr, format);
111	string result = vstrf(format, argPtr);
112	va_end(argPtr);
113
114	return result;
115	}
116
117	// -----------------------------
118	// warnings and errors
119
class LineAttachedMessage {
    // a message text together with the number of the line it refers to

    string text_;
    size_t line_;

public:
    LineAttachedMessage(const string& message_, size_t lineno_)
        : text_(message_),
          line_(lineno_)
    {
    }

    const string& Message() const {
        return text_;
    }
    size_t Lineno() const {
        return line_;
    }
};
133
134	const size_t NO_LINENUMBER_INFO = -1U;
135
136	LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); }
137
138
139	static list<LineAttachedMessage> warnings;
140	inline void add_warning(const LineAttachedMessage& laMsg) {
141	warnings.push_back(laMsg);
142	}
143	inline void add_warning(const string& warning, size_t lineno) {
144	add_warning(LineAttachedMessage(warning, lineno));
145	}
146
147	struct MessageAttachable {
148	virtual ~MessageAttachable() {}
149
150	virtual string location_description() const = 0; // may return empty string
151	virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO
152
153	LineAttachedMessage attached_message(const string& message) const {
154	string where = location_description();
155	if (where.empty()) return LineAttachedMessage(message, line_number());
156	return LineAttachedMessage(message+" ["+where+"]", line_number());
157	}
158	void attach_warning(const string& message) const {
159	add_warning(attached_message(message));
160	}
161	};
162
163
164	// ----------------------
165	// class Reader
166
167	class Reader : public MessageAttachable {
168	private:
169	istream& in;
170	char lineBuffer[MAX_LINE_LENGTH];
171	char lineBuffer2[MAX_LINE_LENGTH];
172	bool readAgain;
173	bool eof;
174	int lineNo;
175
176	string location_description() const OVERRIDE { return ""; }
177	size_t line_number() const OVERRIDE { return lineNo; }
178
179	void getline() {
180	if (!eof) {
181	if (in.eof()) eof = true;
182	else {
183	h2x_assert(in.good());
184
185	in.getline(lineBuffer, MAX_LINE_LENGTH);
186	lineNo++;
187
188	if (in.eof()) eof = true;
189	else if (in.fail()) throw "line too long";
190
191	if (strchr(lineBuffer, '\t')) {
192	int o2 = 0;
193
194	for (int o = 0; lineBuffer[o]; ++o) {
195	if (lineBuffer[o] == '\t') {
196	int spaces = TABSIZE - (o2 % TABSIZE);
197	while (spaces--) lineBuffer2[o2++] = ' ';
198	}
199	else {
200	lineBuffer2[o2++] = lineBuffer[o];
201	}
202	}
203	lineBuffer2[o2] = 0;
204	strcpy(lineBuffer, lineBuffer2);
205	}
206
207	char *eol = strchr(lineBuffer, 0)-1;
208	while (eol >= lineBuffer && isspace(eol[0])) {
209	eol[0] = 0; // trim trailing whitespace
210	eol--;
211	}
212	if (eol > lineBuffer) {
213	// now eol points to last character
214	if (eol[0] == '-' && isalnum(eol[-1])) {
215	attach_warning("manual hyphenation detected");
216	}
217	}
218	}
219	}
220	}
221
222	public:
223	Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); }
224	virtual ~Reader() {}
225
226	const char *getNext() {
227	if (readAgain) readAgain = false;
228	else getline();
229	return eof ? NULp : lineBuffer;
230	}
231
232	void back() {
233	h2x_assert(!readAgain);
234	readAgain = true;
235	}
236
237	int getLineNo() const { return lineNo; }
238	};
239
// kind of a paragraph stored in an Ostring
enum ParagraphType {
    PLAIN_TEXT,
    ENUMERATED, // paragraph started with a number or letter enumeration (see EnumerationType)
    ITEM,       // paragraph started with an itemlist marker ('-' or '*')
};
// kind of enumeration detected in front of an ENUMERATED paragraph
enum EnumerationType {
    NONE,
    DIGITS,
    ALPHA_UPPER,
    ALPHA_LOWER,
};
251
252	class Ostring {
253	string content;
254	size_t lineNo; // where string came from
255	ParagraphType type;
256
257	// only valid for type==ENUMERATED:
258	EnumerationType etype;
259	unsigned number;
260
261	public:
262
263	Ostring(const string& s, size_t line_no, ParagraphType type_)
264	: content(s),
265	lineNo(line_no),
266	type(type_),
267	etype(NONE)
268	{
269	h2x_assert(type != ENUMERATED);
270	}
271	Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
272	: content(s),
273	lineNo(line_no),
274	type(type_),
275	etype(etype_),
276	number(num)
277	{
278	h2x_assert(type == ENUMERATED);
279	h2x_assert(etype == DIGITS \|\| etype == ALPHA_UPPER \|\| etype == ALPHA_LOWER);
280	h2x_assert(num>0);
281	}
282
283
284	operator const string&() const { return content; }
285	operator string&() { return content; }
286
287	const string& as_string() const { return content; }
288	string& as_string() { return content; }
289
290	size_t get_lineno() const { return lineNo; }
291
292	const ParagraphType& get_type() const { return type; }
293	const EnumerationType& get_enum_type() const {
294	h2x_assert(type == ENUMERATED);
295	return etype;
296	}
297	unsigned get_number() const {
298	h2x_assert(type == ENUMERATED);
299	return number;
300	}
301
302	// some wrapper to make Ostring act like string
303	const char *c_str() const { return content.c_str(); }
304	};
305
306	typedef list<Ostring> Ostrings;
307
308	#if defined(WARN_MISSING_HELP)
309	static void check_TODO(const char *line, const Reader& reader) {
310	if (strstr(line, "@@@") \|\| strstr(line, "TODO")) {
311	reader.attach_warning(strf("TODO: %s", line));
312	}
313	}
314	#else
315	inline void check_TODO(const char *, const Reader&) { }
316	#endif // WARN_MISSING_HELP
317
318	// ----------------------------
319	// class Section
320
321	class Section FINAL_TYPE : public MessageAttachable {
322	SectionType type;
323	string name;
324	Ostrings content;
325	size_t lineno;
326
327	string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; }
328
329	public:
330	Section(string name_, SectionType type_, size_t lineno_)
331	: type(type_),
332	name(name_),
333	lineno(lineno_)
334	{}
335	virtual ~Section() {}
336
337	const Ostrings& Content() const { return content; }
338	Ostrings& Content() { return content; }
339	SectionType get_type() const { return type; }
340	size_t line_number() const OVERRIDE { return lineno; }
341	const string& getName() const { return name; }
342	void setName(const string& name_) { name = name_; }
343	};
344
345	typedef list<Section> SectionList;
346
347	// --------------------
348	// class Link
349
class Link {
    // a link target together with the line where the link was found

    string dest_;
    size_t found_at_;

public:
    Link(const string& target_, size_t source_lineno_)
        : dest_(target_),
          found_at_(source_lineno_)
    {
    }

    const string& Target() const {
        return dest_;
    }
    size_t SourceLineno() const {
        return found_at_;
    }
};
363
364	typedef list<Link> Links;
365
366	// ------------------------
367	// class Helpfile
368
369	class Helpfile {
370	private:
371	Links uplinks;
372	Links references;
373	Links auto_references;
374	Section title;
375	SectionList sections;
376	string inputfile;
377
378	void check_self_ref(const string& link) {
379	size_t slash = inputfile.find('/');
380	if (slash != string::npos) {
381	if (inputfile.substr(slash+1) == link) {
382	throw string("Invalid link to self");
383	}
384	}
385	}
386
387	public:
388	Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {}
389	virtual ~Helpfile() {}
390
391	void readHelp(istream& in, const string& filename);
392	void writeXML(FILE *out, const string& page_name);
393	void extractInternalLinks();
394
395	const Section& get_title() const { return title; }
396	};
397
inline bool isWhite(char c) {
    // only a plain space counts as whitespace here
    const char BLANK = ' ';
    return c == BLANK;
}
399
400	inline bool isEmptyOrComment(const char *s) {
401	if (s[0] == '#') return true;
402	for (int off = 0; ; ++off) {
403	if (s[off] == 0) return true;
404	if (!isWhite(s[off])) break;
405	}
406
407	return false;
408	}
409
410	inline const char extractKeyword(const char line, string& keyword) {
411	// returns NULp if no keyword was found
412	// otherwise returns position behind keyword and sets value of 'keyword'
413
414	const char *space = strchr(line, ' ');
415	if (space && space>line) {
416	keyword = string(line, 0, space-line);
417	return space;
418	}
419	else if (!space) { // test for keyword w/o content behind
420	if (line[0]) { // not empty
421	keyword = line;
422	return strchr(line, 0);
423	}
424	}
425	return NULp;
426	}
427
428	inline const char eatWhite(const char line) {
429	// skips whitespace
430	while (isWhite(*line)) ++line;
431	return line;
432	}
433
434	inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) {
435	if (paragraph.length()) {
436	if (type == ENUMERATED) {
437	sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num));
438	}
439	else {
440	sec.Content().push_back(Ostring(paragraph, lineNo, type));
441	}
442
443	type = PLAIN_TEXT;
444	etype = NONE;
445	paragraph = "";
446	}
447	}
448
449	inline const char firstChar(const char s) {
450	while (isWhite(s[0])) ++s;
451	return s;
452	}
453
inline bool is_startof_itemlist_element(const char *contentStart) {
    // true if 'contentStart' begins with an itemlist marker, i.e.
    // - '-' or '*'
    // - followed by exactly one whitespace character
    // - which is not followed by another whitespace or a '-'
    //
    // The checks short-circuit, so nothing behind the terminating zero is read.
    // ctype functions require values representable as unsigned char, hence the casts.

    const bool has_marker = contentStart[0] == '-' || contentStart[0] == '*';
    if (!has_marker) return false;

    if (!isspace((unsigned char)contentStart[1])) return false;

    return !(isspace((unsigned char)contentStart[2]) || contentStart[2] == '-');
}
464
465	#define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums
466
467	static EnumerationType startsWithLetter(string& s, unsigned& number) {
468	// tests if first line starts with 'letter.'
469	// if true then 'letter.' is removed from the string
470	// the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..)
471
472	size_t off = s.find_first_not_of(" \n");
473	if (off == string::npos) return NONE;
474	if (!isalpha(s[off])) return NONE;
475
476	size_t astart = off;
477	EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER;
478
479	number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1;
480	++off;
481
482	h2x_assert(number>0 && number<MAX_ALLOWED_ENUM);
483
484	if (s[off] != '.' && s[off] != ')') return NONE;
485	if (s[off+1] != ' ') return NONE;
486
487	// remove 'letter.' from string :
488	++off;
489	while (s[off+1] == ' ') ++off;
490	s.erase(astart, off-astart+1);
491
492	return etype;
493	}
494
495	static bool startsWithNumber(string& s, unsigned& number) {
496	// tests if first line starts with 'number.'
497	// if true then 'number.' is removed from the string
498
499	size_t off = s.find_first_not_of(" \n");
500	if (off == string::npos) return false;
501	if (!isdigit(s[off])) return false;
502
503	size_t num_start = off;
504	number = 0;
505
506	for (; isdigit(s[off]); ++off) {
507	number = number*10 + (s[off]-'0');
508	}
509	if (number>MAX_ALLOWED_ENUM) return false;
510
511	if (s[off] != '.' && s[off] != ')') return false;
512	if (s[off+1] != ' ') return false;
513
514	// remove 'number.' from string :
515	++off;
516	while (s[off+1] == ' ') ++off;
517	s.erase(num_start, off-num_start+1);
518
519	return true;
520	}
521
522	static EnumerationType detectLineEnumType(string& line, unsigned& number) {
523	if (startsWithNumber(line, number)) return DIGITS;
524	return startsWithLetter(line, number);
525	}
526
527	static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
528	string paragraph = line;
529	size_t para_start_lineno = reader.getLineNo();
530
531	ParagraphType type = PLAIN_TEXT;
532	EnumerationType etype = NONE;
533	unsigned num = 0;
534
535	unsigned last_alpha_num = -1;
536
537	h2x_assert(sec.Content().empty());
538
539	while (1) {
540	line = reader.getNext();
541	if (!line) break;
542
543	if (isEmptyOrComment(line)) {
544	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
545	check_TODO(line, reader);
546	}
547	else {
548	string keyword;
549	const char *rest = extractKeyword(line, keyword);
550
551	if (rest) { // a new keyword
552	reader.back();
553	break;
554	}
555
556	check_TODO(line, reader);
557
558	string Line = line;
559
560	if (sec.get_type() == SEC_OCCURRENCE) {
561	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
562	}
563	else {
564	const char *firstNonWhite = firstChar(line);
565	if (is_startof_itemlist_element(firstNonWhite)) {
566	h2x_assert(firstNonWhite != line);
567
568	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
569
570	Line[firstNonWhite-line] = ' ';
571	type = ITEM; // is reset in call to pushParagraph
572	}
573	else {
574	unsigned foundNum;
575	EnumerationType foundEtype = detectLineEnumType(Line, foundNum);
576
577	if (foundEtype == ALPHA_UPPER \|\| foundEtype == ALPHA_LOWER) {
578	if (foundNum == (last_alpha_num+1) \|\| foundNum == 1) {
579	last_alpha_num = foundNum;
580	}
581	else {
582	#if defined(WARN_IGNORED_ALPHA_ENUMS)
583	add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum"));
584	#endif
585	foundEtype = NONE;
586
587	reader.back();
588	Line = reader.getNext();
589	last_alpha_num = -1;
590	}
591	}
592
593	if (foundEtype != NONE) {
594	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
595
596	type = ENUMERATED;
597	num = foundNum;
598	etype = foundEtype;
599
600	if (!num) {
601	h2x_assert(etype == DIGITS);
602	throw "Enumerations starting with zero are not supported";
603	}
604	}
605	}
606	}
607
608	if (paragraph.length()) {
609	paragraph = paragraph+"\n"+Line;
610	}
611	else {
612	paragraph = string("\n")+Line;
613	para_start_lineno = reader.getLineNo();
614	}
615	}
616	}
617
618	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
619
620	if (sec.Content().size()>0 && indentation>0) {
621	string spaces;
622	spaces.reserve(indentation);
623	spaces.append(indentation, ' ');
624
625	string& ostr = sec.Content().front();
626	ostr = string("\n") + spaces + ostr;
627	}
628	}
629
630	inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) {
631	for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) {
632	if (ex->Target() == link) {
633	if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno());
634	throw strf("Link to '%s' duplicated here.", link.c_str());
635	}
636	}
637	}
638	inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) {
639	check_specific_duplicates(link, uplinks, add_warnings);
640	check_specific_duplicates(link, references, add_warnings);
641	}
642
643	static void warnAboutDuplicate(SectionList& sections) {
644	set<string> seen;
645	SectionList::iterator end = sections.end();
646	for (SectionList::iterator s = sections.begin(); s != end; ++s) {
647	const string& sname = s->getName();
648	if (sname == "NOTES") continue; // do not warn about multiple NOTES sections
649
650	SectionList::iterator o = s; ++o;
651	for (; o != end; ++o) {
652	if (sname == o->getName()) {
653	o->attach_warning("duplicated SECTION name");
654	if (seen.find(sname) == seen.end()) {
655	s->attach_warning("name was first used");
656	seen.insert(sname);
657	}
658	}
659	}
660	}
661	}
662
663	void Helpfile::readHelp(istream& in, const string& filename) {
664	if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str()));
665
666	Reader read(in);
667
668	inputfile = filename; // remember file read (for comment)
669
670	const char *line;
671	const char *name_only = strrchr(filename.c_str(), '/');
672
673	h2x_assert(name_only);
674	++name_only;
675
676	try {
677	while (1) {
678	line = read.getNext();
679	if (!line) break;
680
681	if (isEmptyOrComment(line)) {
682	check_TODO(line, read);
683	continue;
684	}
685
686	check_TODO(line, read);
687
688	string keyword;
689	const char *rest = extractKeyword(line, keyword);
690
691	if (rest) { // found a keyword
692	if (keyword == "UP") {
693	rest = eatWhite(rest);
694	if (strlen(rest)) {
695	check_duplicates(rest, uplinks, references, true);
696	if (strcmp(name_only, rest) == 0) throw "UP link to self";
697
698	uplinks.push_back(Link(rest, read.getLineNo()));
699	}
700	}
701	else if (keyword == "SUB") {
702	rest = eatWhite(rest);
703	if (strlen(rest)) {
704	check_duplicates(rest, uplinks, references, true);
705	if (strcmp(name_only, rest) == 0) throw "SUB link to self";
706
707	references.push_back(Link(rest, read.getLineNo()));
708	}
709	}
710	else if (keyword == "TITLE") {
711	rest = eatWhite(rest);
712	parseSection(title, rest, 0, read);
713
714	if (title.Content().empty()) throw "empty TITLE not allowed";
715
716	const char *t = title.Content().front().c_str();
717
718	if (strstr(t, "Standard help file form")) {
719	throw strf("Illegal title for help file: '%s'", t);
720	}
721	}
722	else {
723	if (keyword == "NOTE") keyword = "NOTES";
724	if (keyword == "EXAMPLE") keyword = "EXAMPLES";
725	if (keyword == "WARNING") keyword = "WARNINGS";
726
727	SectionType stype = SEC_NONE;
728	int idx;
729	for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) {
730	if (knownSections[idx] == keyword) {
731	stype = SectionType(idx);
732	break;
733	}
734	}
735
736	size_t lineno = read.getLineNo();
737
738	if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str());
739
740	if (stype == SEC_SECTION) {
741	string section_name = eatWhite(rest);
742	Section sec(section_name, stype, lineno);
743	parseSection(sec, "", 0, read);
744	sections.push_back(sec);
745	}
746	else {
747	Section sec(keyword, stype, lineno);
748	rest = eatWhite(rest);
749	parseSection(sec, rest, rest-line, read);
750	sections.push_back(sec);
751	}
752	}
753	}
754	else {
755	throw strf("Unhandled line");
756	}
757	}
758
759	warnAboutDuplicate(sections);
760	}
761	catch (string& err) { throw read.attached_message(err); }
762	catch (const char *err) { throw read.attached_message(err); }
763	}
764
765	static bool shouldReflow(const string& s, int& foundIndentation) {
766	// foundIndentation is only valid if shouldReflow() returns true
767	enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START;
768	bool equal_indent = true;
769	int lastIndent = -1;
770	int thisIndent = 0;
771
772	for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) {
773	if (*c == '\n') {
774	state = START;
775	thisIndent = 0;
776	}
777	else if (isWhite(*c)) {
778	if (state == DOT \|\| state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed
779	else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces
780	else if (state == CHAR) state = SPACE; // now seen 1 space
781	}
782	else {
783	if (state == MULTIPLE) return false; // character after multiple spaces
784	if (state == START) {
785	if (lastIndent == -1) lastIndent = thisIndent;
786	else if (lastIndent != thisIndent) equal_indent = false;
787	}
788	state = (c == '.' \|\| c == ',') ? DOT : CHAR;
789	}
790	}
791
792	if (lastIndent<0) {
793	equal_indent = false;
794	}
795
796	if (equal_indent) {
797	foundIndentation = lastIndent-1;
798	h2x_assert(foundIndentation >= 0);
799	}
800	return equal_indent;
801	}
802
803	static string correctSpaces(const string& text, int change) {
804	h2x_assert(text.find('\n') == string::npos);
805
806	if (!change) return text;
807
808	size_t first = text.find_first_not_of(' ');
809	if (first == string::npos) return ""; // empty line
810
811	if (change<0) {
812	int remove = -change;
813	h2x_assert(remove <= int(first));
814	return text.substr(remove);
815	}
816
817	h2x_assert(change>0); // add spaces
818	return string(change, ' ')+text;
819	}
820
821	static string correctIndentation(const string& text, int change) {
822	// removes 'remove' spaces from every line
823
824	size_t this_lineend = text.find('\n');
825	string result;
826
827	if (this_lineend == string::npos) {
828	result = correctSpaces(text, change);
829	}
830	else {
831	result = correctSpaces(text.substr(0, this_lineend), change);
832
833	while (this_lineend != string::npos) {
834	size_t next_lineend = text.find('\n', this_lineend+1);
835	if (next_lineend == string::npos) { // last line
836	result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change);
837	}
838	else {
839	result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change);
840	}
841	this_lineend = next_lineend;
842	}
843	}
844	return result;
845	}
846
inline size_t countSpaces(const string& text) {
    // returns the number of leading spaces of 'text'
    // (INT_MAX if 'text' contains nothing but spaces)
    const size_t first = text.find_first_not_of(' ');
    return first == string::npos ? size_t(INT_MAX) : first; // npos -> empty line
}
852
853	static size_t scanMinIndentation(const string& text) {
854	size_t this_lineend = text.find('\n');
855	size_t min_indent = INT_MAX;
856
857	if (this_lineend == string::npos) {
858	min_indent = countSpaces(text);
859	}
860	else {
861	while (this_lineend != string::npos) {
862	size_t next_lineend = text.find('\n', this_lineend+1);
863	if (next_lineend == string::npos) {
864	min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1)));
865	}
866	else {
867	min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1)));
868	}
869	this_lineend = next_lineend;
870	}
871	}
872
873	if (min_indent == INT_MAX) min_indent = 0; // only empty lines
874	return min_indent;
875	}
876
877	// -----------------------------
878	// class ParagraphTree
879
880	class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable {
881	ParagraphTree *brother; // has same indentation as this
882	ParagraphTree *son; // indentation + 1
883
884	Ostring otext; // text of the Section (containing linefeeds)
885
886	bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text)
887	int indentation; // the real indentation of the blank (behind removed enumeration)
888
889
890	string location_description() const OVERRIDE { return "in paragraph starting here"; }
891	size_t line_number() const OVERRIDE { return otext.get_lineno(); }
892
893	ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end)
894	: son(NULp),
895	otext(*begin),
896	indentation(0)
897	{
898	h2x_assert(begin != end);
899
900	string& text = otext;
901
902	reflow = shouldReflow(text, indentation);
903	if (!reflow) {
904	size_t reststart = text.find('\n', 1);
905
906	if (reststart == 0) {
907	attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail");
908	}
909
910	if (reststart != string::npos) {
911	int rest_indent = -1;
912	string rest = text.substr(reststart);
913	bool rest_reflow = shouldReflow(rest, rest_indent);
914
915	if (rest_reflow) {
916	int first_indent = countSpaces(text.substr(1));
917	if (get_type() == PLAIN_TEXT) {
918	size_t last = text.find_last_not_of(' ', reststart-1);
919	bool is_header = last != string::npos && text[last] == ':';
920
921	if (!is_header && rest_indent == (first_indent+8)) {
922	#if defined(DEBUG)
923	size_t textstart = text.find_first_not_of(" \n");
924	h2x_assert(textstart != string::npos);
925	#endif // DEBUG
926
927	text = text.substr(0, reststart)+correctIndentation(rest, -8);
928	reflow = shouldReflow(text, indentation);
929	}
930	}
931	else {
932	int diff = rest_indent-first_indent;
933	if (diff>0) {
934	text = text.substr(0, reststart)+correctIndentation(rest, -diff);
935	reflow = shouldReflow(text, indentation);
936	}
937	else if (diff<0) {
938	// paragraph with more indent on first line (occurs?)
939	attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff));
940	}
941	}
942	}
943	}
944	}
945
946	if (!reflow) {
947	indentation = scanMinIndentation(text);
948	}
949	text = correctIndentation(text, -indentation);
950	if (get_type() == ITEM) {
951	h2x_assert(indentation >= 2);
952	indentation -= 2;
953	}
954
955	brother = buildParagraphTree(++begin, end);
956	}
957
958	void brothers_to_sons(ParagraphTree *new_brother);
959
960	public:
961	virtual ~ParagraphTree() {
962	delete brother;
963	delete son;
964	}
965
966	ParagraphType get_type() const { return otext.get_type(); }
967
968	bool is_itemlist_member() const { return get_type() == ITEM; }
969	unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; }
970	EnumerationType get_enum_type() const { return otext.get_enum_type(); }
971
972	const char *readable_type() const {
973	const char *res = NULp;
974	switch (get_type()) {
975	case PLAIN_TEXT: res = "PLAIN_TEXT"; break;
976	case ITEM: res = "ITEM"; break;
977	case ENUMERATED: res = "ENUMERATED"; break;
978	}
979	return res;
980	}
981
982	size_t countTextNodes() {
983	size_t nodes = 1; // this
984	if (son) nodes += son->countTextNodes();
985	if (brother) nodes += brother->countTextNodes();
986	return nodes;
987	}
988
989	#if defined(DUMP_PARAGRAPHS)
990	void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; }
991	char masknl(const char text) {
992	char *result = ARB_strdup(text);
993	for (int i = 0; result[i]; ++i) {
994	if (result[i] == '\n') result[i] = '\|';
995	}
996	return result;
997	}
998	void dump(ostream& out, int indent = 0) {
999	print_indent(out, indent+1);
1000	{
1001	char *mtext = masknl(otext.as_string().c_str());
1002	out << "text='" << mtext << "'\n";
1003	free(mtext);
1004	}
1005
1006	print_indent(out, indent+1);
1007	out << "type='" << readable_type() << "' ";
1008	if (get_type() == ENUMERATED) {
1009	out << "enumeration='" << otext.get_number() << "' ";
1010	}
1011	out << "reflow='" << reflow << "' ";
1012	out << "indentation='" << indentation << "'\n";
1013
1014	if (son) {
1015	print_indent(out, indent+2); cout << "son:\n";
1016	son->dump(out, indent+2);
1017	cout << "\n";
1018	}
1019	if (brother) {
1020	print_indent(out, indent); cout << "brother:\n";
1021	brother->dump(out, indent);
1022	}
1023	}
1024	#endif // DUMP_PARAGRAPHS
1025
1026	private:
1027	static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) {
1028	if (begin == end) return NULp;
1029	return new ParagraphTree(begin, end);
1030	}
1031	public:
1032	static ParagraphTree* buildParagraphTree(const Section& sec) {
1033	const Ostrings& txt = sec.Content();
1034	if (txt.empty()) throw "attempt to build an empty ParagraphTree";
1035	return buildParagraphTree(txt.begin(), txt.end());
1036	}
1037
1038	bool contains(ParagraphTree *that) {
1039	return
1040	this == that \|\|
1041	(son && son->contains(that)) \|\|
1042	(brother && brother->contains(that));
1043	}
1044
1045	ParagraphTree predecessor(ParagraphTree before_this) {
1046	if (brother == before_this) return this;
1047	if (!brother) return NULp;
1048	return brother->predecessor(before_this);
1049	}
1050
1051	void append(ParagraphTree *new_brother) {
1052	if (!brother) brother = new_brother;
1053	else brother->append(new_brother);
1054	}
1055
1056	bool is_some_brother(const ParagraphTree *other) const {
1057	return (other == brother) \|\| (brother && brother->is_some_brother(other));
1058	}
1059
1060	ParagraphTree* takeAllInFrontOf(ParagraphTree *after) {
1061	ParagraphTree *removed = this;
1062	ParagraphTree *after_pred = this;
1063
1064	h2x_assert(is_some_brother(after));
1065
1066	while (1) {
1067	h2x_assert(after_pred);
1068	h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after'
1069
1070	if (after_pred->brother == after) { // found after
1071	after_pred->brother = NULp; // unlink
1072	break;
1073	}
1074	after_pred = after_pred->brother;
1075	}
1076
1077	return removed;
1078	}
1079
1080	ParagraphTree *firstListMember() {
1081	switch (get_type()) {
1082	case PLAIN_TEXT: break;
1083	case ITEM: return this;
1084	case ENUMERATED: {
1085	if (get_enumeration() == 1) return this;
1086	break;
1087	}
1088	}
1089	if (brother) return brother->firstListMember();
1090	return NULp;
1091	}
1092
1093	ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) {
1094	if (indentation<previous.indentation) return NULp;
1095	if (indentation == previous.indentation && get_type() == previous.get_type()) {
1096	if (get_type() != ENUMERATED) return this;
1097	if (get_enumeration() > previous.get_enumeration()) return this;
1098	return NULp;
1099	}
1100	if (!brother) return NULp;
1101	return brother->nextListMemberAfter(previous);
1102	}
1103	ParagraphTree *nextListMember() const {
1104	return brother ? brother->nextListMemberAfter(*this) : NULp;
1105	}
1106
1107	ParagraphTree* firstWithLessIndentThan(int wanted_indentation) {
1108	if (indentation < wanted_indentation) return this;
1109	if (!brother) return NULp;
1110	return brother->firstWithLessIndentThan(wanted_indentation);
1111	}
1112
1113	void format_indentations();
1114	void format_lists();
1115
1116	private:
1117	static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) {
1118	Ostrings S;
1119	S.push_back(Ostring(Text, beginLineNo, type));
1120	return new ParagraphTree(S.begin(), S.end());
1121	}
1122	ParagraphTree *xml_write_list_contents();
1123	ParagraphTree *xml_write_enum_contents();
1124	void xml_write_textblock();
1125
1126	public:
1127	void xml_write();
1128	};
1129
#if defined(DUMP_PARAGRAPHS)
static void dump_paragraph(ParagraphTree *para) {
    // helper function for use in gdb
    // (dumps 'para' to stdout, starting w/o indentation)
    const int no_indent = 0;
    para->dump(cout, no_indent);
}
#endif
1136
1137	void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) {
1138	/*! folds down brothers to sons
1139	* @param new_brother brother of 'this->brother', will become new brother.
1140	* If new_brother == NULp -> make all brothers sons.
1141	*/
1142
1143	if (new_brother) {
1144	h2x_assert(is_some_brother(new_brother));
1145
1146	if (brother != new_brother) {
1147	#if defined(DEBUG)
1148	if (son) {
1149	son->attach_warning("Found unexpected son (in brothers_to_sons)");
1150	brother->attach_warning("while trying to transform paragraphs from here ..");
1151	new_brother->attach_warning(".. to here ..");
1152	attach_warning(".. into sons of this paragraph.");
1153	return;
1154	}
1155	#endif
1156
1157	h2x_assert(!son);
1158	h2x_assert(brother);
1159
1160	if (!new_brother) { // all brothers -> sons
1161	son = brother;
1162	brother = NULp;
1163	}
1164	else {
1165	son = brother->takeAllInFrontOf(new_brother);
1166	brother = new_brother;
1167	}
1168	}
1169	}
1170	else {
1171	h2x_assert(!son);
1172	son = brother;
1173	brother = NULp;
1174	}
1175	}
1176	void ParagraphTree::format_lists() {
1177	// reformats tree such that all items/enumerations are brothers
1178	ParagraphTree *member = firstListMember();
1179	if (member) {
1180	for (ParagraphTree *curr = this; curr != member; curr = curr->brother) {
1181	h2x_assert(curr);
1182	if (curr->son) curr->son->format_lists();
1183	}
1184
1185	for (ParagraphTree *next = member->nextListMember();
1186	next;
1187	member = next, next = member->nextListMember())
1188	{
1189	member->brothers_to_sons(next);
1190	h2x_assert(member->brother == next);
1191
1192	if (member->son) member->son->format_lists();
1193	}
1194
1195	h2x_assert(!member->son); // member is the last item
1196
1197	if (member->brother) {
1198	ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1);
1199	member->brothers_to_sons(non_member);
1200	}
1201
1202	if (member->son) member->son->format_lists();
1203	if (member->brother) member->brother->format_lists();
1204	}
1205	else {
1206	for (ParagraphTree *curr = this; curr; curr = curr->brother) {
1207	h2x_assert(curr);
1208	if (curr->son) curr->son->format_lists();
1209	}
1210	}
1211	}
1212
1213	void ParagraphTree::format_indentations() {
1214	if (brother) {
1215	ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1);
1216	#if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION)
1217	if (same_indent && indentation != same_indent->indentation) {
1218	same_indent->attach_warning("indentation is assumed to be same as ..");
1219	attach_warning(".. here");
1220	}
1221	#endif
1222	brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs
1223	if (brother) brother->format_indentations();
1224	}
1225
1226	if (son) son->format_indentations();
1227	}
1228
1229	// -----------------
1230	// LinkType
1231
// Link types are single bits (allows to test against several types using a bit mask).
enum LinkType {
    LT_UNKNOWN = 0,
    LT_HTTP    = 1<<0,
    LT_HTTPS   = 1<<1,
    LT_FTP     = 1<<2,
    LT_FILE    = 1<<3,
    LT_EMAIL   = 1<<4,
    LT_HLP     = 1<<5,
    LT_PS      = 1<<6,
    LT_PDF     = 1<<7
};

// Type-ids written into the XML.
// Index 0 belongs to LT_UNKNOWN, index (N+1) belongs to the LinkType using bit N.
static const char *link_id[] = {
    "unknown",
    "www",   // "http:"
    "www",   // "https:"
    "www",   // "ftp:"
    "www",   // "file:"
    "email",
    "hlp",
    "ps",
    "pdf",
};

static string LinkType2id(LinkType type) {
    // Translates a LinkType into its id string:
    // counts how often 'type' can be shifted right until it gets zero
    // and uses that count as index into 'link_id'.
    int shifts = 0;
    for (; type >= 1; type = LinkType(type>>1)) {
        ++shifts;
    }
    return link_id[shifts];
}
1264
1265	inline const char *getExtension(const string& name) {
1266	size_t last_dot = name.find_last_of('.');
1267	if (last_dot == string::npos) {
1268	return NULp;
1269	}
1270	return name.c_str()+last_dot+1;
1271	}
1272
1273	static LinkType detectLinkType(const string& link_target) {
1274	LinkType type = LT_UNKNOWN;
1275	const char *ext = getExtension(link_target);
1276
1277	if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP;
1278	else if (link_target.find("http://") == 0) type = LT_HTTP;
1279	else if (link_target.find("https://") == 0) type = LT_HTTPS;
1280	else if (link_target.find("ftp://") == 0) type = LT_FTP;
1281	else if (link_target.find("file://") == 0) type = LT_FILE;
1282	else if (link_target.find('@') != string::npos) type = LT_EMAIL;
1283	else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS;
1284	else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF;
1285
1286	return type;
1287	}
1288
1289	// --------------------------------------------------------------------------------
1290
1291
1292
static string locate_helpfile(const string& helpname) {
    // Searches for 'helpname' in the known helpfile directories.
    // Returns the first "<directory><helpname>" for which stat() succeeds,
    // or an empty string if none exists.

    static const char *const search_dir[] = { "oldhelp/", "genhelp/" };
    const size_t             dir_count    = sizeof(search_dir)/sizeof(search_dir[0]);

    for (size_t d = 0; d<dir_count; ++d) {
        const string candidate = search_dir[d]+helpname;
        struct stat  info;
        if (stat(candidate.c_str(), &info) == 0) {
            return candidate;
        }
    }
    return string();
}
1309
1310	static string locate_document(const string& docname) {
1311	// search for 'docname' or 'docname.gz' in various helpfile locations
1312
1313	string located = locate_helpfile(docname);
1314	if (located.empty()) {
1315	located = locate_helpfile(docname+".gz");
1316	}
1317	return located;
1318	}
1319
1320	static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) {
1321	if (type == LT_UNKNOWN) {
1322	string msg = string("Invalid link (dest='")+dest+"')";
1323	throw LineAttachedMessage(msg, source_line);
1324	}
1325
1326	link.add_attribute("dest", dest);
1327	link.add_attribute("type", LinkType2id(type));
1328	link.add_attribute("source_line", source_line);
1329
1330	if (type&(LT_HLP\|LT_PDF\|LT_PS)) { // other links (www, email) cannot be checked for existence here
1331	string fullhelp = ((type&LT_HLP) ? locate_helpfile : locate_document)(dest);
1332	if (fullhelp.empty()) {
1333	link.add_attribute("missing", "1");
1334	string deadlink = strf("Dead link to '%s'", dest.c_str());
1335	#if defined(DEVEL_RELEASE)
1336	throw LineAttachedMessage(deadlink, source_line);
1337	#else // !defined(DEVEL_RELEASE)
1338	add_warning(deadlink, source_line);
1339	#endif
1340	}
1341	}
1342	}
1343
1344	static void print_XML_Text_expanding_links(const string& text, size_t lineNo) {
1345	size_t found = text.find("LINK{", 0);
1346	if (found != string::npos) {
1347	size_t inside_link = found+5;
1348	size_t close = text.find('}', inside_link);
1349
1350	if (close == string::npos) throw "unclosed 'LINK{}'";
1351
1352	string link_target = text.substr(inside_link, close-inside_link);
1353	LinkType type = detectLinkType(link_target);
1354	string dest = link_target;
1355
1356	XML_Text(text.substr(0, found));
1357
1358	{
1359	XML_Tag link("LINK");
1360	link.set_on_extra_line(false);
1361	add_link_attributes(link, type, dest, lineNo);
1362	}
1363
1364	print_XML_Text_expanding_links(text.substr(close+1), lineNo);
1365	}
1366	else {
1367	XML_Text t(text);
1368	}
1369	}
1370
1371	void ParagraphTree::xml_write_textblock() {
1372	XML_Tag textblock("T");
1373	textblock.add_attribute("reflow", reflow ? "1" : "0");
1374
1375	{
1376	string usedText;
1377	const string& text = otext;
1378	if (reflow) {
1379	usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level);
1380	}
1381	else {
1382	usedText = text;
1383	}
1384	print_XML_Text_expanding_links(usedText, otext.get_lineno());
1385	}
1386	}
1387
1388	ParagraphTree *ParagraphTree::xml_write_list_contents() {
1389	h2x_assert(is_itemlist_member());
1390	#if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1391	if (!reflow) attach_warning("ITEM not reflown (check output)");
1392	#endif
1393	{
1394	XML_Tag entry("ENTRY");
1395	entry.add_attribute("item", "1");
1396	xml_write_textblock();
1397	if (son) son->xml_write();
1398	}
1399	if (brother && brother->is_itemlist_member()) {
1400	return brother->xml_write_list_contents();
1401	}
1402	return brother;
1403	}
1404	ParagraphTree *ParagraphTree::xml_write_enum_contents() {
1405	h2x_assert(get_enumeration());
1406	#if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1407	if (!reflow) attach_warning("ENUMERATED not reflown (check output)");
1408	#endif
1409	{
1410	XML_Tag entry("ENTRY");
1411	switch (get_enum_type()) {
1412	case DIGITS:
1413	entry.add_attribute("enumerated", strf("%i", get_enumeration()));
1414	break;
1415	case ALPHA_UPPER:
1416	entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration()));
1417	break;
1418	case ALPHA_LOWER:
1419	entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration()));
1420	break;
1421	default:
1422	h2x_assert(0);
1423	break;
1424	}
1425	xml_write_textblock();
1426	if (son) son->xml_write();
1427	}
1428	if (brother && brother->get_enumeration()) {
1429	int diff = brother->get_enumeration()-get_enumeration();
1430	if (diff != 1) {
1431	attach_warning("Non-consecutive enumeration detected between here..");
1432	brother->attach_warning(".. and here");
1433	}
1434	return brother->xml_write_enum_contents();
1435	}
1436	return brother;
1437	}
1438
1439	void ParagraphTree::xml_write() {
1440	try {
1441	ParagraphTree *next = NULp;
1442	if (get_enumeration()) {
1443	XML_Tag enu("ENUM");
1444	if (get_enumeration() != 1) {
1445	attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration()));
1446	}
1447	next = xml_write_enum_contents();
1448	#if defined(WARN_LONESOME_ENUM_ELEMENTS)
1449	if (next == brother) attach_warning("Suspicious single-element-ENUM");
1450	#endif
1451	}
1452	else if (is_itemlist_member()) {
1453	XML_Tag list("LIST");
1454	next = xml_write_list_contents();
1455	#if defined(WARN_LONESOME_LIST_ELEMENTS)
1456	if (next == brother) attach_warning("Suspicious single-element-LIST");
1457	#endif
1458	}
1459	else {
1460	{
1461	XML_Tag para("P");
1462	xml_write_textblock();
1463	if (son) son->xml_write();
1464	}
1465	next = brother;
1466	}
1467	if (next) next->xml_write();
1468	}
1469	catch (string& err) { throw attached_message(err); }
1470	catch (const char *err) { throw attached_message(err); }
1471	}
1472
1473	static void create_top_links(const Links& links, const char *tag) {
1474	for (Links::const_iterator s = links.begin(); s != links.end(); ++s) {
1475	XML_Tag link(tag);
1476	add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno());
1477	}
1478	}
1479
1480	void Helpfile::writeXML(FILE *out, const string& page_name) {
1481	XML_Document xml("PAGE", "arb_help.dtd", out);
1482
1483	xml.skip_empty_tags = true;
1484	xml.indentation_per_level = 2;
1485
1486	xml.getRoot().add_attribute("name", page_name);
1487	#if defined(DEBUG)
1488	xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version
1489	#else
1490	xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version
1491	#endif // DEBUG
1492
1493	xml.getRoot().add_attribute("source", inputfile.c_str());
1494
1495	{
1496	XML_Comment(string("automatically generated from ../")+inputfile+' ');
1497	}
1498
1499	create_top_links(uplinks, "UP");
1500	create_top_links(references, "SUB");
1501	create_top_links(auto_references, "SUB");
1502
1503	{
1504	XML_Tag title_tag("TITLE");
1505	const Ostrings& T = title.Content();
1506	for (Ostrings::const_iterator s = T.begin(); s != T.end(); ++s) {
1507	if (s != T.begin()) { XML_Text text("\n"); }
1508	XML_Text text(*s);
1509	}
1510	}
1511
1512	for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1513	try {
1514	XML_Tag section_tag("SECTION");
1515	section_tag.add_attribute("name", sec->getName());
1516
1517	ParagraphTree ptree = ParagraphTree::buildParagraphTree(sec);
1518
1519	#if defined(DEBUG)
1520	size_t textnodes = ptree->countTextNodes();
1521	#endif
1522	#if defined(DUMP_PARAGRAPHS)
1523	cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n";
1524	ptree->dump(cout);
1525	cout << "----------------------------------------\n";
1526	#endif
1527
1528	ptree->format_lists();
1529
1530	#if defined(DUMP_PARAGRAPHS)
1531	cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n";
1532	ptree->dump(cout);
1533	cout << "----------------------------------------\n";
1534	#endif
1535	#if defined(DEBUG)
1536	size_t textnodes2 = ptree->countTextNodes();
1537	h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error
1538	#endif
1539
1540	ptree->format_indentations();
1541
1542	#if defined(DUMP_PARAGRAPHS)
1543	cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n";
1544	ptree->dump(cout);
1545	cout << "----------------------------------------\n";
1546	#endif
1547	#if defined(DEBUG)
1548	size_t textnodes3 = ptree->countTextNodes();
1549	h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error
1550	#endif
1551
1552	ptree->xml_write();
1553
1554	delete ptree;
1555	}
1556	catch (string& err) { throw sec->attached_message(err); }
1557	catch (const char *err) { throw sec->attached_message(err); }
1558	}
1559	}
1560
1561	void Helpfile::extractInternalLinks() {
1562	for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1563	try {
1564	const Ostrings& s = sec->Content();
1565
1566	for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) {
1567	const string& line = *li;
1568	size_t start = 0;
1569
1570	while (1) {
1571	size_t found = line.find("LINK{", start);
1572	if (found == string::npos) break;
1573	found += 5;
1574	size_t close = line.find('}', found);
1575	if (close == string::npos) break;
1576
1577	string link_target = line.substr(found, close-found);
1578
1579	if (link_target.find("http://") == string::npos &&
1580	link_target.find("https://")== string::npos &&
1581	link_target.find("ftp://") == string::npos &&
1582	link_target.find("file://") == string::npos &&
1583	link_target.find('@') == string::npos)
1584	{
1585	check_self_ref(link_target);
1586
1587	try {
1588	check_specific_duplicates(link_target, references, false); // check only sublinks here
1589	check_specific_duplicates(link_target, uplinks, false); // check only uplinks here
1590	check_specific_duplicates(link_target, auto_references, false); // check only sublinks here
1591
1592	// only auto-add inline reference if none of the above checks has thrown
1593	auto_references.push_back(Link(link_target, sec->line_number()));
1594	}
1595	catch (string& err) {
1596	; // silently ignore inlined
1597	}
1598	}
1599	start = close+1;
1600	}
1601	}
1602	}
1603	catch (string& err) {
1604	throw sec->attached_message("'"+err+"' while scanning LINK{}");
1605	}
1606	}
1607	}
1608
1609	static void show_err(const string& err, size_t lineno, const string& helpfile) {
1610	if (err.find(helpfile+':') != string::npos) {
1611	cerr << err;
1612	}
1613	else if (lineno == NO_LINENUMBER_INFO) {
1614	cerr << helpfile << ":1: [in unknown line] " << err;
1615	}
1616	else {
1617	cerr << helpfile << ":" << lineno << ": " << err;
1618	}
1619	cerr << '\n';
1620	}
1621	inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) {
1622	show_err(line_err.Message(), line_err.Lineno(), helpfile);
1623	}
1624	inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) {
1625	show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile);
1626	}
1627	inline void show_warnings(const string& helpfile) {
1628	for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) {
1629	show_warning(*wi, helpfile);
1630	}
1631	}
1632	static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) {
1633	show_err(error, helpfile);
1634	show_warnings(helpfile);
1635	}
1636
1637	int ARB_main(int argc, char *argv[]) {
1638	if (argc != 3) {
1639	cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n";
1640	return EXIT_FAILURE;
1641	}
1642
1643	Helpfile help;
1644	string arb_help;
1645
1646	try {
1647	try {
1648	arb_help = argv[1];
1649	string xml_output = argv[2];
1650
1651	{
1652	ifstream in(arb_help.c_str());
1653	help.readHelp(in, arb_help);
1654	}
1655
1656	help.extractInternalLinks();
1657
1658	{
1659	FILE *out = std::fopen(xml_output.c_str(), "wt");
1660	if (!out) throw string("Can't open '")+xml_output+'\'';
1661
1662	try {
1663	// arb_help contains 'oldhelp/name.hlp'
1664	size_t slash = arb_help.find('/');
1665	size_t dot = arb_help.find_last_of('.');
1666
1667	if (slash == string::npos \|\| dot == string::npos) {
1668	throw string("parameter <ARB helpfile> has to be in format 'oldhelp/name.hlp' (not '"+arb_help+"')");
1669	}
1670
1671	string page_name(arb_help, slash+1, dot-slash-1);
1672	help.writeXML(out, page_name);
1673	fclose(out);
1674	}
1675	catch (...) {
1676	fclose(out);
1677	remove(xml_output.c_str());
1678	throw;
1679	}
1680	}
1681
1682	show_warnings(arb_help);
1683
1684	return EXIT_SUCCESS;
1685	}
1686	catch (string& err) { throw unattached_message(err); }
1687	catch (const char * err) { throw unattached_message(err); }
1688	catch (LineAttachedMessage& err) { throw; }
1689	catch (...) { throw unattached_message("unknown exception in arb_help2xml"); }
1690	}
1691	catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); }
1692	catch (...) { h2x_assert(0); }
1693
1694	return EXIT_FAILURE;
1695	}
1696
1697	// --------------------------------------------------------------------------------
1698
1699	#ifdef UNIT_TESTS
1700	#include <test_unit.h>
1701	#include <arb_msg.h>
1702
1703	static arb_test::match_expectation help_file_compiles(const char helpfile, const char expected_title, const char *expected_error_part) {
1704	using namespace arb_test;
1705	expectation_group expected;
1706
1707	ifstream in(helpfile);
1708
1709	LineAttachedMessage *error = NULp;
1710
1711	Helpfile help;
1712	try {
1713	help.readHelp(in, helpfile);
1714	help.extractInternalLinks();
1715
1716	FILE *devnul = fopen("/dev/null", "wt");
1717	if (!devnul) throw unattached_message("can't write to null device");
1718	help.writeXML(devnul, "dummy");
1719	fclose(devnul);
1720	}
1721	catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); }
1722	catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); }
1723
1724	if (expected_error_part) {
1725	expected.add(that(error).does_differ_from_NULL());
1726	if (error) expected.add(that(error->Message()).does_contain(expected_error_part));
1727	}
1728	else {
1729	expected.add(that(error).is_equal_to_NULL());
1730	if (!error) {
1731	Section title = help.get_title();
1732	const Ostrings& title_strings = title.Content();
1733
1734	expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title));
1735	expected.add(that(title_strings.size()).is_equal_to(1));
1736	}
1737	else {
1738	show_error_and_warnings(*error, helpfile);
1739	}
1740	}
1741
1742	delete error;
1743
1744	return all().ofgroup(expected);
1745	}
1746
1747	#define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULp))
1748	#define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULp,expError))
1749
// Checks that a few helpfiles can be converted and provide the expected title,
// and that converting a non-existing helpfile reports an error.
// Changes the working directory to '../../HELP_SOURCE' first.
void TEST_hlp2xml_conversion() {
    TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE"));

    HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp

    HELP_FILE_COMPILES("oldhelp/markbyref.hlp", "Mark by reference"); // oldhelp/markbyref.hlp
    HELP_FILE_COMPILES("oldhelp/ad_align.hlp", "Alignment Administration"); // oldhelp/ad_align.hlp
    HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights and licenses"); // genhelp/copyright.hlp

    HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file
}
1761	TEST_PUBLISH(TEST_hlp2xml_conversion);
1762
1763
1764	// #define TEST_AUTO_UPDATE // uncomment to update expected xml // @@@ comment-out!
1765
1766	void TEST_hlp2xml_output() {
1767	string tested_helpfile[] = {
1768	"unittest"
1769	};
1770
1771	string HELP_SOURCE = "../../HELP_SOURCE/";
1772	string LIB = "../../lib/";
1773	string EXPECTED = "help/";
1774
1775	for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) {
1776	string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml";
1777	string html = LIB + "help_html/" + tested_helpfile[i] + ".html";
1778	string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp";
1779
1780	string xml_expected = EXPECTED + tested_helpfile[i] + ".xml";
1781	string html_expected = EXPECTED + tested_helpfile[i] + ".html";
1782	string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp";
1783
1784
1785	#if defined(TEST_AUTO_UPDATE)
1786	# if defined(NDEBUG)
1787	# error please use auto-update only in DEBUG mode
1788	# endif
1789	TEST_COPY_FILE(xml.c_str(), xml_expected.c_str());
1790	TEST_COPY_FILE(html.c_str(), html_expected.c_str());
1791	TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str());
1792
1793	#else // !defined(TEST_AUTO_UPDATE)
1794
1795	# if defined(DEBUG)
1796	int expected_xml_difflines = 0;
1797	int expected_hlp_difflines = 0;
1798	# else // !defined(DEBUG)
1799	int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning
1800	int expected_hlp_difflines = 1; // resulting warning in helpfile
1801	# endif
1802	TEST_EXPECT_TEXTFILE_DIFFLINES(xml_expected.c_str(), xml.c_str(), expected_xml_difflines);
1803	TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html_expected.c_str(), html.c_str(), 0); // html contains the update-date
1804	TEST_EXPECT_TEXTFILE_DIFFLINES(hlp_expected.c_str(), hlp.c_str(), expected_hlp_difflines);
1805	#endif
1806	}
1807	}
1808
1809
1810	#if defined(PROTECT_HELP_VS_CHANGES)
1811	void TEST_protect_help_vs_changes() { // should normally be disabled
1812	// fails if help changes compared to another checkout
1813	// or just updates the diff w/o failing (if you comment out the last line)
1814	//
1815	// if the patch is hugo and you load it into xemacs
1816	// you might want to (turn-on-lazy-shot)
1817	//
1818	// patch-pointer: ../UNIT_TESTER/run/help_changes.patch
1819
1820	bool do_help = true;
1821	bool do_html = true;
1822
1823	const char *ref_WC = "ARB.help.ref";
1824
1825	// ---------------------------------------- config above
1826
1827	string this_base = "../..";
1828	string ref_base = this_base+"/../"+ref_WC;
1829	string to_help = "/lib/help";
1830	string to_html = "/lib/help_html";
1831	string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help;
1832	string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html;
1833
1834	string update_cmd;
1835
1836	if (do_help) {
1837	if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")";
1838	else update_cmd = diff_help;
1839	}
1840	else if (do_html) update_cmd = diff_html;
1841
1842	string patch = "help_changes.patch";
1843	update_cmd += " >"+patch+" \|\|true";
1844
1845	string fail_on_change_cmd = "test \"`cat "+patch+" \| grep -v '^Common subdirectories' \| wc -l`\" = \"0\" \|\| ( echo \"Error: Help changed\"; false)";
1846
1847	TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str()));
1848	TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit
1849	}
1850	#endif
1851
1852	#endif // UNIT_TESTS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: tags/arb-7.0/HELP_SOURCE/arb_help2xml.cxx

Download in other formats: