Context Navigation

source: branches/profile/HELP_SOURCE/arb_help2xml.cxx

Visit:

Last change on this file was 12783, checked in by westram, 11 years ago
publish TESTs for which nm 2.24 fails to export source location
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 57.9 KB

Line
1	// ==================================================================== //
2	// //
3	// File : arb_help2xml.cxx //
4	// Purpose : Converts old ARB help format to XML //
5	// //
6	// Coded by Ralf Westram (coder@reallysoft.de) in October 2001 //
7	// Copyright Department of Microbiology (Technical University Munich) //
8	// //
9	// Visit our web site at: http://www.arb-home.de/ //
10	// //
11	// ==================================================================== //
12
13	#include <xml.hxx>
14	#include <arb_defs.h>
15	#include <arb_diff.h>
16	#include <static_assert.h>
17
18	#include <list>
19	#include <set>
20	#include <iostream>
21	#include <fstream>
22
23	#include <cstdlib>
24	#include <cstdarg>
25	#include <cstring>
26	#include <climits>
27
28	#include <unistd.h>
29	#include <sys/stat.h>
30
31	using namespace std;
32
33	#define h2x_assert(bed) arb_assert(bed)
34
35	#if defined(DEBUG)
36	#define WARN_FORMATTING_PROBLEMS
37	#define WARN_MISSING_HELP
38	// #define DUMP_PARAGRAPHS
39	// #define PROTECT_HELP_VS_CHANGES
40	#endif // DEBUG
41
42
43	#if defined(WARN_FORMATTING_PROBLEMS)
44
45	#define WARN_FIXED_LAYOUT_LIST_ELEMENTS
46	#define WARN_LONESOME_ENUM_ELEMENTS
47
48	// warnings below are useless for production and shall be disabled in SVN
49	// #define WARN_LONESOME_LIST_ELEMENTS
50	// #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION
51	// #define WARN_IGNORED_ALPHA_ENUMS
52
53	#endif
54
55
56	#define MAX_LINE_LENGTH 200 // maximum length of lines in input stream
57	#define TABSIZE 8
58
// Keywords that introduce a section in an old-format ARB help file.
// The position of each keyword has to match the value of the corresponding
// SectionType enumerator (the table index is converted into a SectionType).
static const char *knownSections[] = {
    "OCCURRENCE",
    "DESCRIPTION",
    "NOTES",
    "EXAMPLES",
    "WARNINGS",
    "BUGS",
    "SECTION",
};
68
// Kinds of sections.
// The enumerators in front of KNOWN_SECTION_TYPES are indices into
// knownSections[] and have to be listed in the same order as that table.
enum SectionType {
    SEC_OCCURRENCE = 0,
    SEC_DESCRIPTION,
    SEC_NOTES,
    SEC_EXAMPLES,
    SEC_WARNINGS,
    SEC_BUGS,
    SEC_SECTION,

    KNOWN_SECTION_TYPES, // number of section types that have a keyword in knownSections[]
    SEC_NONE,            // initial value while searching for a matching keyword
    SEC_FAKE,            // type of the TITLE pseudo-section
};
82
83	STATIC_ASSERT(ARRAY_ELEMS(knownSections) == KNOWN_SECTION_TYPES);
84
85	__ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) {
86	static size_t buf_size = 256;
87	static char *buffer = new char[buf_size];
88
89	size_t length;
90	while (1) {
91	if (!buffer) {
92	h2x_assert(buffer); // to stop when debugging
93	throw string("out of memory");
94	}
95
96	length = vsnprintf(buffer, buf_size, format, argPtr);
97	if (length < buf_size) break; // string fits into current buffer
98
99	// otherwise resize buffer :
100	buf_size += buf_size/2;
101	delete [] buffer;
102	buffer = new char[buf_size];
103	}
104
105	return string(buffer, length);
106	}
107
108	__ATTR__FORMAT(1) static string strf(const char *format, ...) {
109	va_list argPtr;
110	va_start(argPtr, format);
111	string result = vstrf(format, argPtr);
112	va_end(argPtr);
113
114	return result;
115	}
116
117	// -----------------------------
118	// warnings and errors
119
class LineAttachedMessage {
    // A message text together with the number of the line it refers to.
    string message;
    size_t lineno;

public:
    LineAttachedMessage(const string& message_, size_t lineno_)
        : message(message_),
          lineno(lineno_)
    {}

    const string& Message() const { return message; }
    size_t Lineno() const { return lineno; }
};
133
134	const size_t NO_LINENUMBER_INFO = -1U;
135
136	LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); }
137
138
139	static list<LineAttachedMessage> warnings;
140	inline void add_warning(const LineAttachedMessage& laMsg) {
141	warnings.push_back(laMsg);
142	}
143	inline void add_warning(const string& warning, size_t lineno) {
144	add_warning(LineAttachedMessage(warning, lineno));
145	}
146
147	struct MessageAttachable {
148	virtual ~MessageAttachable() {}
149
150	virtual string location_description() const = 0; // may return empty string
151	virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO
152
153	LineAttachedMessage attached_message(const string& message) const {
154	string where = location_description();
155	if (where.empty()) return LineAttachedMessage(message, line_number());
156	return LineAttachedMessage(message+" ["+where+"]", line_number());
157	}
158	void attach_warning(const string& message) const {
159	add_warning(attached_message(message));
160	}
161	};
162
163
164	// ----------------------
165	// class Reader
166
167	class Reader : public MessageAttachable {
168	private:
169	istream& in;
170	char lineBuffer[MAX_LINE_LENGTH];
171	char lineBuffer2[MAX_LINE_LENGTH];
172	bool readAgain;
173	bool eof;
174	int lineNo;
175
176	string location_description() const OVERRIDE { return ""; }
177	size_t line_number() const OVERRIDE { return lineNo; }
178
179	void getline() {
180	if (!eof) {
181	if (in.eof()) eof = true;
182	else {
183	h2x_assert(in.good());
184
185	in.getline(lineBuffer, MAX_LINE_LENGTH);
186	lineNo++;
187
188	if (in.eof()) eof = true;
189	else if (in.fail()) throw "line too long";
190
191	if (strchr(lineBuffer, '\t')) {
192	int o2 = 0;
193
194	for (int o = 0; lineBuffer[o]; ++o) {
195	if (lineBuffer[o] == '\t') {
196	int spaces = TABSIZE - (o2 % TABSIZE);
197	while (spaces--) lineBuffer2[o2++] = ' ';
198	}
199	else {
200	lineBuffer2[o2++] = lineBuffer[o];
201	}
202	}
203	lineBuffer2[o2] = 0;
204	strcpy(lineBuffer, lineBuffer2);
205	}
206
207	char *eol = strchr(lineBuffer, 0)-1;
208	while (eol >= lineBuffer && isspace(eol[0])) {
209	eol[0] = 0; // trim trailing whitespace
210	eol--;
211	}
212	if (eol > lineBuffer) {
213	// now eol points to last character
214	if (eol[0] == '-' && isalnum(eol[-1])) {
215	attach_warning("manual hyphenation detected");
216	}
217	}
218	}
219	}
220	}
221
222	public:
223	Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); }
224	virtual ~Reader() {}
225
226	const char *getNext() {
227	if (readAgain) readAgain = false;
228	else getline();
229	return eof ? 0 : lineBuffer;
230	}
231
232	void back() {
233	h2x_assert(!readAgain);
234	readAgain = true;
235	}
236
237	int getLineNo() const { return lineNo; }
238	};
239
// Kind of a paragraph inside a section
enum ParagraphType {
    PLAIN_TEXT = 0, // normal text
    ENUMERATED,     // element of an enumeration (has an EnumerationType and a number)
    ITEM,           // element of an item list (line started with '-' or '*')
};
// How the elements of an enumeration are labeled
enum EnumerationType {
    NONE = 0,    // not an enumeration
    DIGITS,      // "1." "2." ..
    ALPHA_UPPER, // "A." "B." ..
    ALPHA_LOWER, // "a." "b." ..
};
251
252	class Ostring {
253	string content;
254	size_t lineNo; // where string came from
255	ParagraphType type;
256
257	// only valid for type==ENUMERATED:
258	EnumerationType etype;
259	unsigned number;
260
261	public:
262
263	Ostring(const string& s, size_t line_no, ParagraphType type_)
264	: content(s),
265	lineNo(line_no),
266	type(type_)
267	{
268	h2x_assert(type != ENUMERATED);
269	}
270	Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
271	: content(s),
272	lineNo(line_no),
273	type(type_),
274	etype(etype_),
275	number(num)
276	{
277	h2x_assert(type == ENUMERATED);
278	h2x_assert(etype == DIGITS \|\| etype == ALPHA_UPPER \|\| etype == ALPHA_LOWER);
279	h2x_assert(num>0);
280	}
281
282
283	operator const string&() const { return content; }
284	operator string&() { return content; }
285
286	const string& as_string() const { return content; }
287	string& as_string() { return content; }
288
289	size_t get_lineno() const { return lineNo; }
290
291	const ParagraphType& get_type() const { return type; }
292	const EnumerationType& get_enum_type() const {
293	h2x_assert(type == ENUMERATED);
294	return etype;
295	}
296	unsigned get_number() const {
297	h2x_assert(type == ENUMERATED);
298	return number;
299	}
300
301	// some wrapper to make Ostring act like string
302	const char *c_str() const { return content.c_str(); }
303	};
304
305	typedef list<Ostring> Ostrings;
306
307	#if defined(WARN_MISSING_HELP)
308	static void check_TODO(const char *line, const Reader& reader) {
309	if (strstr(line, "@@@") != NULL \|\| strstr(line, "TODO") != NULL) {
310	reader.attach_warning(strf("TODO: %s", line));
311	}
312	}
313	#else
314	inline void check_TODO(const char *, const Reader&) { }
315	#endif // WARN_MISSING_HELP
316
317	// ----------------------------
318	// class Section
319
320	class Section : public MessageAttachable {
321	SectionType type;
322	string name;
323	Ostrings content;
324	size_t lineno;
325
326	string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; }
327
328	public:
329	Section(string name_, SectionType type_, size_t lineno_)
330	: type(type_),
331	name(name_),
332	lineno(lineno_)
333	{}
334	virtual ~Section() {}
335
336	const Ostrings& Content() const { return content; }
337	Ostrings& Content() { return content; }
338	SectionType get_type() const { return type; }
339	size_t line_number() const OVERRIDE { return lineno; }
340	const string& getName() const { return name; }
341	void setName(const string& name_) { name = name_; }
342	};
343
344	typedef list<Section> SectionList;
345
346	// --------------------
347	// class Link
348
class Link {
    // A link to another help file plus the number of the line where the link was found.
    string target;
    size_t source_lineno;

public:
    Link(const string& target_, size_t source_lineno_)
        : target(target_),
          source_lineno(source_lineno_)
    {}

    const string& Target() const { return target; }
    size_t SourceLineno() const { return source_lineno; }
};

typedef list<Link> Links;
364
365	// ------------------------
366	// class Helpfile
367
368	class Helpfile {
369	private:
370	Links uplinks;
371	Links references;
372	Links auto_references;
373	Section title;
374	SectionList sections;
375	string inputfile;
376
377	public:
378	Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {}
379	virtual ~Helpfile() {}
380
381	void readHelp(istream& in, const string& filename);
382	void writeXML(FILE *out, const string& page_name);
383	void extractInternalLinks();
384
385	const Section& get_title() const { return title; }
386	};
387
// Only the blank counts as whitespace here (tabs and linefeeds do not).
inline bool isWhite(char c) {
    return ' ' == c;
}
389
390	inline bool isEmptyOrComment(const char *s) {
391	if (s[0] == '#') return true;
392	for (int off = 0; ; ++off) {
393	if (s[off] == 0) return true;
394	if (!isWhite(s[off])) break;
395	}
396
397	return false;
398	}
399
inline const char *extractKeyword(const char *line, string& keyword) {
    // A keyword is the text between the start of the line and the first blank
    // (or the whole line, if it is not empty and contains no blank).
    //
    // returns NULL if no keyword was found (empty line or line starting with a blank);
    //         'keyword' is not modified in that case.
    // otherwise returns position behind keyword and sets value of 'keyword'

    const char *space = strchr(line, ' ');
    if (space) {
        if (space == line) return NULL; // line starts with a blank -> no keyword

        keyword.assign(line, space-line);
        return space;
    }

    // test for keyword w/o content behind
    if (!line[0]) return NULL; // empty line

    keyword = line;
    return line+strlen(line);
}
417
418	inline const char eatWhite(const char line) {
419	// skips whitespace
420	while (isWhite(*line)) ++line;
421	return line;
422	}
423
424	inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) {
425	if (paragraph.length()) {
426	if (type == ENUMERATED) {
427	sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num));
428	}
429	else {
430	sec.Content().push_back(Ostring(paragraph, lineNo, type));
431	}
432
433	type = PLAIN_TEXT;
434	etype = NONE;
435	paragraph = "";
436	}
437	}
438
439	inline const char firstChar(const char s) {
440	while (isWhite(s[0])) ++s;
441	return s;
442	}
443
inline bool is_startof_itemlist_element(const char *contentStart) {
    // An element of an item list starts with '-' or '*', followed by exactly one
    // whitespace character, followed by something that is neither whitespace nor '-'.

    const bool has_bullet = contentStart[0] == '-' || contentStart[0] == '*';
    if (!has_bullet) return false;

    // (ctype functions need values in the range of unsigned char)
    if (!isspace((unsigned char)contentStart[1])) return false; // also stops at end of string

    const char third = contentStart[2];
    return !(isspace((unsigned char)third) || third == '-');
}
454
455	#define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums
456
457	static EnumerationType startsWithLetter(string& s, unsigned& number) {
458	// tests if first line starts with 'letter.'
459	// if true then 'letter.' is removed from the string
460	// the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..)
461
462	size_t off = s.find_first_not_of(" \n");
463	if (off == string::npos) return NONE;
464	if (!isalpha(s[off])) return NONE;
465
466	size_t astart = off;
467	EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER;
468
469	number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1;
470	++off;
471
472	h2x_assert(number>0 && number<MAX_ALLOWED_ENUM);
473
474	if (s[off] != '.' && s[off] != ')') return NONE;
475	if (s[off+1] != ' ') return NONE;
476
477	// remove 'letter.' from string :
478	++off;
479	while (s[off+1] == ' ') ++off;
480	s.erase(astart, off-astart+1);
481
482	return etype;
483	}
484
485	static bool startsWithNumber(string& s, unsigned& number) {
486	// tests if first line starts with 'number.'
487	// if true then 'number.' is removed from the string
488
489	size_t off = s.find_first_not_of(" \n");
490	if (off == string::npos) return false;
491	if (!isdigit(s[off])) return false;
492
493	size_t num_start = off;
494	number = 0;
495
496	for (; isdigit(s[off]); ++off) {
497	number = number*10 + (s[off]-'0');
498	}
499	if (number>MAX_ALLOWED_ENUM) return false;
500
501	if (s[off] != '.' && s[off] != ')') return false;
502	if (s[off+1] != ' ') return false;
503
504	// remove 'number.' from string :
505	++off;
506	while (s[off+1] == ' ') ++off;
507	s.erase(num_start, off-num_start+1);
508
509	return true;
510	}
511
512	static EnumerationType detectLineEnumType(string& line, unsigned& number) {
513	if (startsWithNumber(line, number)) return DIGITS;
514	return startsWithLetter(line, number);
515	}
516
517	static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
518	string paragraph = line;
519	size_t para_start_lineno = reader.getLineNo();
520
521	ParagraphType type = PLAIN_TEXT;
522	EnumerationType etype = NONE;
523	unsigned num = 0;
524
525	unsigned last_alpha_num = -1;
526
527	h2x_assert(sec.Content().empty());
528
529	while (1) {
530	line = reader.getNext();
531	if (!line) break;
532
533	if (isEmptyOrComment(line)) {
534	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
535	check_TODO(line, reader);
536	}
537	else {
538	string keyword;
539	const char *rest = extractKeyword(line, keyword);
540
541	if (rest) { // a new keyword
542	reader.back();
543	break;
544	}
545
546	check_TODO(line, reader);
547
548	string Line = line;
549
550	if (sec.get_type() == SEC_OCCURRENCE) {
551	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
552	}
553	else {
554	const char *firstNonWhite = firstChar(line);
555	if (is_startof_itemlist_element(firstNonWhite)) {
556	h2x_assert(firstNonWhite != line);
557
558	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
559
560	Line[firstNonWhite-line] = ' ';
561	type = ITEM; // is reset in call to pushParagraph
562	}
563	else {
564	unsigned foundNum;
565	EnumerationType foundEtype = detectLineEnumType(Line, foundNum);
566
567	if (foundEtype == ALPHA_UPPER \|\| foundEtype == ALPHA_LOWER) {
568	if (foundNum == (last_alpha_num+1) \|\| foundNum == 1) {
569	last_alpha_num = foundNum;
570	}
571	else {
572	#if defined(WARN_IGNORED_ALPHA_ENUMS)
573	add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum"));
574	#endif
575	foundEtype = NONE;
576
577	reader.back();
578	Line = reader.getNext();
579	last_alpha_num = -1;
580	}
581	}
582
583	if (foundEtype != NONE) {
584	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
585
586	type = ENUMERATED;
587	num = foundNum;
588	etype = foundEtype;
589
590	if (!num) {
591	h2x_assert(etype == DIGITS);
592	throw "Enumerations starting with zero are not supported";
593	}
594	}
595	}
596	}
597
598	if (paragraph.length()) {
599	paragraph = paragraph+"\n"+Line;
600	}
601	else {
602	paragraph = string("\n")+Line;
603	para_start_lineno = reader.getLineNo();
604	}
605	}
606	}
607
608	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
609
610	if (sec.Content().size()>0 && indentation>0) {
611	string spaces;
612	spaces.reserve(indentation);
613	spaces.append(indentation, ' ');
614
615	string& ostr = sec.Content().front();
616	ostr = string("\n") + spaces + ostr;
617	}
618	}
619	inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) {
620	for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) {
621	if (ex->Target() == link) {
622	if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno());
623	throw strf("Link to '%s' duplicated here.", link.c_str());
624	}
625	}
626	}
627	inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) {
628	check_specific_duplicates(link, uplinks, add_warnings);
629	check_specific_duplicates(link, references, add_warnings);
630	}
631
632	static void warnAboutDuplicate(SectionList& sections) {
633	set<string> seen;
634	SectionList::iterator end = sections.end();
635	for (SectionList::iterator s = sections.begin(); s != end; ++s) {
636	const string& sname = s->getName();
637	if (sname == "NOTES") continue; // do not warn about multiple NOTES sections
638
639	SectionList::iterator o = s; ++o;
640	for (; o != end; ++o) {
641	if (sname == o->getName()) {
642	o->attach_warning("duplicated SECTION name");
643	if (seen.find(sname) == seen.end()) {
644	s->attach_warning("name was first used");
645	seen.insert(sname);
646	}
647	}
648	}
649	}
650	}
651
652	void Helpfile::readHelp(istream& in, const string& filename) {
653	if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str()));
654
655	Reader read(in);
656
657	inputfile = filename; // remember file read (for comment)
658
659	const char *line;
660	const char *name_only = strrchr(filename.c_str(), '/');
661
662	h2x_assert(name_only);
663	++name_only;
664
665	try {
666	while (1) {
667	line = read.getNext();
668	if (!line) break;
669
670	if (isEmptyOrComment(line)) {
671	check_TODO(line, read);
672	continue;
673	}
674
675	check_TODO(line, read);
676
677	string keyword;
678	const char *rest = extractKeyword(line, keyword);
679
680	if (rest) { // found a keyword
681	if (keyword == "UP") {
682	rest = eatWhite(rest);
683	if (strlen(rest)) {
684	check_duplicates(rest, uplinks, references, true);
685	if (strcmp(name_only, rest) == 0) throw "UP link to self";
686
687	uplinks.push_back(Link(rest, read.getLineNo()));
688	}
689	}
690	else if (keyword == "SUB") {
691	rest = eatWhite(rest);
692	if (strlen(rest)) {
693	check_duplicates(rest, uplinks, references, true);
694	if (strcmp(name_only, rest) == 0) throw "SUB link to self";
695
696	references.push_back(Link(rest, read.getLineNo()));
697	}
698	}
699	else if (keyword == "TITLE") {
700	rest = eatWhite(rest);
701	parseSection(title, rest, 0, read);
702
703	if (title.Content().empty()) throw "empty TITLE not allowed";
704
705	const char *t = title.Content().front().c_str();
706
707	if (strstr(t, "Standard help file form") != 0) {
708	throw strf("Illegal title for help file: '%s'", t);
709	}
710	}
711	else {
712	if (keyword == "NOTE") keyword = "NOTES";
713	if (keyword == "EXAMPLE") keyword = "EXAMPLES";
714	if (keyword == "WARNING") keyword = "WARNINGS";
715
716	SectionType stype = SEC_NONE;
717	int idx;
718	for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) {
719	if (knownSections[idx] == keyword) {
720	stype = SectionType(idx);
721	break;
722	}
723	}
724
725	size_t lineno = read.getLineNo();
726
727	if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str());
728
729	if (stype == SEC_SECTION) {
730	string section_name = eatWhite(rest);
731	Section sec(section_name, stype, lineno);
732	parseSection(sec, "", 0, read);
733	sections.push_back(sec);
734	}
735	else {
736	Section sec(keyword, stype, lineno);
737	rest = eatWhite(rest);
738	parseSection(sec, rest, rest-line, read);
739	sections.push_back(sec);
740	}
741	}
742	}
743	else {
744	throw strf("Unhandled line");
745	}
746	}
747
748	warnAboutDuplicate(sections);
749	}
750	catch (string& err) { throw read.attached_message(err); }
751	catch (const char *err) { throw read.attached_message(err); }
752	}
753
754	static bool shouldReflow(const string& s, int& foundIndentation) {
755	// foundIndentation is only valid if shouldReflow() returns true
756	enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START;
757	bool equal_indent = true;
758	int lastIndent = -1;
759	int thisIndent = 0;
760
761	for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) {
762	if (*c == '\n') {
763	state = START;
764	thisIndent = 0;
765	}
766	else if (isWhite(*c)) {
767	if (state == DOT \|\| state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed
768	else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces
769	else if (state == CHAR) state = SPACE; // now seen 1 space
770	}
771	else {
772	if (state == MULTIPLE) return false; // character after multiple spaces
773	if (state == START) {
774	if (lastIndent == -1) lastIndent = thisIndent;
775	else if (lastIndent != thisIndent) equal_indent = false;
776	}
777	state = (c == '.' \|\| c == ',') ? DOT : CHAR;
778	}
779	}
780
781	if (lastIndent<0) {
782	equal_indent = false;
783	}
784
785	if (equal_indent) {
786	foundIndentation = lastIndent-1;
787	h2x_assert(foundIndentation >= 0);
788	}
789	return equal_indent;
790	}
791
792	static string correctSpaces(const string& text, int change) {
793	h2x_assert(text.find('\n') == string::npos);
794
795	if (!change) return text;
796
797	size_t first = text.find_first_not_of(' ');
798	if (first == string::npos) return ""; // empty line
799
800	if (change<0) {
801	int remove = -change;
802	h2x_assert(remove <= int(first));
803	return text.substr(remove);
804	}
805
806	h2x_assert(change>0); // add spaces
807	return string(change, ' ')+text;
808	}
809
810	static string correctIndentation(const string& text, int change) {
811	// removes 'remove' spaces from every line
812
813	size_t this_lineend = text.find('\n');
814	string result;
815
816	if (this_lineend == string::npos) {
817	result = correctSpaces(text, change);
818	}
819	else {
820	result = correctSpaces(text.substr(0, this_lineend), change);
821
822	while (this_lineend != string::npos) {
823	size_t next_lineend = text.find('\n', this_lineend+1);
824	if (next_lineend == string::npos) { // last line
825	result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change);
826	}
827	else {
828	result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change);
829	}
830	this_lineend = next_lineend;
831	}
832	}
833	return result;
834	}
835
inline size_t countSpaces(const string& text) {
    // returns the number of leading blanks of 'text'
    // (INT_MAX if 'text' is empty or contains blanks only)
    const size_t first_nonblank = text.find_first_not_of(' ');
    return first_nonblank == string::npos ? size_t(INT_MAX) : first_nonblank;
}
841
842	static size_t scanMinIndentation(const string& text) {
843	size_t this_lineend = text.find('\n');
844	size_t min_indent = INT_MAX;
845
846	if (this_lineend == string::npos) {
847	min_indent = countSpaces(text);
848	}
849	else {
850	while (this_lineend != string::npos) {
851	size_t next_lineend = text.find('\n', this_lineend+1);
852	if (next_lineend == string::npos) {
853	min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1)));
854	}
855	else {
856	min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1)));
857	}
858	this_lineend = next_lineend;
859	}
860	}
861
862	if (min_indent == INT_MAX) min_indent = 0; // only empty lines
863	return min_indent;
864	}
865
866	// -----------------------------
867	// class ParagraphTree
868
869	class ParagraphTree : public MessageAttachable, virtual Noncopyable {
870	ParagraphTree *brother; // has same indentation as this
871	ParagraphTree *son; // indentation + 1
872
873	Ostring otext; // text of the Section (containing linefeeds)
874
875	bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text)
876	int indentation; // the real indentation of the blank (behind removed enumeration)
877
878
879	string location_description() const OVERRIDE { return "in paragraph starting here"; }
880	size_t line_number() const OVERRIDE { return otext.get_lineno(); }
881
882	ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end)
883	: son(NULL),
884	otext(*begin),
885	indentation(0)
886	{
887	h2x_assert(begin != end);
888
889	string& text = otext;
890
891	reflow = shouldReflow(text, indentation);
892	if (!reflow) {
893	size_t reststart = text.find('\n', 1);
894
895	if (reststart == 0) {
896	attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail");
897	}
898
899	if (reststart != string::npos) {
900	int rest_indent = -1;
901	string rest = text.substr(reststart);
902	bool rest_reflow = shouldReflow(rest, rest_indent);
903
904	if (rest_reflow) {
905	int first_indent = countSpaces(text.substr(1));
906	if (get_type() == PLAIN_TEXT) {
907	size_t last = text.find_last_not_of(' ', reststart-1);
908	bool is_header = last != string::npos && text[last] == ':';
909
910	if (!is_header && rest_indent == (first_indent+8)) {
911	#if defined(DEBUG)
912	size_t textstart = text.find_first_not_of(" \n");
913	h2x_assert(textstart != string::npos);
914	#endif // DEBUG
915
916	text = text.substr(0, reststart)+correctIndentation(rest, -8);
917	reflow = shouldReflow(text, indentation);
918	}
919	}
920	else {
921	int diff = rest_indent-first_indent;
922	if (diff>0) {
923	text = text.substr(0, reststart)+correctIndentation(rest, -diff);
924	reflow = shouldReflow(text, indentation);
925	}
926	else if (diff<0) {
927	// paragraph with more indent on first line (occurs?)
928	attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff));
929	}
930	}
931	}
932	}
933	}
934
935	if (!reflow) {
936	indentation = scanMinIndentation(text);
937	}
938	text = correctIndentation(text, -indentation);
939	if (get_type() == ITEM) {
940	h2x_assert(indentation >= 2);
941	indentation -= 2;
942	}
943
944	brother = buildParagraphTree(++begin, end);
945	}
946
947	void brothers_to_sons(ParagraphTree *new_brother);
948
949	public:
950	virtual ~ParagraphTree() {
951	delete brother;
952	delete son;
953	}
954
955	ParagraphType get_type() const { return otext.get_type(); }
956
957	bool is_itemlist_member() const { return get_type() == ITEM; }
958	unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; }
959	EnumerationType get_enum_type() const { return otext.get_enum_type(); }
960
961	const char *readable_type() const {
962	const char *res = NULL;
963	switch (get_type()) {
964	case PLAIN_TEXT: res = "PLAIN_TEXT"; break;
965	case ITEM: res = "ITEM"; break;
966	case ENUMERATED: res = "ENUMERATED"; break;
967	}
968	return res;
969	}
970
971	size_t countTextNodes() {
972	size_t nodes = 1; // this
973	if (son) nodes += son->countTextNodes();
974	if (brother) nodes += brother->countTextNodes();
975	return nodes;
976	}
977
978	#if defined(DUMP_PARAGRAPHS)
979	void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; }
980	char masknl(const char text) {
981	char *result = strdup(text);
982	for (int i = 0; result[i]; ++i) {
983	if (result[i] == '\n') result[i] = '\|';
984	}
985	return result;
986	}
987	void dump(ostream& out, int indent = 0) {
988	print_indent(out, indent+1);
989	{
990	char *mtext = masknl(otext.as_string().c_str());
991	out << "text='" << mtext << "'\n";
992	free(mtext);
993	}
994
995	print_indent(out, indent+1);
996	out << "type='" << readable_type() << "' ";
997	if (get_type() == ENUMERATED) {
998	out << "enumeration='" << otext.get_number() << "' ";
999	}
1000	out << "reflow='" << reflow << "' ";
1001	out << "indentation='" << indentation << "'\n";
1002
1003	if (son) {
1004	print_indent(out, indent+2); cout << "son:\n";
1005	son->dump(out, indent+2);
1006	cout << "\n";
1007	}
1008	if (brother) {
1009	print_indent(out, indent); cout << "brother:\n";
1010	brother->dump(out, indent);
1011	}
1012	}
1013	#endif // DUMP_PARAGRAPHS
1014
1015	private:
1016	static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) {
1017	if (begin == end) return 0;
1018	return new ParagraphTree(begin, end);
1019	}
1020	public:
1021	static ParagraphTree* buildParagraphTree(const Section& sec) {
1022	const Ostrings& txt = sec.Content();
1023	if (txt.empty()) throw "attempt to build an empty ParagraphTree";
1024	return buildParagraphTree(txt.begin(), txt.end());
1025	}
1026
1027	bool contains(ParagraphTree *that) {
1028	return
1029	this == that \|\|
1030	(son && son->contains(that)) \|\|
1031	(brother && brother->contains(that));
1032	}
1033
1034	ParagraphTree predecessor(ParagraphTree before_this) {
1035	if (brother == before_this) return this;
1036	if (!brother) return 0;
1037	return brother->predecessor(before_this);
1038	}
1039
1040	void append(ParagraphTree *new_brother) {
1041	if (!brother) brother = new_brother;
1042	else brother->append(new_brother);
1043	}
1044
1045	bool is_some_brother(const ParagraphTree *other) const {
1046	return (other == brother) \|\| (brother && brother->is_some_brother(other));
1047	}
1048
1049	ParagraphTree* takeAllInFrontOf(ParagraphTree *after) {
1050	ParagraphTree *removed = this;
1051	ParagraphTree *after_pred = this;
1052
1053	h2x_assert(is_some_brother(after));
1054
1055	while (1) {
1056	h2x_assert(after_pred);
1057	h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after'
1058
1059	if (after_pred->brother == after) { // found after
1060	after_pred->brother = 0; // unlink
1061	break;
1062	}
1063	after_pred = after_pred->brother;
1064	}
1065
1066	return removed;
1067	}
1068
1069	ParagraphTree *firstListMember() {
1070	switch (get_type()) {
1071	case PLAIN_TEXT: break;
1072	case ITEM: return this;
1073	case ENUMERATED: {
1074	if (get_enumeration() == 1) return this;
1075	break;
1076	}
1077	}
1078	if (brother) return brother->firstListMember();
1079	return NULL;
1080	}
1081
1082	ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) {
1083	if (indentation<previous.indentation) return NULL;
1084	if (indentation == previous.indentation && get_type() == previous.get_type()) {
1085	if (get_type() != ENUMERATED) return this;
1086	if (get_enumeration() > previous.get_enumeration()) return this;
1087	return NULL;
1088	}
1089	if (!brother) return NULL;
1090	return brother->nextListMemberAfter(previous);
1091	}
1092	ParagraphTree *nextListMember() const {
1093	return brother ? brother->nextListMemberAfter(*this) : NULL;
1094	}
1095
1096	ParagraphTree* firstWithLessIndentThan(int wanted_indentation) {
1097	if (indentation < wanted_indentation) return this;
1098	if (!brother) return 0;
1099	return brother->firstWithLessIndentThan(wanted_indentation);
1100	}
1101
1102	void format_indentations();
1103	void format_lists();
1104
1105	private:
1106	static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) {
1107	Ostrings S;
1108	S.push_back(Ostring(Text, beginLineNo, type));
1109	return new ParagraphTree(S.begin(), S.end());
1110	}
1111	ParagraphTree *xml_write_list_contents();
1112	ParagraphTree *xml_write_enum_contents();
1113	void xml_write_textblock();
1114
1115	public:
1116	void xml_write();
1117	};
1118
#if defined(DUMP_PARAGRAPHS)
// helper function for use in gdb:
// dumps the tree starting at 'para' to stdout
static void dump_paragraph(ParagraphTree *para) {
    const int no_indent = 0;
    para->dump(cout, no_indent);
}
#endif
1125
1126	void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) {
1127	/*! folds down brothers to sons
1128	* @param new_brother brother of 'this->brother', will become new brother.
1129	* If new_brother == NULL -> make all brothers sons.
1130	*/
1131
1132	if (new_brother) {
1133	h2x_assert(is_some_brother(new_brother));
1134
1135	if (brother != new_brother) {
1136	#if defined(DEBUG)
1137	if (son) {
1138	son->attach_warning("Found unexpected son (in brothers_to_sons)");
1139	brother->attach_warning("while trying to transform paragraphs from here ..");
1140	new_brother->attach_warning(".. to here ..");
1141	attach_warning(".. into sons of this paragraph.");
1142	return;
1143	}
1144	#endif
1145
1146	h2x_assert(!son);
1147	h2x_assert(brother);
1148
1149	if (new_brother == NULL) { // all brothers -> sons
1150	son = brother;
1151	brother = NULL;
1152	}
1153	else {
1154	son = brother->takeAllInFrontOf(new_brother);
1155	brother = new_brother;
1156	}
1157	}
1158	}
1159	else {
1160	h2x_assert(!son);
1161	son = brother;
1162	brother = NULL;
1163	}
1164	}
1165	void ParagraphTree::format_lists() {
1166	// reformats tree such that all items/enumerations are brothers
1167	ParagraphTree *member = firstListMember();
1168	if (member) {
1169	for (ParagraphTree *curr = this; curr != member; curr = curr->brother) {
1170	h2x_assert(curr);
1171	if (curr->son) curr->son->format_lists();
1172	}
1173
1174	for (ParagraphTree *next = member->nextListMember();
1175	next;
1176	member = next, next = member->nextListMember())
1177	{
1178	member->brothers_to_sons(next);
1179	h2x_assert(member->brother == next);
1180
1181	if (member->son) member->son->format_lists();
1182	}
1183
1184	h2x_assert(!member->son); // member is the last item
1185
1186	if (member->brother) {
1187	ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1);
1188	member->brothers_to_sons(non_member);
1189	}
1190
1191	if (member->son) member->son->format_lists();
1192	if (member->brother) member->brother->format_lists();
1193	}
1194	else {
1195	for (ParagraphTree *curr = this; curr; curr = curr->brother) {
1196	h2x_assert(curr);
1197	if (curr->son) curr->son->format_lists();
1198	}
1199	}
1200	}
1201
1202	void ParagraphTree::format_indentations() {
1203	if (brother) {
1204	ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1);
1205	#if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION)
1206	if (same_indent && indentation != same_indent->indentation) {
1207	same_indent->attach_warning("indentation is assumed to be same as ..");
1208	attach_warning(".. here");
1209	}
1210	#endif
1211	brothers_to_sons(same_indent); // if same_indent==NULL -> make all brothers childs
1212	if (brother) brother->format_indentations();
1213	}
1214
1215	if (son) son->format_indentations();
1216	}
1217
1218	// -----------------
1219	// LinkType
1220
enum LinkType {
    // each known type occupies its own bit (allows testing several types with one mask);
    // the bit position (+1) is the index into 'link_id'
    LT_UNKNOWN = 0,
    LT_HTTP    = 1<<0,
    LT_FTP     = 1<<1,
    LT_FILE    = 1<<2,
    LT_EMAIL   = 1<<3,
    LT_HLP     = 1<<4,
    LT_PS      = 1<<5,
    LT_PDF     = 1<<6
};
1231
// type-ids written into the XML output.
// Order has to match the bit order of enum LinkType (see LinkType2id).
static const char *link_id[] = {
    "unknown", // LT_UNKNOWN
    "www",     // LT_HTTP
    "www",     // LT_FTP
    "www",     // LT_FILE
    "email",   // LT_EMAIL
    "hlp",     // LT_HLP
    "ps",      // LT_PS
    "pdf",     // LT_PDF
};
1242
1243	static string LinkType2id(LinkType type) {
1244	int idx = 0;
1245	while (type >= 1) {
1246	idx++;
1247	type = LinkType(type>>1);
1248	}
1249	return link_id[idx];
1250	}
1251
inline const char *getExtension(const string& name) {
    // Returns a pointer to the text behind the last '.' of 'name'
    // or NULL if 'name' contains no '.'.
    // The result points into the buffer of 'name' (i.e. it is only valid as long as 'name' lives unmodified).
    const size_t last_dot = name.find_last_of('.');
    return last_dot == string::npos ? NULL : name.c_str()+last_dot+1;
}
1259
1260	static LinkType detectLinkType(const string& link_target) {
1261	LinkType type = LT_UNKNOWN;
1262	const char *ext = getExtension(link_target);
1263
1264	if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP;
1265	else if (link_target.find("http://") == 0) type = LT_HTTP;
1266	else if (link_target.find("ftp://") == 0) type = LT_FTP;
1267	else if (link_target.find("file://") == 0) type = LT_FILE;
1268	else if (link_target.find('@') != string::npos) type = LT_EMAIL;
1269	else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS;
1270	else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF;
1271
1272	return type;
1273	}
1274
1275	// --------------------------------------------------------------------------------
1276
1277
1278
static string locate_helpfile(const string& helpname) {
    // search for 'helpname' in various helpfile locations
    // (paths are relative to the current working directory).
    // Returns the first existing path or "" if none exists.

    static const char *const search_dir[] = { "oldhelp/", "genhelp/" };
    const size_t             dir_count    = sizeof(search_dir)/sizeof(search_dir[0]);

    for (size_t d = 0; d<dir_count; ++d) {
        string      candidate = string(search_dir[d])+helpname;
        struct stat info;

        if (stat(candidate.c_str(), &info) == 0) return candidate;
    }
    return "";
}
1295
1296	static string locate_document(const string& docname) {
1297	// search for 'docname' or 'docname.gz' in various helpfile locations
1298
1299	string located = locate_helpfile(docname);
1300	if (located.empty()) {
1301	located = locate_helpfile(docname+".gz");
1302	}
1303	return located;
1304	}
1305
1306	static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) {
1307	if (type == LT_UNKNOWN) {
1308	string msg = string("Invalid link (dest='")+dest+"')";
1309	throw LineAttachedMessage(msg, source_line);
1310	}
1311
1312	link.add_attribute("dest", dest);
1313	link.add_attribute("type", LinkType2id(type));
1314	link.add_attribute("source_line", source_line);
1315
1316	if (type&(LT_HLP\|LT_PDF\|LT_PS)) { // other links (www, email) cannot be checked for existence here
1317	string fullhelp = ((type&LT_HLP) ? locate_helpfile : locate_document)(dest);
1318	if (fullhelp.empty()) {
1319	link.add_attribute("missing", "1");
1320	string warning = strf("Dead link to '%s'", dest.c_str());
1321	h2x_assert(source_line<1000); // illegal line number ?
1322	add_warning(warning, source_line);
1323	}
1324	}
1325	}
1326
1327	static void print_XML_Text_expanding_links(const string& text, size_t lineNo) {
1328	size_t found = text.find("LINK{", 0);
1329	if (found != string::npos) {
1330	size_t inside_link = found+5;
1331	size_t close = text.find('}', inside_link);
1332
1333	if (close == string::npos) throw "unclosed 'LINK{}'";
1334
1335	string link_target = text.substr(inside_link, close-inside_link);
1336	LinkType type = detectLinkType(link_target);
1337	string dest = link_target;
1338
1339	XML_Text(text.substr(0, found));
1340
1341	{
1342	XML_Tag link("LINK");
1343	link.set_on_extra_line(false);
1344	add_link_attributes(link, type, dest, lineNo);
1345	}
1346
1347	print_XML_Text_expanding_links(text.substr(close+1), lineNo);
1348	}
1349	else {
1350	XML_Text t(text);
1351	}
1352	}
1353
1354	void ParagraphTree::xml_write_textblock() {
1355	XML_Tag textblock("T");
1356	textblock.add_attribute("reflow", reflow ? "1" : "0");
1357
1358	{
1359	string usedText;
1360	const string& text = otext;
1361	if (reflow) {
1362	usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level);
1363	}
1364	else {
1365	usedText = text;
1366	}
1367	print_XML_Text_expanding_links(usedText, otext.get_lineno());
1368	}
1369	}
1370
1371	ParagraphTree *ParagraphTree::xml_write_list_contents() {
1372	h2x_assert(is_itemlist_member());
1373	#if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1374	if (!reflow) attach_warning("ITEM not reflown (check output)");
1375	#endif
1376	{
1377	XML_Tag entry("ENTRY");
1378	entry.add_attribute("item", "1");
1379	xml_write_textblock();
1380	if (son) son->xml_write();
1381	}
1382	if (brother && brother->is_itemlist_member()) {
1383	return brother->xml_write_list_contents();
1384	}
1385	return brother;
1386	}
1387	ParagraphTree *ParagraphTree::xml_write_enum_contents() {
1388	h2x_assert(get_enumeration());
1389	#if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1390	if (!reflow) attach_warning("ENUMERATED not reflown (check output)");
1391	#endif
1392	{
1393	XML_Tag entry("ENTRY");
1394	switch (get_enum_type()) {
1395	case DIGITS:
1396	entry.add_attribute("enumerated", strf("%i", get_enumeration()));
1397	break;
1398	case ALPHA_UPPER:
1399	entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration()));
1400	break;
1401	case ALPHA_LOWER:
1402	entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration()));
1403	break;
1404	default:
1405	h2x_assert(0);
1406	break;
1407	}
1408	xml_write_textblock();
1409	if (son) son->xml_write();
1410	}
1411	if (brother && brother->get_enumeration()) {
1412	int diff = brother->get_enumeration()-get_enumeration();
1413	if (diff != 1) {
1414	attach_warning("Non-consecutive enumeration detected between here..");
1415	brother->attach_warning(".. and here");
1416	}
1417	return brother->xml_write_enum_contents();
1418	}
1419	return brother;
1420	}
1421
1422	void ParagraphTree::xml_write() {
1423	try {
1424	ParagraphTree *next = NULL;
1425	if (get_enumeration()) {
1426	XML_Tag enu("ENUM");
1427	if (get_enumeration() != 1) {
1428	attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration()));
1429	}
1430	next = xml_write_enum_contents();
1431	#if defined(WARN_LONESOME_ENUM_ELEMENTS)
1432	if (next == brother) attach_warning("Suspicious single-element-ENUM");
1433	#endif
1434	}
1435	else if (is_itemlist_member()) {
1436	XML_Tag list("LIST");
1437	next = xml_write_list_contents();
1438	#if defined(WARN_LONESOME_LIST_ELEMENTS)
1439	if (next == brother) attach_warning("Suspicious single-element-LIST");
1440	#endif
1441	}
1442	else {
1443	{
1444	XML_Tag para("P");
1445	xml_write_textblock();
1446	if (son) son->xml_write();
1447	}
1448	next = brother;
1449	}
1450	if (next) next->xml_write();
1451	}
1452	catch (string& err) { throw attached_message(err); }
1453	catch (const char *err) { throw attached_message(err); }
1454	}
1455
1456	static void create_top_links(const Links& links, const char *tag) {
1457	for (Links::const_iterator s = links.begin(); s != links.end(); ++s) {
1458	XML_Tag link(tag);
1459	add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno());
1460	}
1461	}
1462
1463	void Helpfile::writeXML(FILE *out, const string& page_name) {
1464	XML_Document xml("PAGE", "arb_help.dtd", out);
1465
1466	xml.skip_empty_tags = true;
1467	xml.indentation_per_level = 2;
1468
1469	xml.getRoot().add_attribute("name", page_name);
1470	#if defined(DEBUG)
1471	xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version
1472	#else
1473	xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version
1474	#endif // DEBUG
1475
1476	xml.getRoot().add_attribute("source", inputfile.c_str());
1477
1478	{
1479	XML_Comment(string("automatically generated from ../")+inputfile+' ');
1480	}
1481
1482	create_top_links(uplinks, "UP");
1483	create_top_links(references, "SUB");
1484	create_top_links(auto_references, "SUB");
1485
1486	{
1487	XML_Tag title_tag("TITLE");
1488	const Ostrings& T = title.Content();
1489	for (Ostrings::const_iterator s = T.begin(); s != T.end(); ++s) {
1490	if (s != T.begin()) { XML_Text text("\n"); }
1491	XML_Text text(*s);
1492	}
1493	}
1494
1495	for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1496	try {
1497	XML_Tag section_tag("SECTION");
1498	section_tag.add_attribute("name", sec->getName());
1499
1500	ParagraphTree ptree = ParagraphTree::buildParagraphTree(sec);
1501
1502	#if defined(DEBUG)
1503	size_t textnodes = ptree->countTextNodes();
1504	#endif
1505	#if defined(DUMP_PARAGRAPHS)
1506	cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n";
1507	ptree->dump(cout);
1508	cout << "----------------------------------------\n";
1509	#endif
1510
1511	ptree->format_lists();
1512
1513	#if defined(DUMP_PARAGRAPHS)
1514	cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n";
1515	ptree->dump(cout);
1516	cout << "----------------------------------------\n";
1517	#endif
1518	#if defined(DEBUG)
1519	size_t textnodes2 = ptree->countTextNodes();
1520	h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error
1521	#endif
1522
1523	ptree->format_indentations();
1524
1525	#if defined(DUMP_PARAGRAPHS)
1526	cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n";
1527	ptree->dump(cout);
1528	cout << "----------------------------------------\n";
1529	#endif
1530	#if defined(DEBUG)
1531	size_t textnodes3 = ptree->countTextNodes();
1532	h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error
1533	#endif
1534
1535	ptree->xml_write();
1536
1537	delete ptree;
1538	}
1539	catch (string& err) { throw sec->attached_message(err); }
1540	catch (const char *err) { throw sec->attached_message(err); }
1541	}
1542	}
1543
1544	void Helpfile::extractInternalLinks() {
1545	for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1546	try {
1547	const Ostrings& s = sec->Content();
1548
1549	for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) {
1550	const string& line = *li;
1551	size_t start = 0;
1552
1553	while (1) {
1554	size_t found = line.find("LINK{", start);
1555	if (found == string::npos) break;
1556	found += 5;
1557	size_t close = line.find('}', found);
1558	if (close == string::npos) break;
1559
1560	string link_target = line.substr(found, close-found);
1561
1562	if (link_target.find("http://") == string::npos &&
1563	link_target.find("ftp://") == string::npos &&
1564	link_target.find("file://") == string::npos &&
1565	link_target.find('@') == string::npos)
1566	{
1567	try {
1568	check_specific_duplicates(link_target, references, false); // check only sublinks here
1569	check_specific_duplicates(link_target, uplinks, false); // check only uplinks here
1570	check_specific_duplicates(link_target, auto_references, false); // check only sublinks here
1571
1572	// only auto-add inline reference if none of the above checks has thrown
1573	auto_references.push_back(Link(link_target, sec->line_number()));
1574	}
1575	catch (string& err) {
1576	; // silently ignore inlined
1577	}
1578	}
1579	start = close+1;
1580	}
1581	}
1582	}
1583	catch (string& err) {
1584	throw sec->attached_message("'"+err+"' while scanning LINK{}");
1585	}
1586	}
1587	}
1588
1589	static void show_err(const string& err, size_t lineno, const string& helpfile) {
1590	if (err.find(helpfile+':') != string::npos) {
1591	cerr << err;
1592	}
1593	else if (lineno == NO_LINENUMBER_INFO) {
1594	cerr << helpfile << ":1: [in unknown line] " << err;
1595	}
1596	else {
1597	cerr << helpfile << ":" << lineno << ": " << err;
1598	}
1599	cerr << '\n';
1600	}
1601	inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) {
1602	show_err(line_err.Message(), line_err.Lineno(), helpfile);
1603	}
1604	inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) {
1605	show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile);
1606	}
1607	inline void show_warnings(const string& helpfile) {
1608	for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) {
1609	show_warning(*wi, helpfile);
1610	}
1611	}
1612	static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) {
1613	show_err(error, helpfile);
1614	show_warnings(helpfile);
1615	}
1616
1617	int ARB_main(int argc, char *argv[]) {
1618	if (argc != 3) {
1619	cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n";
1620	return EXIT_FAILURE;
1621	}
1622
1623	Helpfile help;
1624	string arb_help;
1625
1626	try {
1627	try {
1628	arb_help = argv[1];
1629	string xml_output = argv[2];
1630
1631	{
1632	ifstream in(arb_help.c_str());
1633	help.readHelp(in, arb_help);
1634	}
1635
1636	help.extractInternalLinks();
1637
1638	{
1639	FILE *out = std::fopen(xml_output.c_str(), "wt");
1640	if (!out) throw string("Can't open '")+xml_output+'\'';
1641
1642	try {
1643	// arb_help contains 'oldhelp/name.hlp'
1644	size_t slash = arb_help.find('/');
1645	size_t dot = arb_help.find_last_of('.');
1646
1647	if (slash == string::npos \|\| dot == string::npos) {
1648	throw string("parameter <ARB helpfile> has to be in format 'oldhelp/name.hlp' (not '"+arb_help+"')");
1649	}
1650
1651	string page_name(arb_help, slash+1, dot-slash-1);
1652	help.writeXML(out, page_name);
1653	fclose(out);
1654	}
1655	catch (...) {
1656	fclose(out);
1657	remove(xml_output.c_str());
1658	throw;
1659	}
1660	}
1661
1662	show_warnings(arb_help);
1663
1664	return EXIT_SUCCESS;
1665	}
1666	catch (string& err) { throw unattached_message(err); }
1667	catch (const char * err) { throw unattached_message(err); }
1668	catch (LineAttachedMessage& err) { throw; }
1669	catch (...) { throw unattached_message("unknown exception in arb_help2xml"); }
1670	}
1671	catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); }
1672	catch (...) { h2x_assert(0); }
1673
1674	return EXIT_FAILURE;
1675	}
1676
1677	// --------------------------------------------------------------------------------
1678
1679	#ifdef UNIT_TESTS
1680	#include <test_unit.h>
1681
1682	static arb_test::match_expectation help_file_compiles(const char helpfile, const char expected_title, const char *expected_error_part) {
1683	using namespace arb_test;
1684	expectation_group expected;
1685
1686	ifstream in(helpfile);
1687
1688	LineAttachedMessage *error = NULL;
1689
1690	Helpfile help;
1691	try {
1692	help.readHelp(in, helpfile);
1693	help.extractInternalLinks();
1694
1695	FILE *devnul = fopen("/dev/null", "wt");
1696	if (!devnul) throw unattached_message("can't write to null device");
1697	help.writeXML(devnul, "dummy");
1698	fclose(devnul);
1699	}
1700	catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); }
1701	catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); }
1702
1703	if (expected_error_part) {
1704	expected.add(that(error).does_differ_from_NULL());
1705	if (error) expected.add(that(error->Message()).does_contain(expected_error_part));
1706	}
1707	else {
1708	expected.add(that(error).is_equal_to_NULL());
1709	if (!error) {
1710	Section title = help.get_title();
1711	const Ostrings& title_strings = title.Content();
1712
1713	expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title));
1714	expected.add(that(title_strings.size()).is_equal_to(1));
1715	}
1716	else {
1717	show_error_and_warnings(*error, helpfile);
1718	}
1719	}
1720
1721	delete error;
1722
1723	return all().ofgroup(expected);
1724	}
1725
1726	#define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULL))
1727	#define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULL,expError))
1728
1729	void TEST_hlp2xml_conversion() {
1730	TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE"));
1731
1732	HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp
1733
1734	HELP_FILE_COMPILES("oldhelp/markbyref.hlp", "Mark by reference"); // oldhelp/markbyref.hlp
1735	HELP_FILE_COMPILES("oldhelp/ad_align.hlp", "Alignment Administration"); // oldhelp/ad_align.hlp
1736	HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights"); // genhelp/copyright.hlp
1737
1738	HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file
1739	}
1740	TEST_PUBLISH(TEST_hlp2xml_conversion);
1741
1742
1743	// #define TEST_AUTO_UPDATE // uncomment to update expected xml // @@@ comment-out!
1744
1745	void TEST_hlp2xml_output() {
1746	string tested_helpfile[] = {
1747	"unittest"
1748	};
1749
1750	string HELP_SOURCE = "../../HELP_SOURCE/";
1751	string LIB = "../../lib/";
1752	string EXPECTED = "help/";
1753
1754	for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) {
1755	string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml";
1756	string html = LIB + "help_html/" + tested_helpfile[i] + ".html";
1757	string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp";
1758
1759	string xml_expected = EXPECTED + tested_helpfile[i] + ".xml";
1760	string html_expected = EXPECTED + tested_helpfile[i] + ".html";
1761	string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp";
1762
1763	string cmd;
1764
1765	#if defined(TEST_AUTO_UPDATE)
1766	# if defined(NDEBUG)
1767	# error please use auto-update only in DEBUG mode
1768	# endif
1769	cmd = string("cp ") + xml + ' ' + xml_expected; TEST_EXPECT_NO_ERROR(GBK_system(cmd.c_str()));
1770	cmd = string("cp ") + html + ' ' + html_expected; TEST_EXPECT_NO_ERROR(GBK_system(cmd.c_str()));
1771	cmd = string("cp ") + hlp + ' ' + hlp_expected; TEST_EXPECT_NO_ERROR(GBK_system(cmd.c_str()));
1772	#else // !defined(TEST_AUTO_UPDATE)
1773
1774	# if defined(DEBUG)
1775	int expected_xml_difflines = 0;
1776	int expected_hlp_difflines = 0;
1777	# else // !defined(DEBUG)
1778	int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning
1779	int expected_hlp_difflines = 1; // resulting warning in helpfile
1780	# endif
1781	TEST_EXPECT_TEXTFILE_DIFFLINES(xml_expected.c_str(), xml.c_str(), expected_xml_difflines);
1782	TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html_expected.c_str(), html.c_str(), 0); // html contains the update-date
1783	TEST_EXPECT_TEXTFILE_DIFFLINES(hlp_expected.c_str(), hlp.c_str(), expected_hlp_difflines);
1784	#endif
1785	}
1786	}
1787
1788
#if defined(PROTECT_HELP_VS_CHANGES)
void TEST_protect_help_vs_changes() { // should normally be disabled
    // fails if help changes compared to another checkout
    // or just updates the diff w/o failing (if you comment out the last line)
    //
    // if the patch is huge and you load it into xemacs
    // you might want to (turn-on-lazy-shot)
    //
    // patch-pointer: ../UNIT_TESTER/run/help_changes.patch

    bool do_help = true;
    bool do_html = true;

    const char *ref_WC = "ARB.help.ref";

    // ---------------------------------------- config above

    string this_base = "../..";
    string ref_base  = this_base+"/../"+ref_WC;
    string to_help   = "/lib/help";
    string to_html   = "/lib/help_html";
    string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help;
    string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html;

    string update_cmd;

    if (do_help) {
        if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")";
        else         update_cmd = diff_help;
    }
    else if (do_html) update_cmd = diff_html;

    string patch  = "help_changes.patch";
    update_cmd   += " >"+patch+" ||true"; // diff exits non-zero if anything differs -> never fail here

    // succeeds only if the patch contains nothing but 'Common subdirectories' lines
    string fail_on_change_cmd = "test \"`cat "+patch+" | grep -v '^Common subdirectories' | wc -l`\" = \"0\" || ( echo \"Error: Help changed\"; false)";

    TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str()));
    TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit
}
#endif
1830
1831	#endif // UNIT_TESTS

Note: See TracBrowser for help on using the repository browser.

Download in other formats: