Context Navigation

source: tags/ms_r16q2/HELP_SOURCE/arb_help2xml.cxx

Visit:

Last change on this file was 14050, checked in by westram, 10 years ago
revert part of [14048]: accept broken LINKs in non-RELEASE modes
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 58.2 KB

Line
1	// ==================================================================== //
2	// //
3	// File : arb_help2xml.cxx //
4	// Purpose : Converts old ARB help format to XML //
5	// //
6	// Coded by Ralf Westram (coder@reallysoft.de) in October 2001 //
7	// Copyright Department of Microbiology (Technical University Munich) //
8	// //
9	// Visit our web site at: http://www.arb-home.de/ //
10	// //
11	// ==================================================================== //
12
13	#include <xml.hxx>
14	#include <arb_defs.h>
15	#include <arb_diff.h>
16	#include <static_assert.h>
17
18	#include <list>
19	#include <set>
20	#include <iostream>
21	#include <fstream>
22
23	#include <cstdlib>
24	#include <cstdarg>
25	#include <cstring>
26	#include <climits>
27
28	#include <unistd.h>
29	#include <sys/stat.h>
30
31	using namespace std;
32
33	#define h2x_assert(bed) arb_assert(bed)
34
35	#if defined(DEBUG)
36	#define WARN_FORMATTING_PROBLEMS
37	#define WARN_MISSING_HELP
38	// #define DUMP_PARAGRAPHS
39	// #define PROTECT_HELP_VS_CHANGES
40	#endif // DEBUG
41
42
43	#if defined(WARN_FORMATTING_PROBLEMS)
44
45	#define WARN_FIXED_LAYOUT_LIST_ELEMENTS
46	#define WARN_LONESOME_ENUM_ELEMENTS
47
48	// warnings below are useless for production and shall be disabled in SVN
49	// #define WARN_LONESOME_LIST_ELEMENTS
50	// #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION
51	// #define WARN_IGNORED_ALPHA_ENUMS
52
53	#endif
54
55
56	#define MAX_LINE_LENGTH 200 // maximum length of lines in input stream
57	#define TABSIZE 8
58
// Keywords that start a section in a help file.
// Entry i has to correspond to enumerator i of SectionType (checked by the STATIC_ASSERT below).
59	static const char *knownSections[] = {
60	"OCCURRENCE",
61	"DESCRIPTION",
62	"NOTES",
63	"EXAMPLES",
64	"WARNINGS",
65	"BUGS",
66	"SECTION",
67	};
68
// Section kinds. The enumerators in front of KNOWN_SECTION_TYPES double as indices into knownSections[].
69	enum SectionType {
70	SEC_OCCURRENCE,
71	SEC_DESCRIPTION,
72	SEC_NOTES,
73	SEC_EXAMPLES,
74	SEC_WARNINGS,
75	SEC_BUGS,
76	SEC_SECTION,
77
// number of enumerators above (has to equal the number of elements of knownSections)
78	KNOWN_SECTION_TYPES,
// value Helpfile::readHelp() starts from before a keyword has been matched
79	SEC_NONE,
// type given to the TITLE pseudo-section owned by Helpfile
80	SEC_FAKE,
81	};
82
// compile-time guard: keyword table and enum must not get out of sync
83	STATIC_ASSERT(ARRAY_ELEMS(knownSections) == KNOWN_SECTION_TYPES);
84
85	__ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) {
86	static size_t buf_size = 256;
87	static char *buffer = new char[buf_size];
88
89	size_t length;
90	while (1) {
91	if (!buffer) {
92	h2x_assert(buffer); // to stop when debugging
93	throw string("out of memory");
94	}
95
96	length = vsnprintf(buffer, buf_size, format, argPtr);
97	if (length < buf_size) break; // string fits into current buffer
98
99	// otherwise resize buffer :
100	buf_size += buf_size/2;
101	delete [] buffer;
102	buffer = new char[buf_size];
103	}
104
105	return string(buffer, length);
106	}
107
108	__ATTR__FORMAT(1) static string strf(const char *format, ...) {
109	va_list argPtr;
110	va_start(argPtr, format);
111	string result = vstrf(format, argPtr);
112	va_end(argPtr);
113
114	return result;
115	}
116
117	// -----------------------------
118	// warnings and errors
119
// A message text bundled with the number of the input line it refers to.
class LineAttachedMessage {
public:
    LineAttachedMessage(const string& message_, size_t lineno_)
        : message(message_),
          lineno(lineno_)
    {
    }

    // the message text
    const string& Message() const {
        return message;
    }
    // the line number handed to the constructor
    size_t Lineno() const {
        return lineno;
    }

private:
    string message;
    size_t lineno;
};
133
// Sentinel line number meaning "no line number known".
134	const size_t NO_LINENUMBER_INFO = -1U;
135
// Wraps 'message' into a LineAttachedMessage that carries NO_LINENUMBER_INFO instead of a real line number.
136	LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); }
137
138
139	static list<LineAttachedMessage> warnings;
140	inline void add_warning(const LineAttachedMessage& laMsg) {
141	warnings.push_back(laMsg);
142	}
143	inline void add_warning(const string& warning, size_t lineno) {
144	add_warning(LineAttachedMessage(warning, lineno));
145	}
146
147	struct MessageAttachable {
148	virtual ~MessageAttachable() {}
149
150	virtual string location_description() const = 0; // may return empty string
151	virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO
152
153	LineAttachedMessage attached_message(const string& message) const {
154	string where = location_description();
155	if (where.empty()) return LineAttachedMessage(message, line_number());
156	return LineAttachedMessage(message+" ["+where+"]", line_number());
157	}
158	void attach_warning(const string& message) const {
159	add_warning(attached_message(message));
160	}
161	};
162
163
164	// ----------------------
165	// class Reader
166
167	class Reader : public MessageAttachable {
168	private:
169	istream& in;
170	char lineBuffer[MAX_LINE_LENGTH];
171	char lineBuffer2[MAX_LINE_LENGTH];
172	bool readAgain;
173	bool eof;
174	int lineNo;
175
176	string location_description() const OVERRIDE { return ""; }
177	size_t line_number() const OVERRIDE { return lineNo; }
178
179	void getline() {
180	if (!eof) {
181	if (in.eof()) eof = true;
182	else {
183	h2x_assert(in.good());
184
185	in.getline(lineBuffer, MAX_LINE_LENGTH);
186	lineNo++;
187
188	if (in.eof()) eof = true;
189	else if (in.fail()) throw "line too long";
190
191	if (strchr(lineBuffer, '\t')) {
192	int o2 = 0;
193
194	for (int o = 0; lineBuffer[o]; ++o) {
195	if (lineBuffer[o] == '\t') {
196	int spaces = TABSIZE - (o2 % TABSIZE);
197	while (spaces--) lineBuffer2[o2++] = ' ';
198	}
199	else {
200	lineBuffer2[o2++] = lineBuffer[o];
201	}
202	}
203	lineBuffer2[o2] = 0;
204	strcpy(lineBuffer, lineBuffer2);
205	}
206
207	char *eol = strchr(lineBuffer, 0)-1;
208	while (eol >= lineBuffer && isspace(eol[0])) {
209	eol[0] = 0; // trim trailing whitespace
210	eol--;
211	}
212	if (eol > lineBuffer) {
213	// now eol points to last character
214	if (eol[0] == '-' && isalnum(eol[-1])) {
215	attach_warning("manual hyphenation detected");
216	}
217	}
218	}
219	}
220	}
221
222	public:
223	Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); }
224	virtual ~Reader() {}
225
226	const char *getNext() {
227	if (readAgain) readAgain = false;
228	else getline();
229	return eof ? 0 : lineBuffer;
230	}
231
232	void back() {
233	h2x_assert(!readAgain);
234	readAgain = true;
235	}
236
237	int getLineNo() const { return lineNo; }
238	};
239
// Kind of a paragraph (see parseSection() for how the kind is detected).
240	enum ParagraphType {
// ordinary text; pushParagraph() resets the current type to this value
241	PLAIN_TEXT,
// paragraph that started with an enumerator (see detectLineEnumType)
242	ENUMERATED,
// paragraph that started with a '-' or '*' bullet (see is_startof_itemlist_element)
243	ITEM,
244	};
// Style of the enumerator found in front of an ENUMERATED paragraph.
245	enum EnumerationType {
// no enumerator
246	NONE,
// number followed by '.' or ')'
247	DIGITS,
// uppercase letter followed by '.' or ')'
248	ALPHA_UPPER,
// lowercase letter followed by '.' or ')'
249	ALPHA_LOWER,
250	};
251
252	class Ostring {
253	string content;
254	size_t lineNo; // where string came from
255	ParagraphType type;
256
257	// only valid for type==ENUMERATED:
258	EnumerationType etype;
259	unsigned number;
260
261	public:
262
263	Ostring(const string& s, size_t line_no, ParagraphType type_)
264	: content(s),
265	lineNo(line_no),
266	type(type_),
267	etype(NONE)
268	{
269	h2x_assert(type != ENUMERATED);
270	}
271	Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
272	: content(s),
273	lineNo(line_no),
274	type(type_),
275	etype(etype_),
276	number(num)
277	{
278	h2x_assert(type == ENUMERATED);
279	h2x_assert(etype == DIGITS \|\| etype == ALPHA_UPPER \|\| etype == ALPHA_LOWER);
280	h2x_assert(num>0);
281	}
282
283
284	operator const string&() const { return content; }
285	operator string&() { return content; }
286
287	const string& as_string() const { return content; }
288	string& as_string() { return content; }
289
290	size_t get_lineno() const { return lineNo; }
291
292	const ParagraphType& get_type() const { return type; }
293	const EnumerationType& get_enum_type() const {
294	h2x_assert(type == ENUMERATED);
295	return etype;
296	}
297	unsigned get_number() const {
298	h2x_assert(type == ENUMERATED);
299	return number;
300	}
301
302	// some wrapper to make Ostring act like string
303	const char *c_str() const { return content.c_str(); }
304	};
305
306	typedef list<Ostring> Ostrings;
307
308	#if defined(WARN_MISSING_HELP)
309	static void check_TODO(const char *line, const Reader& reader) {
310	if (strstr(line, "@@@") != NULL \|\| strstr(line, "TODO") != NULL) {
311	reader.attach_warning(strf("TODO: %s", line));
312	}
313	}
314	#else
315	inline void check_TODO(const char *, const Reader&) { }
316	#endif // WARN_MISSING_HELP
317
318	// ----------------------------
319	// class Section
320
321	class Section : public MessageAttachable {
322	SectionType type;
323	string name;
324	Ostrings content;
325	size_t lineno;
326
327	string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; }
328
329	public:
330	Section(string name_, SectionType type_, size_t lineno_)
331	: type(type_),
332	name(name_),
333	lineno(lineno_)
334	{}
335	virtual ~Section() {}
336
337	const Ostrings& Content() const { return content; }
338	Ostrings& Content() { return content; }
339	SectionType get_type() const { return type; }
340	size_t line_number() const OVERRIDE { return lineno; }
341	const string& getName() const { return name; }
342	void setName(const string& name_) { name = name_; }
343	};
344
345	typedef list<Section> SectionList;
346
347	// --------------------
348	// class Link
349
// A link to another help page together with the input line where it was found.
class Link {
public:
    Link(const string& target_, size_t source_lineno_)
        : target(target_),
          source_lineno(source_lineno_)
    {
    }

    // name of the page linked to
    const string& Target() const {
        return target;
    }
    // line (of the linking file) containing the link
    size_t SourceLineno() const {
        return source_lineno;
    }

private:
    string target;
    size_t source_lineno;
};
363
364	typedef list<Link> Links;
365
366	// ------------------------
367	// class Helpfile
368
369	class Helpfile {
370	private:
371	Links uplinks;
372	Links references;
373	Links auto_references;
374	Section title;
375	SectionList sections;
376	string inputfile;
377
378	void check_self_ref(const string& link) {
379	size_t slash = inputfile.find('/');
380	if (slash != string::npos) {
381	if (inputfile.substr(slash+1) == link) {
382	throw string("Invalid link to self");
383	}
384	}
385	}
386
387	public:
388	Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {}
389	virtual ~Helpfile() {}
390
391	void readHelp(istream& in, const string& filename);
392	void writeXML(FILE *out, const string& page_name);
393	void extractInternalLinks();
394
395	const Section& get_title() const { return title; }
396	};
397
// true for the blank character only (tabs are expanded by the Reader, see TABSIZE)
inline bool isWhite(char c) {
    return ' ' == c;
}
399
400	inline bool isEmptyOrComment(const char *s) {
401	if (s[0] == '#') return true;
402	for (int off = 0; ; ++off) {
403	if (s[off] == 0) return true;
404	if (!isWhite(s[off])) break;
405	}
406
407	return false;
408	}
409
/*! extracts the keyword a line starts with
 * @param line the line to inspect (a keyword has to start at the very first column)
 * @param keyword receives the keyword (only modified if one was found)
 * @return NULL if no keyword was found (empty line or line starting with a blank),
 *         otherwise the position behind the keyword
 */
inline const char *extractKeyword(const char *line, string& keyword) {
    const char *space = strchr(line, ' ');

    if (space) {
        if (space == line) return NULL; // line starts with a blank -> no keyword

        keyword.assign(line, space-line);
        return space;
    }

    // no blank at all -> test for keyword w/o content behind
    if (!line[0]) return NULL; // empty line

    keyword = line;
    return strchr(line, 0);
}
427
428	inline const char eatWhite(const char line) {
429	// skips whitespace
430	while (isWhite(*line)) ++line;
431	return line;
432	}
433
434	inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) {
435	if (paragraph.length()) {
436	if (type == ENUMERATED) {
437	sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num));
438	}
439	else {
440	sec.Content().push_back(Ostring(paragraph, lineNo, type));
441	}
442
443	type = PLAIN_TEXT;
444	etype = NONE;
445	paragraph = "";
446	}
447	}
448
449	inline const char firstChar(const char s) {
450	while (isWhite(s[0])) ++s;
451	return s;
452	}
453
// true if 'contentStart' looks like the start of an item-list element,
// i.e. a '-' or '*' bullet followed by exactly one whitespace character
// which itself is followed by neither whitespace nor '-'.
inline bool is_startof_itemlist_element(const char *contentStart) {
    const bool has_bullet = contentStart[0] == '-' || contentStart[0] == '*';
    if (!has_bullet) return false;

    // <cctype> functions need values representable as unsigned char
    // (plain char may be negative for non-ASCII text)
    if (!isspace(static_cast<unsigned char>(contentStart[1]))) return false;

    const char behind = contentStart[2];
    return !(isspace(static_cast<unsigned char>(behind)) || behind == '-');
}
464
465	#define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums
466
467	static EnumerationType startsWithLetter(string& s, unsigned& number) {
468	// tests if first line starts with 'letter.'
469	// if true then 'letter.' is removed from the string
470	// the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..)
471
472	size_t off = s.find_first_not_of(" \n");
473	if (off == string::npos) return NONE;
474	if (!isalpha(s[off])) return NONE;
475
476	size_t astart = off;
477	EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER;
478
479	number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1;
480	++off;
481
482	h2x_assert(number>0 && number<MAX_ALLOWED_ENUM);
483
484	if (s[off] != '.' && s[off] != ')') return NONE;
485	if (s[off+1] != ' ') return NONE;
486
487	// remove 'letter.' from string :
488	++off;
489	while (s[off+1] == ' ') ++off;
490	s.erase(astart, off-astart+1);
491
492	return etype;
493	}
494
495	static bool startsWithNumber(string& s, unsigned& number) {
496	// tests if first line starts with 'number.'
497	// if true then 'number.' is removed from the string
498
499	size_t off = s.find_first_not_of(" \n");
500	if (off == string::npos) return false;
501	if (!isdigit(s[off])) return false;
502
503	size_t num_start = off;
504	number = 0;
505
506	for (; isdigit(s[off]); ++off) {
507	number = number*10 + (s[off]-'0');
508	}
509	if (number>MAX_ALLOWED_ENUM) return false;
510
511	if (s[off] != '.' && s[off] != ')') return false;
512	if (s[off+1] != ' ') return false;
513
514	// remove 'number.' from string :
515	++off;
516	while (s[off+1] == ' ') ++off;
517	s.erase(num_start, off-num_start+1);
518
519	return true;
520	}
521
522	static EnumerationType detectLineEnumType(string& line, unsigned& number) {
523	if (startsWithNumber(line, number)) return DIGITS;
524	return startsWithLetter(line, number);
525	}
526
// Reads the content of one section from 'reader' and stores it as paragraphs in 'sec'.
//
// 'line' is the text behind the section keyword (start of the first paragraph),
// 'indentation' the number of blanks used to prefix the first stored paragraph
// (readHelp passes the offset of 'line' inside the keyword line, or 0).
//
// Reading stops at end of input or at the next line starting with a keyword
// (that line is pushed back into the reader).
// Throws (const char*) for enumerations starting with zero.
527	static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
528	string paragraph = line;
529	size_t para_start_lineno = reader.getLineNo();
530
// state of the paragraph currently collected (reset by pushParagraph)
531	ParagraphType type = PLAIN_TEXT;
532	EnumerationType etype = NONE;
533	unsigned num = 0;
534
// number of the last accepted alphabetic enumerator; -1 wraps, so (last_alpha_num+1) is 0 and matches no letter
535	unsigned last_alpha_num = -1;
536
537	h2x_assert(sec.Content().empty());
538
539	while (1) {
540	line = reader.getNext();
541	if (!line) break;
542
// empty lines and comments terminate the current paragraph
543	if (isEmptyOrComment(line)) {
544	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
545	check_TODO(line, reader);
546	}
547	else {
548	string keyword;
549	const char *rest = extractKeyword(line, keyword);
550
// extractKeyword only succeeds for lines not starting with a blank
551	if (rest) { // a new keyword
552	reader.back();
553	break;
554	}
555
556	check_TODO(line, reader);
557
// modifiable copy of the line (bullets and enumerators get removed from it)
558	string Line = line;
559
// in OCCURRENCE sections every line forms a paragraph of its own
560	if (sec.get_type() == SEC_OCCURRENCE) {
561	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
562	}
563	else {
564	const char *firstNonWhite = firstChar(line);
565	if (is_startof_itemlist_element(firstNonWhite)) {
566	h2x_assert(firstNonWhite != line);
567
568	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
569
// blank out the bullet character
570	Line[firstNonWhite-line] = ' ';
571	type = ITEM; // is reset in call to pushParagraph
572	}
573	else {
574	unsigned foundNum;
575	EnumerationType foundEtype = detectLineEnumType(Line, foundNum);
576
// alphabetic enumerators are only accepted if they start at 'a'/'A' or continue the previous one
577	if (foundEtype == ALPHA_UPPER \|\| foundEtype == ALPHA_LOWER) {
578	if (foundNum == (last_alpha_num+1) \|\| foundNum == 1) {
579	last_alpha_num = foundNum;
580	}
581	else {
582	#if defined(WARN_IGNORED_ALPHA_ENUMS)
583	add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum"));
584	#endif
585	foundEtype = NONE;
586
// detectLineEnumType already removed the letter from 'Line' -> fetch the unmodified line again
587	reader.back();
588	Line = reader.getNext();
589	last_alpha_num = -1;
590	}
591	}
592
// an accepted enumerator starts a new paragraph
593	if (foundEtype != NONE) {
594	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
595
596	type = ENUMERATED;
597	num = foundNum;
598	etype = foundEtype;
599
600	if (!num) {
601	h2x_assert(etype == DIGITS);
602	throw "Enumerations starting with zero are not supported";
603	}
604	}
605	}
606	}
607
// append the line to the current paragraph (every line is preceded by a linefeed)
608	if (paragraph.length()) {
609	paragraph = paragraph+"\n"+Line;
610	}
611	else {
612	paragraph = string("\n")+Line;
613	para_start_lineno = reader.getLineNo();
614	}
615	}
616	}
617
// store whatever is left
618	pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
619
// prefix the first paragraph with a linefeed plus 'indentation' blanks
620	if (sec.Content().size()>0 && indentation>0) {
621	string spaces;
622	spaces.reserve(indentation);
623	spaces.append(indentation, ' ');
624
625	string& ostr = sec.Content().front();
626	ostr = string("\n") + spaces + ostr;
627	}
628	}
629
630	inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) {
631	for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) {
632	if (ex->Target() == link) {
633	if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno());
634	throw strf("Link to '%s' duplicated here.", link.c_str());
635	}
636	}
637	}
638	inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) {
639	check_specific_duplicates(link, uplinks, add_warnings);
640	check_specific_duplicates(link, references, add_warnings);
641	}
642
643	static void warnAboutDuplicate(SectionList& sections) {
644	set<string> seen;
645	SectionList::iterator end = sections.end();
646	for (SectionList::iterator s = sections.begin(); s != end; ++s) {
647	const string& sname = s->getName();
648	if (sname == "NOTES") continue; // do not warn about multiple NOTES sections
649
650	SectionList::iterator o = s; ++o;
651	for (; o != end; ++o) {
652	if (sname == o->getName()) {
653	o->attach_warning("duplicated SECTION name");
654	if (seen.find(sname) == seen.end()) {
655	s->attach_warning("name was first used");
656	seen.insert(sname);
657	}
658	}
659	}
660	}
661	}
662
663	void Helpfile::readHelp(istream& in, const string& filename) {
664	if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str()));
665
666	Reader read(in);
667
668	inputfile = filename; // remember file read (for comment)
669
670	const char *line;
671	const char *name_only = strrchr(filename.c_str(), '/');
672
673	h2x_assert(name_only);
674	++name_only;
675
676	try {
677	while (1) {
678	line = read.getNext();
679	if (!line) break;
680
681	if (isEmptyOrComment(line)) {
682	check_TODO(line, read);
683	continue;
684	}
685
686	check_TODO(line, read);
687
688	string keyword;
689	const char *rest = extractKeyword(line, keyword);
690
691	if (rest) { // found a keyword
692	if (keyword == "UP") {
693	rest = eatWhite(rest);
694	if (strlen(rest)) {
695	check_duplicates(rest, uplinks, references, true);
696	if (strcmp(name_only, rest) == 0) throw "UP link to self";
697
698	uplinks.push_back(Link(rest, read.getLineNo()));
699	}
700	}
701	else if (keyword == "SUB") {
702	rest = eatWhite(rest);
703	if (strlen(rest)) {
704	check_duplicates(rest, uplinks, references, true);
705	if (strcmp(name_only, rest) == 0) throw "SUB link to self";
706
707	references.push_back(Link(rest, read.getLineNo()));
708	}
709	}
710	else if (keyword == "TITLE") {
711	rest = eatWhite(rest);
712	parseSection(title, rest, 0, read);
713
714	if (title.Content().empty()) throw "empty TITLE not allowed";
715
716	const char *t = title.Content().front().c_str();
717
718	if (strstr(t, "Standard help file form") != 0) {
719	throw strf("Illegal title for help file: '%s'", t);
720	}
721	}
722	else {
723	if (keyword == "NOTE") keyword = "NOTES";
724	if (keyword == "EXAMPLE") keyword = "EXAMPLES";
725	if (keyword == "WARNING") keyword = "WARNINGS";
726
727	SectionType stype = SEC_NONE;
728	int idx;
729	for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) {
730	if (knownSections[idx] == keyword) {
731	stype = SectionType(idx);
732	break;
733	}
734	}
735
736	size_t lineno = read.getLineNo();
737
738	if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str());
739
740	if (stype == SEC_SECTION) {
741	string section_name = eatWhite(rest);
742	Section sec(section_name, stype, lineno);
743	parseSection(sec, "", 0, read);
744	sections.push_back(sec);
745	}
746	else {
747	Section sec(keyword, stype, lineno);
748	rest = eatWhite(rest);
749	parseSection(sec, rest, rest-line, read);
750	sections.push_back(sec);
751	}
752	}
753	}
754	else {
755	throw strf("Unhandled line");
756	}
757	}
758
759	warnAboutDuplicate(sections);
760	}
761	catch (string& err) { throw read.attached_message(err); }
762	catch (const char *err) { throw read.attached_message(err); }
763	}
764
765	static bool shouldReflow(const string& s, int& foundIndentation) {
766	// foundIndentation is only valid if shouldReflow() returns true
767	enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START;
768	bool equal_indent = true;
769	int lastIndent = -1;
770	int thisIndent = 0;
771
772	for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) {
773	if (*c == '\n') {
774	state = START;
775	thisIndent = 0;
776	}
777	else if (isWhite(*c)) {
778	if (state == DOT \|\| state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed
779	else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces
780	else if (state == CHAR) state = SPACE; // now seen 1 space
781	}
782	else {
783	if (state == MULTIPLE) return false; // character after multiple spaces
784	if (state == START) {
785	if (lastIndent == -1) lastIndent = thisIndent;
786	else if (lastIndent != thisIndent) equal_indent = false;
787	}
788	state = (c == '.' \|\| c == ',') ? DOT : CHAR;
789	}
790	}
791
792	if (lastIndent<0) {
793	equal_indent = false;
794	}
795
796	if (equal_indent) {
797	foundIndentation = lastIndent-1;
798	h2x_assert(foundIndentation >= 0);
799	}
800	return equal_indent;
801	}
802
803	static string correctSpaces(const string& text, int change) {
804	h2x_assert(text.find('\n') == string::npos);
805
806	if (!change) return text;
807
808	size_t first = text.find_first_not_of(' ');
809	if (first == string::npos) return ""; // empty line
810
811	if (change<0) {
812	int remove = -change;
813	h2x_assert(remove <= int(first));
814	return text.substr(remove);
815	}
816
817	h2x_assert(change>0); // add spaces
818	return string(change, ' ')+text;
819	}
820
821	static string correctIndentation(const string& text, int change) {
822	// removes 'remove' spaces from every line
823
824	size_t this_lineend = text.find('\n');
825	string result;
826
827	if (this_lineend == string::npos) {
828	result = correctSpaces(text, change);
829	}
830	else {
831	result = correctSpaces(text.substr(0, this_lineend), change);
832
833	while (this_lineend != string::npos) {
834	size_t next_lineend = text.find('\n', this_lineend+1);
835	if (next_lineend == string::npos) { // last line
836	result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change);
837	}
838	else {
839	result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change);
840	}
841	this_lineend = next_lineend;
842	}
843	}
844	return result;
845	}
846
// returns the number of leading blanks of 'text'
// (INT_MAX if 'text' is empty or contains blanks only)
inline size_t countSpaces(const string& text) {
    const size_t first = text.find_first_not_of(' ');
    return first == string::npos ? size_t(INT_MAX) : first; // npos -> empty line
}
852
853	static size_t scanMinIndentation(const string& text) {
854	size_t this_lineend = text.find('\n');
855	size_t min_indent = INT_MAX;
856
857	if (this_lineend == string::npos) {
858	min_indent = countSpaces(text);
859	}
860	else {
861	while (this_lineend != string::npos) {
862	size_t next_lineend = text.find('\n', this_lineend+1);
863	if (next_lineend == string::npos) {
864	min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1)));
865	}
866	else {
867	min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1)));
868	}
869	this_lineend = next_lineend;
870	}
871	}
872
873	if (min_indent == INT_MAX) min_indent = 0; // only empty lines
874	return min_indent;
875	}
876
877	// -----------------------------
878	// class ParagraphTree
879
880	class ParagraphTree : public MessageAttachable, virtual Noncopyable {
881	ParagraphTree *brother; // has same indentation as this
882	ParagraphTree *son; // indentation + 1
883
884	Ostring otext; // text of the Section (containing linefeeds)
885
886	bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text)
887	int indentation; // the real indentation of the blank (behind removed enumeration)
888
889
890	string location_description() const OVERRIDE { return "in paragraph starting here"; }
891	size_t line_number() const OVERRIDE { return otext.get_lineno(); }
892
893	ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end)
894	: son(NULL),
895	otext(*begin),
896	indentation(0)
897	{
898	h2x_assert(begin != end);
899
900	string& text = otext;
901
902	reflow = shouldReflow(text, indentation);
903	if (!reflow) {
904	size_t reststart = text.find('\n', 1);
905
906	if (reststart == 0) {
907	attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail");
908	}
909
910	if (reststart != string::npos) {
911	int rest_indent = -1;
912	string rest = text.substr(reststart);
913	bool rest_reflow = shouldReflow(rest, rest_indent);
914
915	if (rest_reflow) {
916	int first_indent = countSpaces(text.substr(1));
917	if (get_type() == PLAIN_TEXT) {
918	size_t last = text.find_last_not_of(' ', reststart-1);
919	bool is_header = last != string::npos && text[last] == ':';
920
921	if (!is_header && rest_indent == (first_indent+8)) {
922	#if defined(DEBUG)
923	size_t textstart = text.find_first_not_of(" \n");
924	h2x_assert(textstart != string::npos);
925	#endif // DEBUG
926
927	text = text.substr(0, reststart)+correctIndentation(rest, -8);
928	reflow = shouldReflow(text, indentation);
929	}
930	}
931	else {
932	int diff = rest_indent-first_indent;
933	if (diff>0) {
934	text = text.substr(0, reststart)+correctIndentation(rest, -diff);
935	reflow = shouldReflow(text, indentation);
936	}
937	else if (diff<0) {
938	// paragraph with more indent on first line (occurs?)
939	attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff));
940	}
941	}
942	}
943	}
944	}
945
946	if (!reflow) {
947	indentation = scanMinIndentation(text);
948	}
949	text = correctIndentation(text, -indentation);
950	if (get_type() == ITEM) {
951	h2x_assert(indentation >= 2);
952	indentation -= 2;
953	}
954
955	brother = buildParagraphTree(++begin, end);
956	}
957
958	void brothers_to_sons(ParagraphTree *new_brother);
959
960	public:
961	virtual ~ParagraphTree() {
962	delete brother;
963	delete son;
964	}
965
966	ParagraphType get_type() const { return otext.get_type(); }
967
968	bool is_itemlist_member() const { return get_type() == ITEM; }
969	unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; }
970	EnumerationType get_enum_type() const { return otext.get_enum_type(); }
971
972	const char *readable_type() const {
973	const char *res = NULL;
974	switch (get_type()) {
975	case PLAIN_TEXT: res = "PLAIN_TEXT"; break;
976	case ITEM: res = "ITEM"; break;
977	case ENUMERATED: res = "ENUMERATED"; break;
978	}
979	return res;
980	}
981
982	size_t countTextNodes() {
983	size_t nodes = 1; // this
984	if (son) nodes += son->countTextNodes();
985	if (brother) nodes += brother->countTextNodes();
986	return nodes;
987	}
988
989	#if defined(DUMP_PARAGRAPHS)
990	void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; }
991	char masknl(const char text) {
992	char *result = strdup(text);
993	for (int i = 0; result[i]; ++i) {
994	if (result[i] == '\n') result[i] = '\|';
995	}
996	return result;
997	}
998	void dump(ostream& out, int indent = 0) {
999	print_indent(out, indent+1);
1000	{
1001	char *mtext = masknl(otext.as_string().c_str());
1002	out << "text='" << mtext << "'\n";
1003	free(mtext);
1004	}
1005
1006	print_indent(out, indent+1);
1007	out << "type='" << readable_type() << "' ";
1008	if (get_type() == ENUMERATED) {
1009	out << "enumeration='" << otext.get_number() << "' ";
1010	}
1011	out << "reflow='" << reflow << "' ";
1012	out << "indentation='" << indentation << "'\n";
1013
1014	if (son) {
1015	print_indent(out, indent+2); cout << "son:\n";
1016	son->dump(out, indent+2);
1017	cout << "\n";
1018	}
1019	if (brother) {
1020	print_indent(out, indent); cout << "brother:\n";
1021	brother->dump(out, indent);
1022	}
1023	}
1024	#endif // DUMP_PARAGRAPHS
1025
1026	private:
1027	static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) {
1028	if (begin == end) return 0;
1029	return new ParagraphTree(begin, end);
1030	}
1031	public:
1032	static ParagraphTree* buildParagraphTree(const Section& sec) {
1033	const Ostrings& txt = sec.Content();
1034	if (txt.empty()) throw "attempt to build an empty ParagraphTree";
1035	return buildParagraphTree(txt.begin(), txt.end());
1036	}
1037
1038	bool contains(ParagraphTree *that) {
1039	return
1040	this == that \|\|
1041	(son && son->contains(that)) \|\|
1042	(brother && brother->contains(that));
1043	}
1044
1045	ParagraphTree predecessor(ParagraphTree before_this) {
1046	if (brother == before_this) return this;
1047	if (!brother) return 0;
1048	return brother->predecessor(before_this);
1049	}
1050
1051	void append(ParagraphTree *new_brother) {
1052	if (!brother) brother = new_brother;
1053	else brother->append(new_brother);
1054	}
1055
1056	bool is_some_brother(const ParagraphTree *other) const {
1057	return (other == brother) \|\| (brother && brother->is_some_brother(other));
1058	}
1059
1060	ParagraphTree* takeAllInFrontOf(ParagraphTree *after) {
1061	ParagraphTree *removed = this;
1062	ParagraphTree *after_pred = this;
1063
1064	h2x_assert(is_some_brother(after));
1065
1066	while (1) {
1067	h2x_assert(after_pred);
1068	h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after'
1069
1070	if (after_pred->brother == after) { // found after
1071	after_pred->brother = 0; // unlink
1072	break;
1073	}
1074	after_pred = after_pred->brother;
1075	}
1076
1077	return removed;
1078	}
1079
1080	ParagraphTree *firstListMember() {
1081	switch (get_type()) {
1082	case PLAIN_TEXT: break;
1083	case ITEM: return this;
1084	case ENUMERATED: {
1085	if (get_enumeration() == 1) return this;
1086	break;
1087	}
1088	}
1089	if (brother) return brother->firstListMember();
1090	return NULL;
1091	}
1092
1093	ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) {
1094	if (indentation<previous.indentation) return NULL;
1095	if (indentation == previous.indentation && get_type() == previous.get_type()) {
1096	if (get_type() != ENUMERATED) return this;
1097	if (get_enumeration() > previous.get_enumeration()) return this;
1098	return NULL;
1099	}
1100	if (!brother) return NULL;
1101	return brother->nextListMemberAfter(previous);
1102	}
1103	ParagraphTree *nextListMember() const {
1104	return brother ? brother->nextListMemberAfter(*this) : NULL;
1105	}
1106
1107	ParagraphTree* firstWithLessIndentThan(int wanted_indentation) {
1108	if (indentation < wanted_indentation) return this;
1109	if (!brother) return 0;
1110	return brother->firstWithLessIndentThan(wanted_indentation);
1111	}
1112
1113	void format_indentations();
1114	void format_lists();
1115
1116	private:
1117	static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) {
1118	Ostrings S;
1119	S.push_back(Ostring(Text, beginLineNo, type));
1120	return new ParagraphTree(S.begin(), S.end());
1121	}
1122	ParagraphTree *xml_write_list_contents();
1123	ParagraphTree *xml_write_enum_contents();
1124	void xml_write_textblock();
1125
1126	public:
1127	void xml_write();
1128	};
1129
#if defined(DUMP_PARAGRAPHS)
// helper function for use in gdb: dumps the tree starting at 'para' to stdout
static void dump_paragraph(ParagraphTree *para) {
    const int no_indent = 0;
    para->dump(cout, no_indent);
}
#endif
1136
1137	void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) {
1138	/*! folds down brothers to sons
1139	* @param new_brother brother of 'this->brother', will become new brother.
1140	* If new_brother == NULL -> make all brothers sons.
1141	*/
1142
1143	if (new_brother) {
1144	h2x_assert(is_some_brother(new_brother));
1145
1146	if (brother != new_brother) {
1147	#if defined(DEBUG)
1148	if (son) {
1149	son->attach_warning("Found unexpected son (in brothers_to_sons)");
1150	brother->attach_warning("while trying to transform paragraphs from here ..");
1151	new_brother->attach_warning(".. to here ..");
1152	attach_warning(".. into sons of this paragraph.");
1153	return;
1154	}
1155	#endif
1156
1157	h2x_assert(!son);
1158	h2x_assert(brother);
1159
1160	if (new_brother == NULL) { // all brothers -> sons
1161	son = brother;
1162	brother = NULL;
1163	}
1164	else {
1165	son = brother->takeAllInFrontOf(new_brother);
1166	brother = new_brother;
1167	}
1168	}
1169	}
1170	else {
1171	h2x_assert(!son);
1172	son = brother;
1173	brother = NULL;
1174	}
1175	}
1176	void ParagraphTree::format_lists() {
1177	// reformats tree such that all items/enumerations are brothers
1178	ParagraphTree *member = firstListMember();
1179	if (member) {
1180	for (ParagraphTree *curr = this; curr != member; curr = curr->brother) {
1181	h2x_assert(curr);
1182	if (curr->son) curr->son->format_lists();
1183	}
1184
1185	for (ParagraphTree *next = member->nextListMember();
1186	next;
1187	member = next, next = member->nextListMember())
1188	{
1189	member->brothers_to_sons(next);
1190	h2x_assert(member->brother == next);
1191
1192	if (member->son) member->son->format_lists();
1193	}
1194
1195	h2x_assert(!member->son); // member is the last item
1196
1197	if (member->brother) {
1198	ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1);
1199	member->brothers_to_sons(non_member);
1200	}
1201
1202	if (member->son) member->son->format_lists();
1203	if (member->brother) member->brother->format_lists();
1204	}
1205	else {
1206	for (ParagraphTree *curr = this; curr; curr = curr->brother) {
1207	h2x_assert(curr);
1208	if (curr->son) curr->son->format_lists();
1209	}
1210	}
1211	}
1212
1213	void ParagraphTree::format_indentations() {
1214	if (brother) {
1215	ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1);
1216	#if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION)
1217	if (same_indent && indentation != same_indent->indentation) {
1218	same_indent->attach_warning("indentation is assumed to be same as ..");
1219	attach_warning(".. here");
1220	}
1221	#endif
1222	brothers_to_sons(same_indent); // if same_indent==NULL -> make all brothers childs
1223	if (brother) brother->format_indentations();
1224	}
1225
1226	if (son) son->format_indentations();
1227	}
1228
1229	// -----------------
1230	// LinkType
1231
// Kinds of link targets. Each known kind occupies its own bit,
// so several kinds can be tested at once with a bit mask.
enum LinkType {
    LT_UNKNOWN = 0,
    LT_HTTP    = 1 << 0,
    LT_FTP     = 1 << 1,
    LT_FILE    = 1 << 2,
    LT_EMAIL   = 1 << 3,
    LT_HLP     = 1 << 4,
    LT_PS      = 1 << 5,
    LT_PDF     = 1 << 6
};
1242
// Value of the "type" attribute written for each LinkType.
// Index 0 belongs to LT_UNKNOWN, index n+1 to the LinkType with bit n set.
static const char *link_id[] = {
    "unknown", // LT_UNKNOWN
    "www",     // LT_HTTP
    "www",     // LT_FTP
    "www",     // LT_FILE
    "email",   // LT_EMAIL
    "hlp",     // LT_HLP
    "ps",      // LT_PS
    "pdf",     // LT_PDF
};
1253
1254	static string LinkType2id(LinkType type) {
1255	int idx = 0;
1256	while (type >= 1) {
1257	idx++;
1258	type = LinkType(type>>1);
1259	}
1260	return link_id[idx];
1261	}
1262
inline const char *getExtension(const string& name) {
    // Returns a pointer to the text behind the last '.' of 'name'
    // (pointing into the buffer of 'name'), or NULL if 'name' contains no '.'.
    size_t dot_pos = name.rfind('.');
    return dot_pos == string::npos ? NULL : name.c_str()+dot_pos+1;
}
1270
1271	static LinkType detectLinkType(const string& link_target) {
1272	LinkType type = LT_UNKNOWN;
1273	const char *ext = getExtension(link_target);
1274
1275	if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP;
1276	else if (link_target.find("http://") == 0) type = LT_HTTP;
1277	else if (link_target.find("ftp://") == 0) type = LT_FTP;
1278	else if (link_target.find("file://") == 0) type = LT_FILE;
1279	else if (link_target.find('@') != string::npos) type = LT_EMAIL;
1280	else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS;
1281	else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF;
1282
1283	return type;
1284	}
1285
1286	// --------------------------------------------------------------------------------
1287
1288
1289
static string locate_helpfile(const string& helpname) {
    // search for 'helpname' in various helpfile locations
    // (returns the first existing path or "" if the file was found nowhere)

    static const char * const search_dir[] = { "oldhelp/", "genhelp/" };
    const size_t              dir_count    = sizeof(search_dir)/sizeof(search_dir[0]);

    for (size_t d = 0; d<dir_count; ++d) {
        string      candidate = string(search_dir[d])+helpname;
        struct stat st;
        if (stat(candidate.c_str(), &st) == 0) {
            return candidate;
        }
    }
    return "";
}
1306
1307	static string locate_document(const string& docname) {
1308	// search for 'docname' or 'docname.gz' in various helpfile locations
1309
1310	string located = locate_helpfile(docname);
1311	if (located.empty()) {
1312	located = locate_helpfile(docname+".gz");
1313	}
1314	return located;
1315	}
1316
1317	static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) {
1318	if (type == LT_UNKNOWN) {
1319	string msg = string("Invalid link (dest='")+dest+"')";
1320	throw LineAttachedMessage(msg, source_line);
1321	}
1322
1323	link.add_attribute("dest", dest);
1324	link.add_attribute("type", LinkType2id(type));
1325	link.add_attribute("source_line", source_line);
1326
1327	if (type&(LT_HLP\|LT_PDF\|LT_PS)) { // other links (www, email) cannot be checked for existence here
1328	string fullhelp = ((type&LT_HLP) ? locate_helpfile : locate_document)(dest);
1329	if (fullhelp.empty()) {
1330	link.add_attribute("missing", "1");
1331	string deadlink = strf("Dead link to '%s'", dest.c_str());
1332	#if defined(DEVEL_RELEASE)
1333	throw LineAttachedMessage(deadlink, source_line);
1334	#else // !defined(DEVEL_RELEASE)
1335	add_warning(deadlink, source_line);
1336	#endif
1337	}
1338	}
1339	}
1340
1341	static void print_XML_Text_expanding_links(const string& text, size_t lineNo) {
1342	size_t found = text.find("LINK{", 0);
1343	if (found != string::npos) {
1344	size_t inside_link = found+5;
1345	size_t close = text.find('}', inside_link);
1346
1347	if (close == string::npos) throw "unclosed 'LINK{}'";
1348
1349	string link_target = text.substr(inside_link, close-inside_link);
1350	LinkType type = detectLinkType(link_target);
1351	string dest = link_target;
1352
1353	XML_Text(text.substr(0, found));
1354
1355	{
1356	XML_Tag link("LINK");
1357	link.set_on_extra_line(false);
1358	add_link_attributes(link, type, dest, lineNo);
1359	}
1360
1361	print_XML_Text_expanding_links(text.substr(close+1), lineNo);
1362	}
1363	else {
1364	XML_Text t(text);
1365	}
1366	}
1367
1368	void ParagraphTree::xml_write_textblock() {
1369	XML_Tag textblock("T");
1370	textblock.add_attribute("reflow", reflow ? "1" : "0");
1371
1372	{
1373	string usedText;
1374	const string& text = otext;
1375	if (reflow) {
1376	usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level);
1377	}
1378	else {
1379	usedText = text;
1380	}
1381	print_XML_Text_expanding_links(usedText, otext.get_lineno());
1382	}
1383	}
1384
1385	ParagraphTree *ParagraphTree::xml_write_list_contents() {
1386	h2x_assert(is_itemlist_member());
1387	#if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1388	if (!reflow) attach_warning("ITEM not reflown (check output)");
1389	#endif
1390	{
1391	XML_Tag entry("ENTRY");
1392	entry.add_attribute("item", "1");
1393	xml_write_textblock();
1394	if (son) son->xml_write();
1395	}
1396	if (brother && brother->is_itemlist_member()) {
1397	return brother->xml_write_list_contents();
1398	}
1399	return brother;
1400	}
1401	ParagraphTree *ParagraphTree::xml_write_enum_contents() {
1402	h2x_assert(get_enumeration());
1403	#if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1404	if (!reflow) attach_warning("ENUMERATED not reflown (check output)");
1405	#endif
1406	{
1407	XML_Tag entry("ENTRY");
1408	switch (get_enum_type()) {
1409	case DIGITS:
1410	entry.add_attribute("enumerated", strf("%i", get_enumeration()));
1411	break;
1412	case ALPHA_UPPER:
1413	entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration()));
1414	break;
1415	case ALPHA_LOWER:
1416	entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration()));
1417	break;
1418	default:
1419	h2x_assert(0);
1420	break;
1421	}
1422	xml_write_textblock();
1423	if (son) son->xml_write();
1424	}
1425	if (brother && brother->get_enumeration()) {
1426	int diff = brother->get_enumeration()-get_enumeration();
1427	if (diff != 1) {
1428	attach_warning("Non-consecutive enumeration detected between here..");
1429	brother->attach_warning(".. and here");
1430	}
1431	return brother->xml_write_enum_contents();
1432	}
1433	return brother;
1434	}
1435
1436	void ParagraphTree::xml_write() {
1437	try {
1438	ParagraphTree *next = NULL;
1439	if (get_enumeration()) {
1440	XML_Tag enu("ENUM");
1441	if (get_enumeration() != 1) {
1442	attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration()));
1443	}
1444	next = xml_write_enum_contents();
1445	#if defined(WARN_LONESOME_ENUM_ELEMENTS)
1446	if (next == brother) attach_warning("Suspicious single-element-ENUM");
1447	#endif
1448	}
1449	else if (is_itemlist_member()) {
1450	XML_Tag list("LIST");
1451	next = xml_write_list_contents();
1452	#if defined(WARN_LONESOME_LIST_ELEMENTS)
1453	if (next == brother) attach_warning("Suspicious single-element-LIST");
1454	#endif
1455	}
1456	else {
1457	{
1458	XML_Tag para("P");
1459	xml_write_textblock();
1460	if (son) son->xml_write();
1461	}
1462	next = brother;
1463	}
1464	if (next) next->xml_write();
1465	}
1466	catch (string& err) { throw attached_message(err); }
1467	catch (const char *err) { throw attached_message(err); }
1468	}
1469
1470	static void create_top_links(const Links& links, const char *tag) {
1471	for (Links::const_iterator s = links.begin(); s != links.end(); ++s) {
1472	XML_Tag link(tag);
1473	add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno());
1474	}
1475	}
1476
1477	void Helpfile::writeXML(FILE *out, const string& page_name) {
1478	XML_Document xml("PAGE", "arb_help.dtd", out);
1479
1480	xml.skip_empty_tags = true;
1481	xml.indentation_per_level = 2;
1482
1483	xml.getRoot().add_attribute("name", page_name);
1484	#if defined(DEBUG)
1485	xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version
1486	#else
1487	xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version
1488	#endif // DEBUG
1489
1490	xml.getRoot().add_attribute("source", inputfile.c_str());
1491
1492	{
1493	XML_Comment(string("automatically generated from ../")+inputfile+' ');
1494	}
1495
1496	create_top_links(uplinks, "UP");
1497	create_top_links(references, "SUB");
1498	create_top_links(auto_references, "SUB");
1499
1500	{
1501	XML_Tag title_tag("TITLE");
1502	const Ostrings& T = title.Content();
1503	for (Ostrings::const_iterator s = T.begin(); s != T.end(); ++s) {
1504	if (s != T.begin()) { XML_Text text("\n"); }
1505	XML_Text text(*s);
1506	}
1507	}
1508
1509	for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1510	try {
1511	XML_Tag section_tag("SECTION");
1512	section_tag.add_attribute("name", sec->getName());
1513
1514	ParagraphTree ptree = ParagraphTree::buildParagraphTree(sec);
1515
1516	#if defined(DEBUG)
1517	size_t textnodes = ptree->countTextNodes();
1518	#endif
1519	#if defined(DUMP_PARAGRAPHS)
1520	cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n";
1521	ptree->dump(cout);
1522	cout << "----------------------------------------\n";
1523	#endif
1524
1525	ptree->format_lists();
1526
1527	#if defined(DUMP_PARAGRAPHS)
1528	cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n";
1529	ptree->dump(cout);
1530	cout << "----------------------------------------\n";
1531	#endif
1532	#if defined(DEBUG)
1533	size_t textnodes2 = ptree->countTextNodes();
1534	h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error
1535	#endif
1536
1537	ptree->format_indentations();
1538
1539	#if defined(DUMP_PARAGRAPHS)
1540	cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n";
1541	ptree->dump(cout);
1542	cout << "----------------------------------------\n";
1543	#endif
1544	#if defined(DEBUG)
1545	size_t textnodes3 = ptree->countTextNodes();
1546	h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error
1547	#endif
1548
1549	ptree->xml_write();
1550
1551	delete ptree;
1552	}
1553	catch (string& err) { throw sec->attached_message(err); }
1554	catch (const char *err) { throw sec->attached_message(err); }
1555	}
1556	}
1557
1558	void Helpfile::extractInternalLinks() {
1559	for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1560	try {
1561	const Ostrings& s = sec->Content();
1562
1563	for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) {
1564	const string& line = *li;
1565	size_t start = 0;
1566
1567	while (1) {
1568	size_t found = line.find("LINK{", start);
1569	if (found == string::npos) break;
1570	found += 5;
1571	size_t close = line.find('}', found);
1572	if (close == string::npos) break;
1573
1574	string link_target = line.substr(found, close-found);
1575
1576	if (link_target.find("http://") == string::npos &&
1577	link_target.find("ftp://") == string::npos &&
1578	link_target.find("file://") == string::npos &&
1579	link_target.find('@') == string::npos)
1580	{
1581	check_self_ref(link_target);
1582
1583	try {
1584	check_specific_duplicates(link_target, references, false); // check only sublinks here
1585	check_specific_duplicates(link_target, uplinks, false); // check only uplinks here
1586	check_specific_duplicates(link_target, auto_references, false); // check only sublinks here
1587
1588	// only auto-add inline reference if none of the above checks has thrown
1589	auto_references.push_back(Link(link_target, sec->line_number()));
1590	}
1591	catch (string& err) {
1592	; // silently ignore inlined
1593	}
1594	}
1595	start = close+1;
1596	}
1597	}
1598	}
1599	catch (string& err) {
1600	throw sec->attached_message("'"+err+"' while scanning LINK{}");
1601	}
1602	}
1603	}
1604
1605	static void show_err(const string& err, size_t lineno, const string& helpfile) {
1606	if (err.find(helpfile+':') != string::npos) {
1607	cerr << err;
1608	}
1609	else if (lineno == NO_LINENUMBER_INFO) {
1610	cerr << helpfile << ":1: [in unknown line] " << err;
1611	}
1612	else {
1613	cerr << helpfile << ":" << lineno << ": " << err;
1614	}
1615	cerr << '\n';
1616	}
1617	inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) {
1618	show_err(line_err.Message(), line_err.Lineno(), helpfile);
1619	}
1620	inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) {
1621	show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile);
1622	}
1623	inline void show_warnings(const string& helpfile) {
1624	for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) {
1625	show_warning(*wi, helpfile);
1626	}
1627	}
1628	static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) {
1629	show_err(error, helpfile);
1630	show_warnings(helpfile);
1631	}
1632
1633	int ARB_main(int argc, char *argv[]) {
1634	if (argc != 3) {
1635	cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n";
1636	return EXIT_FAILURE;
1637	}
1638
1639	Helpfile help;
1640	string arb_help;
1641
1642	try {
1643	try {
1644	arb_help = argv[1];
1645	string xml_output = argv[2];
1646
1647	{
1648	ifstream in(arb_help.c_str());
1649	help.readHelp(in, arb_help);
1650	}
1651
1652	help.extractInternalLinks();
1653
1654	{
1655	FILE *out = std::fopen(xml_output.c_str(), "wt");
1656	if (!out) throw string("Can't open '")+xml_output+'\'';
1657
1658	try {
1659	// arb_help contains 'oldhelp/name.hlp'
1660	size_t slash = arb_help.find('/');
1661	size_t dot = arb_help.find_last_of('.');
1662
1663	if (slash == string::npos \|\| dot == string::npos) {
1664	throw string("parameter <ARB helpfile> has to be in format 'oldhelp/name.hlp' (not '"+arb_help+"')");
1665	}
1666
1667	string page_name(arb_help, slash+1, dot-slash-1);
1668	help.writeXML(out, page_name);
1669	fclose(out);
1670	}
1671	catch (...) {
1672	fclose(out);
1673	remove(xml_output.c_str());
1674	throw;
1675	}
1676	}
1677
1678	show_warnings(arb_help);
1679
1680	return EXIT_SUCCESS;
1681	}
1682	catch (string& err) { throw unattached_message(err); }
1683	catch (const char * err) { throw unattached_message(err); }
1684	catch (LineAttachedMessage& err) { throw; }
1685	catch (...) { throw unattached_message("unknown exception in arb_help2xml"); }
1686	}
1687	catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); }
1688	catch (...) { h2x_assert(0); }
1689
1690	return EXIT_FAILURE;
1691	}
1692
1693	// --------------------------------------------------------------------------------
1694
1695	#ifdef UNIT_TESTS
1696	#include <test_unit.h>
1697	#include <arb_msg.h>
1698
1699	static arb_test::match_expectation help_file_compiles(const char helpfile, const char expected_title, const char *expected_error_part) {
1700	using namespace arb_test;
1701	expectation_group expected;
1702
1703	ifstream in(helpfile);
1704
1705	LineAttachedMessage *error = NULL;
1706
1707	Helpfile help;
1708	try {
1709	help.readHelp(in, helpfile);
1710	help.extractInternalLinks();
1711
1712	FILE *devnul = fopen("/dev/null", "wt");
1713	if (!devnul) throw unattached_message("can't write to null device");
1714	help.writeXML(devnul, "dummy");
1715	fclose(devnul);
1716	}
1717	catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); }
1718	catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); }
1719
1720	if (expected_error_part) {
1721	expected.add(that(error).does_differ_from_NULL());
1722	if (error) expected.add(that(error->Message()).does_contain(expected_error_part));
1723	}
1724	else {
1725	expected.add(that(error).is_equal_to_NULL());
1726	if (!error) {
1727	Section title = help.get_title();
1728	const Ostrings& title_strings = title.Content();
1729
1730	expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title));
1731	expected.add(that(title_strings.size()).is_equal_to(1));
1732	}
1733	else {
1734	show_error_and_warnings(*error, helpfile);
1735	}
1736	}
1737
1738	delete error;
1739
1740	return all().ofgroup(expected);
1741	}
1742
1743	#define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULL))
1744	#define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULL,expError))
1745
1746	void TEST_hlp2xml_conversion() {
1747	TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE"));
1748
1749	HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp
1750
1751	HELP_FILE_COMPILES("oldhelp/markbyref.hlp", "Mark by reference"); // oldhelp/markbyref.hlp
1752	HELP_FILE_COMPILES("oldhelp/ad_align.hlp", "Alignment Administration"); // oldhelp/ad_align.hlp
1753	HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights"); // genhelp/copyright.hlp
1754
1755	HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file
1756	}
1757	TEST_PUBLISH(TEST_hlp2xml_conversion);
1758
1759
1760	// #define TEST_AUTO_UPDATE // uncomment to update expected xml // @@@ comment-out!
1761
1762	void TEST_hlp2xml_output() {
1763	string tested_helpfile[] = {
1764	"unittest"
1765	};
1766
1767	string HELP_SOURCE = "../../HELP_SOURCE/";
1768	string LIB = "../../lib/";
1769	string EXPECTED = "help/";
1770
1771	for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) {
1772	string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml";
1773	string html = LIB + "help_html/" + tested_helpfile[i] + ".html";
1774	string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp";
1775
1776	string xml_expected = EXPECTED + tested_helpfile[i] + ".xml";
1777	string html_expected = EXPECTED + tested_helpfile[i] + ".html";
1778	string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp";
1779
1780
1781	#if defined(TEST_AUTO_UPDATE)
1782	# if defined(NDEBUG)
1783	# error please use auto-update only in DEBUG mode
1784	# endif
1785	TEST_COPY_FILE(xml.c_str(), xml_expected.c_str());
1786	TEST_COPY_FILE(html.c_str(), html_expected.c_str());
1787	TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str());
1788
1789	#else // !defined(TEST_AUTO_UPDATE)
1790
1791	# if defined(DEBUG)
1792	int expected_xml_difflines = 0;
1793	int expected_hlp_difflines = 0;
1794	# else // !defined(DEBUG)
1795	int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning
1796	int expected_hlp_difflines = 1; // resulting warning in helpfile
1797	# endif
1798	TEST_EXPECT_TEXTFILE_DIFFLINES(xml_expected.c_str(), xml.c_str(), expected_xml_difflines);
1799	TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html_expected.c_str(), html.c_str(), 0); // html contains the update-date
1800	TEST_EXPECT_TEXTFILE_DIFFLINES(hlp_expected.c_str(), hlp.c_str(), expected_hlp_difflines);
1801	#endif
1802	}
1803	}
1804
1805
1806	#if defined(PROTECT_HELP_VS_CHANGES)
1807	void TEST_protect_help_vs_changes() { // should normally be disabled
1808	// fails if help changes compared to another checkout
1809	// or just updates the diff w/o failing (if you comment out the last line)
1810	//
1811	// if the patch is hugo and you load it into xemacs
1812	// you might want to (turn-on-lazy-shot)
1813	//
1814	// patch-pointer: ../UNIT_TESTER/run/help_changes.patch
1815
1816	bool do_help = true;
1817	bool do_html = true;
1818
1819	const char *ref_WC = "ARB.help.ref";
1820
1821	// ---------------------------------------- config above
1822
1823	string this_base = "../..";
1824	string ref_base = this_base+"/../"+ref_WC;
1825	string to_help = "/lib/help";
1826	string to_html = "/lib/help_html";
1827	string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help;
1828	string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html;
1829
1830	string update_cmd;
1831
1832	if (do_help) {
1833	if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")";
1834	else update_cmd = diff_help;
1835	}
1836	else if (do_html) update_cmd = diff_html;
1837
1838	string patch = "help_changes.patch";
1839	update_cmd += " >"+patch+" \|\|true";
1840
1841	string fail_on_change_cmd = "test \"`cat "+patch+" \| grep -v '^Common subdirectories' \| wc -l`\" = \"0\" \|\| ( echo \"Error: Help changed\"; false)";
1842
1843	TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str()));
1844	TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit
1845	}
1846	#endif
1847
1848	#endif // UNIT_TESTS

Note: See TracBrowser for help on using the repository browser.

Download in other formats: