Context Navigation

source: tags/svn.1.5.4/HELP_SOURCE/arb_help2xml.cxx

Visit:

Last change on this file was 8393, checked in by westram, 13 years ago
cppchecked (1.53)
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 43.9 KB

Line
1	// ==================================================================== //
2	// //
3	// File : arb_help2xml.cxx //
4	// Purpose : Converts old ARB help format to XML //
5	// //
6	// Coded by Ralf Westram (coder@reallysoft.de) in October 2001 //
7	// Copyright Department of Microbiology (Technical University Munich) //
8	// //
9	// Visit our web site at: http://www.arb-home.de/ //
10	// //
11	// ==================================================================== //
12
13	#include <xml.hxx>
14
15	#include <list>
16	#include <set>
17	#include <iostream>
18	#include <fstream>
19
20	#include <cstdlib>
21	#include <cstdarg>
22	#include <cstring>
23	#include <climits>
24
25	#include <sys/stat.h>
26
27	using namespace std;
28
// assertion used throughout this file (forwards to arb_assert)
#define h2x_assert(cond) arb_assert(cond)

// debug builds additionally warn about unfinished help (see check_TODO)
#ifdef DEBUG
#define WARN_MISSING_HELP
#endif // DEBUG


// #define DUMP_DATA // use this to see internal data (class Helpfile)

// maximum length of lines in input stream
#define MAX_LINE_LENGTH 200

// distance between tab stops when tabs get expanded to spaces
#define TABSIZE 8
39
// Keywords that introduce a section in an old-format help file.
// The list is terminated by 0. The position of a keyword inside this list
// equals the value of the corresponding SectionType enumerator below
// (readHelp converts the index using SectionType(idx)), so both lists have
// to be kept in the same order.
static const char *knownSections[] = {
    "OCCURRENCE",
    "DESCRIPTION",
    "NOTES",
    "EXAMPLES",
    "WARNINGS",
    "BUGS",
    "QUESTION",
    "ANSWER",
    "SECTION",
    0
};

enum SectionType {
    // one enumerator per entry of knownSections (same order!)
    SEC_OCCURRENCE,
    SEC_DESCRIPTION,
    SEC_NOTES,
    SEC_EXAMPLES,
    SEC_WARNINGS,
    SEC_BUGS,
    SEC_QUESTION,
    SEC_ANSWER,
    SEC_SECTION,

    SEC_NONE, // matches the terminating 0 of knownSections

    SEC_FAKE, // type of the title pseudo-section of class Helpfile
};
59
60
61	STATIC_ATTRIBUTED(__ATTR__VFORMAT(1), string vstrf(const char *format, va_list argPtr)) {
62	static size_t buf_size = 256;
63	static char *buffer = new char[buf_size];
64
65	size_t length;
66	while (1) {
67	if (!buffer) {
68	h2x_assert(buffer); // to stop when debugging
69	throw string("out of memory");
70	}
71
72	length = vsnprintf(buffer, buf_size, format, argPtr);
73	if (length < buf_size) break; // string fits into current buffer
74
75	// otherwise resize buffer :
76	buf_size += buf_size/2;
77	delete [] buffer;
78	buffer = new char[buf_size];
79	}
80
81	return string(buffer, length);
82	}
83
84	STATIC_ATTRIBUTED(__ATTR__FORMAT(1), string strf(const char *format, ...)) {
85	va_list argPtr;
86	va_start(argPtr, format);
87	string result = vstrf(format, argPtr);
88	va_end(argPtr);
89
90	return result;
91	}
92
93	// --------------------------------------
94	// warnings
95	// --------------------------------------
96
// A message text together with the number of the input line it refers to.
// Used for the collected warnings and thrown as error by readHelp().
class LineAttachedMessage {
    std::string message;
    size_t      lineno;

public:
    LineAttachedMessage(const std::string& message_, size_t lineno_) : message(message_), lineno(lineno_) {}

    size_t Lineno() const { return lineno; }
    const std::string& Message() const { return message; }
};
110
111
112	static list<LineAttachedMessage> warnings;
113
114	inline LineAttachedMessage make_warning(const string& warning, size_t lineno) {
115	return LineAttachedMessage(string("Warning: ")+warning, lineno);
116	}
117	inline void add_warning(const string& warning, size_t lineno) {
118	warnings.push_back(make_warning(warning, lineno));
119	}
120	inline void preadd_warning(const string& warning, size_t lineno) {
121	LineAttachedMessage line_message = make_warning(warning, lineno);
122	if (warnings.size() < 2) {
123	warnings.push_front(line_message);
124	}
125	else {
126	LineAttachedMessage prev_message = warnings.back();
127	warnings.pop_back();
128	warnings.push_back(line_message);
129	warnings.push_back(prev_message);
130	}
131	}
132
133	// ----------------------
134	// class Reader
135	// ----------------------
136
137	class Reader {
138	private:
139	istream& in;
140	char lineBuffer[MAX_LINE_LENGTH];
141	char lineBuffer2[MAX_LINE_LENGTH];
142	bool readAgain;
143	bool eof;
144	int lineNo;
145
146	void getline() {
147	if (!eof) {
148	if (in.eof()) eof = true;
149	else {
150	h2x_assert(in.good());
151
152	in.getline(lineBuffer, MAX_LINE_LENGTH);
153	lineNo++;
154
155	if (in.eof()) eof = true;
156	else if (in.fail()) throw "line too long";
157
158	if (strchr(lineBuffer, '\t')) {
159	int o2 = 0;
160
161	for (int o = 0; lineBuffer[o]; ++o) {
162	if (lineBuffer[o] == '\t') {
163	int spaces = TABSIZE - (o2 % TABSIZE);
164	while (spaces--) lineBuffer2[o2++] = ' ';
165	}
166	else {
167	lineBuffer2[o2++] = lineBuffer[o];
168	}
169	}
170	lineBuffer2[o2] = 0;
171	strcpy(lineBuffer, lineBuffer2);
172	}
173
174	char *eol = strchr(lineBuffer, 0)-1;
175	while (eol >= lineBuffer && eol[0] == ' ') eol--;
176	if (eol > lineBuffer) {
177	// now eol points to last character
178	if (eol[0] == '-' && isalnum(eol[-1])) {
179	add_warning("manual hyphenation detected", lineNo);
180	}
181	}
182	}
183	}
184	}
185
186	public:
187	Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); }
188	virtual ~Reader() {}
189
190	const char *getNext() {
191	if (readAgain) readAgain = false;
192	else getline();
193	return eof ? 0 : lineBuffer;
194	}
195
196	void back() {
197	h2x_assert(!readAgain);
198	readAgain = true;
199	}
200
201	int getLineNo() const { return lineNo; }
202	};
203
204
205	typedef list<string> Strings;
206
207	class Section {
208	SectionType type;
209	Strings content;
210	size_t start_lineno;
211
212	public:
213	Section(SectionType type_, size_t start_lineno_) : type(type_), start_lineno(start_lineno_) {}
214
215	const Strings& Content() const { return content; }
216	Strings& Content() { return content; }
217
218	SectionType get_type() const { return type; }
219
220	size_t StartLineno() const { return start_lineno; }
221	void set_StartLineno(size_t start_lineno_) { start_lineno = start_lineno_; }
222	};
223
224
#if defined(WARN_MISSING_HELP)
// Adds a warning if 'line' contains one of the markers "@@@" or "TODO".
// The warning is attached to the current line of 'reader'.
void check_TODO(const char *line, const Reader& reader) {
    if (strstr(line, "@@@") != NULL || strstr(line, "TODO") != NULL) {
        add_warning(strf("TODO: %s", line), reader.getLineNo());
    }
}
#endif // WARN_MISSING_HELP
233
234	// ---------------------------
235	// class NamedSection
236	// ---------------------------
237	class NamedSection {
238	private:
239	string name;
240	Section section;
241
242	public:
243	NamedSection(const string& name_, const Section& section_)
244	: name(name_)
245	, section(section_)
246	{}
247	virtual ~NamedSection() {}
248
249	const Section& getSection() const { return section; }
250	const string& getName() const { return name; }
251	};
252
253	typedef list<NamedSection> NamedSections;
254
// A link found in a help file: the link target and the
// number of the source line in which the link was found.
class Link {
    std::string target;
    size_t      source_lineno;

public:
    Link(const std::string& target_, size_t source_lineno_) : target(target_), source_lineno(source_lineno_) {}

    size_t SourceLineno() const { return source_lineno; }
    const std::string& Target() const { return target; }
};

typedef std::list<Link> Links;
270
271	// -----------------------
272	// class Helpfile
273	// -----------------------
274	class Helpfile {
275	private:
276	Links uplinks;
277	Links references;
278	Links auto_references;
279	Section title;
280	NamedSections sections;
281	string inputfile;
282
283	public:
284	Helpfile() : title(SEC_FAKE, -1U) {}
285	virtual ~Helpfile() {}
286
287	void readHelp(istream& in, const string& filename);
288	void writeXML(FILE *out, const string& page_name);
289	void extractInternalLinks();
290
291	const Section& get_title() const { return title; }
292	};
293
// whitespace in the sense of this converter is the space character only
inline bool isWhite(char c) {
    return ' ' == c;
}
295
#if defined(DUMP_DATA)
// Debugging helpers (only compiled if DUMP_DATA is defined).
// Each overload prints 'title' followed by the content of the passed data to 'out'.

static void display(const Strings& strings, const string& title, FILE *out) {
    fprintf(out, " %s:\n", title.c_str());
    for (Strings::const_iterator s = strings.begin(); s != strings.end(); ++s) {
        fprintf(out, " '%s'\n", s->c_str());
    }
}
static void display(const Links& links, const string& title, FILE *out) {
    fprintf(out, " %s:\n", title.c_str());
    for (Links::const_iterator l = links.begin(); l != links.end(); ++l) {
        fprintf(out, " '%s' (line %lu)\n", l->Target().c_str(), static_cast<unsigned long>(l->SourceLineno()));
    }
}
static void display(const Section& section, const string& title, FILE *out) {
    display(section.Content(), title, out);
}
static void display(const NamedSections& sections, const string& title, FILE *out) {
    fprintf(out, "%s:\n", title.c_str());
    for (NamedSections::const_iterator s = sections.begin(); s != sections.end(); ++s) {
        display(s->getSection(), s->getName(), out);
    }
}
#endif // DUMP_DATA
310
311	inline bool isEmptyOrComment(const char *s) {
312	if (s[0] == '#') return true;
313	for (int off = 0; ; ++off) {
314	if (s[off] == 0) return true;
315	if (!isWhite(s[off])) break;
316	}
317
318	return false;
319	}
320
// Extracts the keyword at the start of 'line'.
//
// A keyword is the text in front of the first space. A line which starts
// with a space (or is empty) contains no keyword. A non-empty line without
// any space is a keyword without content behind.
//
// Returns NULL if no keyword was found ('keyword' is left unchanged).
// Otherwise stores the keyword in 'keyword' and returns the position
// behind the keyword (i.e. the first space or the terminating zero byte).
inline const char *extractKeyword(const char *line, std::string& keyword) {
    const char *space = strchr(line, ' ');

    if (space) {
        if (space == line) return 0; // line starts with space -> no keyword
        keyword.assign(line, space-line);
        return space;
    }

    if (line[0]) { // not empty -> keyword w/o content behind
        keyword = line;
        return line+strlen(line);
    }
    return 0;
}
338
339	inline const char eatWhite(const char line) {
340	// skips whitespace
341	while (isWhite(*line)) ++line;
342	return line;
343	}
344
345	inline void pushParagraph(Section& sec, string& paragraph) {
346	if (paragraph.length()) {
347	sec.Content().push_back(paragraph);
348	paragraph = "";
349	}
350	}
351
352	inline const char firstChar(const char s) {
353	while (isWhite(s[0])) ++s;
354	return s;
355	}
356
357	static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
358	string paragraph = line;
359	int lines_in_paragraph = 1;
360
361	if (sec.StartLineno() == -1U) {
362	sec.set_StartLineno(reader.getLineNo());
363	}
364
365	while (1) {
366	line = reader.getNext();
367	if (!line) break;
368	if (isEmptyOrComment(line)) {
369	pushParagraph(sec, paragraph); lines_in_paragraph = 0;
370	}
371	else {
372	string keyword;
373	const char *rest = extractKeyword(line, keyword);
374
375	if (rest) { // a new keyword
376	reader.back();
377	break;
378	}
379
380	#if defined(WARN_MISSING_HELP)
381	check_TODO(line, reader);
382	#endif // WARN_MISSING_HELP
383
384	string Line = line;
385
386	if (sec.get_type() == SEC_OCCURRENCE) {
387	pushParagraph(sec, paragraph); lines_in_paragraph = 0;
388	}
389	else {
390	const char *first = firstChar(line);
391	if (first[0] == '-') {
392	pushParagraph(sec, paragraph); lines_in_paragraph = 0;
393	Line[first-line] = ' ';
394	}
395	}
396
397	if (paragraph.length()) {
398	paragraph = paragraph+"\n"+Line;
399	}
400	else {
401	paragraph = string("\n")+Line;
402	}
403	lines_in_paragraph++;
404	}
405	}
406
407	pushParagraph(sec, paragraph);
408
409	if (sec.Content().size()>0 && indentation>0) {
410	string spaces;
411	spaces.reserve(indentation);
412	spaces.append(indentation, ' ');
413
414	Strings::iterator p = sec.Content().begin();
415
416	p = string("\n")+spaces+p;
417	}
418	}
419	inline void check_duplicates(const string& link, const char * /* where */, const Links& existing, bool add_warnings) {
420	for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) {
421	if (ex->Target() == link) {
422	if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno());
423	throw strf("Link to '%s' duplicated here.", link.c_str());
424	}
425	}
426	}
427	inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) {
428	check_duplicates(link, "UP", uplinks, add_warnings);
429	check_duplicates(link, "SUB", references, add_warnings);
430	}
431
432	void Helpfile::readHelp(istream& in, const string& filename) {
433	Reader read(in);
434
435	inputfile = filename; // remember file read (for comment)
436
437	const char *line;
438	const char *name_only = strrchr(filename.c_str(), '/');
439
440	h2x_assert(name_only);
441	++name_only;
442
443	try {
444	while (1) {
445	line = read.getNext();
446	if (!line) break;
447
448	if (isEmptyOrComment(line)) {
449	continue;
450	}
451
452	#if defined(WARN_MISSING_HELP)
453	check_TODO(line, read);
454	#endif // WARN_MISSING_HELP
455
456	string keyword;
457	const char *rest = extractKeyword(line, keyword);
458
459	if (rest) { // found a keyword
460	if (keyword == "UP") {
461	rest = eatWhite(rest);
462	if (strlen(rest)) {
463	check_duplicates(rest, uplinks, references, true);
464	if (strcmp(name_only, rest) == 0) throw "UP link to self";
465
466	uplinks.push_back(Link(rest, read.getLineNo()));
467	}
468	}
469	else if (keyword == "SUB") {
470	rest = eatWhite(rest);
471	if (strlen(rest)) {
472	check_duplicates(rest, uplinks, references, true);
473	if (strcmp(name_only, rest) == 0) throw "SUB link to self";
474
475	references.push_back(Link(rest, read.getLineNo()));
476	}
477	}
478	else if (keyword == "TITLE") {
479	rest = eatWhite(rest);
480	// parseSection(title, rest, rest-line, read);
481	parseSection(title, rest, 0, read);
482
483	if (title.Content().empty()) throw "empty TITLE not allowed";
484
485	const char *t = title.Content().front().c_str();
486
487	if (strstr(t, "Standard help file form") != 0) {
488	throw strf("Illegal title for help file: '%s'", t);
489	}
490	}
491	else {
492	if (keyword == "NOTE") keyword = "NOTES";
493	if (keyword == "EXAMPLE") keyword = "EXAMPLES";
494	if (keyword == "WARNING") keyword = "WARNINGS";
495
496	SectionType stype = SEC_NONE;
497	int idx;
498	for (idx = 0; knownSections[idx]; ++idx) {
499	if (knownSections[idx] == keyword) {
500	stype = SectionType(idx);
501	break;
502	}
503	}
504
505	if (knownSections[idx]) {
506	if (stype == SEC_SECTION) {
507	string section_name = eatWhite(rest);
508	Section sec(stype, read.getLineNo());
509
510	parseSection(sec, "", 0, read);
511	sections.push_back(NamedSection(section_name, sec));
512	}
513	else {
514	Section sec(stype, read.getLineNo());
515
516	rest = eatWhite(rest);
517	parseSection(sec, rest, rest-line, read);
518	sections.push_back(NamedSection(keyword, sec));
519	}
520	}
521	else {
522	throw strf("unknown keyword '%s'", keyword.c_str());
523	}
524	}
525	}
526	else {
527	throw strf("Unhandled line");
528	}
529	}
530	}
531
532	catch (string& err) { throw LineAttachedMessage(err, read.getLineNo()); }
533	catch (const char *err) { throw LineAttachedMessage(err, read.getLineNo()); }
534	}
535
536	static bool shouldReflow(const string& s, int& foundIndentation) {
537	// foundIndentation is only valid if shouldReflow() returns true
538	enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START;
539	bool equal_indent = true;
540	int lastIndent = -1;
541	int thisIndent = 0;
542
543	for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) {
544	if (*c == '\n') {
545	state = START;
546	thisIndent = 0;
547	}
548	else if (isWhite(*c)) {
549	if (state == DOT \|\| state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed
550	else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces
551	else if (state == CHAR) state = SPACE; // now seen 1 space
552	}
553	else {
554	if (state == MULTIPLE) return false; // character after multiple spaces
555	if (state == START) {
556	if (lastIndent == -1) lastIndent = thisIndent;
557	else if (lastIndent != thisIndent) equal_indent = false;
558	}
559	if (c == '.' \|\| c == ',') state = DOT;
560	else state = CHAR;
561	}
562	}
563
564	if (equal_indent) {
565	foundIndentation = lastIndent-1;
566	h2x_assert(lastIndent >= 0);
567	}
568	return equal_indent;
569	}
570
// Tests whether the first text in 's' (behind leading spaces and linefeeds)
// is an enumeration prefix, i.e. digits followed by ". " (e.g. "12. ").
//
// If so, the value is stored in 'number' and true is returned. If 'do_erase'
// is true, the digits and the '.' are additionally overwritten with spaces.
//
// Otherwise false is returned. In that case 's' is unchanged; 'number' may
// have been modified. Numbers too big for 'long long' are not accepted.
static bool startsWithNumber(std::string& s, long long &number, bool do_erase = true) {
    size_t off = s.find_first_not_of(" \n");
    if (off == std::string::npos) return false;
    // isdigit requires a value representable as unsigned char
    if (!isdigit(static_cast<unsigned char>(s[off]))) return false;

    const size_t num_start = off;
    number                 = 0;

    for (; off < s.length() && isdigit(static_cast<unsigned char>(s[off])); ++off) {
        const int digit = s[off]-'0';
        if (number > (LLONG_MAX-digit)/10) return false; // would overflow
        number = number*10 + digit;
    }

    if (off+1 >= s.length()) return false; // no room for ". " behind the digits
    if (s[off] != '.') return false;
    if (s[off+1] != ' ') return false;

    if (do_erase) {
        // remove 'number.' from string :
        for (size_t erase = num_start; erase <= off; ++erase) {
            s[erase] = ' ';
        }
    }

    return true;
}
598
// Returns the indentation of the first line of 's' which contains
// anything else than spaces (= number of characters between the start
// of that line and its first non-space character).
static int get_first_indentation(const std::string& s) {
    const size_t first_text     = s.find_first_not_of(" \n");
    const size_t newline_before = s.rfind('\n', first_text);

    if (newline_before != std::string::npos) {
        return first_text-newline_before-1;
    }
    return first_text; // text is in the very first line
}
607
608	static string correctSpaces(const string& text, int change) {
609	h2x_assert(text.find('\n') == string::npos);
610
611	if (!change) return text;
612
613	size_t first = text.find_first_not_of(' ');
614	if (first == string::npos) return ""; // empty line
615
616	if (change<0) {
617	int remove = -change;
618	h2x_assert(remove <= int(first));
619	return text.substr(remove);
620	}
621
622	h2x_assert(change>0); // add spaces
623	return string(change, ' ')+text;
624	}
625
626	static string correctIndentation(const string& text, int change) {
627	// removes 'remove' spaces from evry line
628
629	size_t this_lineend = text.find('\n');
630	string result;
631
632	if (this_lineend == string::npos) {
633	result = correctSpaces(text, change);
634	}
635	else {
636	result = correctSpaces(text.substr(0, this_lineend), change);
637
638	while (this_lineend != string::npos) {
639	size_t next_lineend = text.find('\n', this_lineend+1);
640	if (next_lineend == string::npos) { // last line
641	result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change);
642	}
643	else {
644	result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change);
645	}
646	this_lineend = next_lineend;
647	}
648	}
649	return result;
650	}
651
// Returns the number of spaces at the start of 'text'.
// For lines without any non-space character (incl. the empty string) INT_MAX is returned.
inline size_t countSpaces(const std::string& text) {
    const size_t first_text = text.find_first_not_of(' ');
    return first_text == std::string::npos ? size_t(INT_MAX) : first_text; // INT_MAX = empty line
}
657
// Returns the smallest indentation (number of leading spaces) found in any
// line of 'text'. Lines containing nothing but spaces are ignored.
// Returns 0 if 'text' contains no line with text.
//
// All lines are examined, including the one in front of the first linefeed.
static size_t scanMinIndentation(const std::string& text) {
    size_t min_indent = INT_MAX;
    size_t line_start = 0;

    while (1) {
        const size_t line_end   = text.find('\n', line_start);
        const size_t line_limit = line_end == std::string::npos ? text.length() : line_end;
        const size_t first_text = text.find_first_not_of(' ', line_start);

        // first_text >= line_limit means: current line contains only spaces
        if (first_text != std::string::npos && first_text < line_limit) {
            const size_t indent = first_text-line_start;
            if (indent < min_indent) min_indent = indent;
        }

        if (line_end == std::string::npos) break; // was last line
        line_start = line_end+1;
    }

    if (min_indent == INT_MAX) min_indent = 0; // only empty lines
    return min_indent;
}
681
682	// ----------------------------
683	// class ParagraphTree
684	// ----------------------------
685	class ParagraphTree : virtual Noncopyable {
686	private:
687	ParagraphTree *brother; // has same indentation as this
688	ParagraphTree *son; // indentation + 1
689
690	bool is_enumerated; // 1., 2., usw.
691	long long enumeration; // the value of the enumeration (undefined if !is_enumerated)
692
693	bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text)
694	int indentation; // the real indentation of the black (after enumeration was removed)
695
696	string text; // text of the Section (containing linefeeds)
697	size_t lineNo; // line number where Paragraph starts
698
699	ParagraphTree(Strings::const_iterator begin, const Strings::const_iterator end, size_t beginLineNo) {
700	h2x_assert(begin != end);
701
702	text = *begin;
703	son = 0;
704
705	enumeration = 0;
706	is_enumerated = startsWithNumber(text, enumeration);
707
708	lineNo = beginLineNo;
709
710	if (is_enumerated) {
711	size_t text_start = text.find_first_not_of(" \n");
712	size_t next_linestart = text.find('\n', text_start);
713
714	if (next_linestart != string::npos) {
715	// more than one line -> set indent of 1st line to indent of 2nd line :
716	++next_linestart; // point behind \n
717	size_t ind = get_first_indentation(text.substr(next_linestart));
718	text = string("\n")+string(ind, ' ') + text.substr(text_start);
719	}
720	}
721
722	indentation = 0;
723	reflow = shouldReflow(text, indentation);
724
725	if (!reflow) {
726	size_t reststart = text.find('\n', 1);
727	if (reststart != string::npos) {
728	int rest_indent = -1;
729	string rest = text.substr(reststart);
730	bool rest_reflow = shouldReflow(rest, rest_indent);
731
732	if (rest_reflow) {
733	int first_indent = countSpaces(text.substr(1));
734	size_t last = text.find_last_not_of(' ', reststart-1);
735	bool is_header = last != string::npos && text[last] == ':';
736
737	if (!is_header && rest_indent == (first_indent+8)) {
738	#if defined(DEBUG)
739	size_t textstart = text.find_first_not_of(" \n");
740	h2x_assert(textstart != string::npos);
741	#endif // DEBUG
742
743	text = text.substr(0, reststart)+correctIndentation(rest, -8);
744	reflow = shouldReflow(text, indentation);
745	}
746	}
747	}
748	}
749
750	if (!reflow) {
751	indentation = scanMinIndentation(text);
752	}
753
754	text = correctIndentation(text, -indentation);
755	brother = buildParagraphTree(++begin, end, beginLineNo);
756	}
757
758	public:
759	virtual ~ParagraphTree() {
760	delete brother;
761	delete son;
762	}
763
764	size_t countTextNodes() {
765	size_t nodes = 1; // this
766	if (son) nodes += son->countTextNodes();
767	if (brother) nodes += brother->countTextNodes();
768	return nodes;
769	}
770
771	void dump(ostream& out) {
772	out << "text='" << text << "'\n";
773	out << "is_enumerated='" << is_enumerated << "'";
774	out << "enumeration='" << enumeration << "'";
775	out << "reflow='" << reflow << "'";
776	out << "indentation='" << indentation << "'\n";
777
778	if (brother) { cout << "\nbrother:\n"; brother->dump(out); cout << "\n"; }
779	if (son) { cout << "\nson:\n"; son->dump(out); cout << "\n"; }
780	}
781
782	static ParagraphTree* buildParagraphTree(Strings::const_iterator begin, const Strings::const_iterator end, size_t beginLineNo) {
783	if (begin == end) return 0;
784	return new ParagraphTree(begin, end, beginLineNo);
785	}
786	static ParagraphTree* buildParagraphTree(const NamedSection& N) {
787	ParagraphTree *ptree = 0;
788	const Strings& S = N.getSection().Content();
789	if (S.empty()) throw string("Tried to build an empty ParagraphTree (Section=")+N.getName()+")";
790	else ptree = buildParagraphTree(S.begin(), S.end(), N.getSection().StartLineno());
791	return ptree;
792	}
793
794	bool contains(ParagraphTree *that) {
795	return
796	this == that \|\|
797	(son && son->contains(that)) \|\|
798	(brother && brother->contains(that));
799	}
800
801	ParagraphTree predeccessor(ParagraphTree before_this) {
802	if (brother == before_this) return this;
803	if (!brother) return 0;
804	return brother->predeccessor(before_this);
805	}
806
807	void append(ParagraphTree *new_brother) {
808	if (!brother) brother = new_brother;
809	else brother->append(new_brother);
810	}
811
812	ParagraphTree* removeTill(ParagraphTree *after) {
813	ParagraphTree *removed = this;
814	ParagraphTree *after_pred = this;
815
816	while (1) {
817	h2x_assert(after_pred);
818	h2x_assert(after_pred->brother); // removeTill called with non-existing 'after'
819
820	if (after_pred->brother == after) { // found after
821	after_pred->brother = 0; // unlink
822	break;
823	}
824	after_pred = after_pred->brother;
825	}
826
827	return removed;
828	}
829
830	ParagraphTree *firstEnumerated() {
831	if (is_enumerated) return this;
832	if (brother) return brother->firstEnumerated();
833	return 0;
834	}
835	ParagraphTree *nextEnumerated() {
836	h2x_assert(is_enumerated);
837	if (brother) {
838	ParagraphTree *next = brother->firstEnumerated();
839	if (next && next->enumeration == (enumeration+1)) {
840	return next;
841	}
842	}
843	return 0;
844	}
845
846	ParagraphTree* firstWithSameOrLowerIndent(int wanted_indentation) {
847	if (indentation <= wanted_indentation) return this;
848	if (!brother) return 0;
849	return brother->firstWithSameOrLowerIndent(wanted_indentation);
850	}
851
852	ParagraphTree* format_indentations();
853	ParagraphTree* format_enums();
854	private:
855	static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo) {
856	Strings S;
857	S.push_back(Text);
858	return new ParagraphTree(S.begin(), S.end(), beginLineNo);
859	}
860	ParagraphTree *extractEmbeddedEnum(int lookfor) {
861	size_t this_lineend = text.find('\n');
862
863	while (this_lineend != string::npos) {
864	long long number;
865	string embedded = text.substr(this_lineend);
866	if (startsWithNumber(embedded, number, false)) {
867	if (number == lookfor) {
868	text.erase(this_lineend);
869	return buildNewParagraph(embedded, lineNo);
870	}
871	break;
872	}
873	this_lineend = text.find('\n', this_lineend+1);
874	}
875
876	return 0;
877	}
878	public:
879	static size_t embeddedCounter;
880
881	void xml_write(bool ignore_enumerated = false, bool write_as_entry = false);
882	};
883
884	size_t ParagraphTree::embeddedCounter = 0;
885
886	ParagraphTree* ParagraphTree::format_enums() {
887	// reformats tree such that all enums are brothers
888	ParagraphTree *enum_this = firstEnumerated();
889
890	if (enum_this) { // we have enumeration
891	ParagraphTree *before_enum = predeccessor(enum_this);
892
893	if (before_enum) {
894	h2x_assert(before_enum->son == 0);
895	before_enum->son = before_enum->brother;
896	before_enum->brother = 0;
897	}
898
899	for (ParagraphTree *enum_next = enum_this->nextEnumerated();
900	enum_next;
901	enum_this = enum_next, enum_next = enum_this->nextEnumerated())
902	{
903	if (enum_next != enum_this->brother) {
904	h2x_assert(enum_this->son == 0);
905	enum_this->son = enum_this->brother->removeTill(enum_next);
906	enum_this->brother = enum_next;
907	}
908	}
909
910	// enum_this is the last enumeration
911	h2x_assert(!enum_this->son);
912
913	if (enum_this->brother) { // there are more sections behind enum
914	ParagraphTree *after_enum = enum_this->firstWithSameOrLowerIndent(enum_this->indentation-1);
915
916	if (after_enum) { // indent should go back after enum
917	h2x_assert(!enum_this->son);
918
919	if (after_enum != enum_this->brother) {
920	enum_this->son = enum_this->brother->removeTill(after_enum);
921	}
922	enum_this->brother = 0;
923
924	h2x_assert(before_enum);
925	h2x_assert(before_enum->brother == 0);
926	before_enum->brother = after_enum->format_enums();
927	}
928	else { // nothing after enum -> take all as children
929	h2x_assert(enum_this->son == 0);
930	enum_this->son = enum_this->brother;
931	enum_this->brother = 0;
932	}
933	if (enum_this->son) enum_this->son = enum_this->son->format_enums();
934	}
935	else {
936	if (before_enum) {
937	if (before_enum->son) before_enum->son->append(before_enum->brother);
938	before_enum->brother = 0;
939	}
940	}
941
942	if (enum_this->enumeration == 1) { // oops - only '1.' search for enum inside block
943	ParagraphTree *lookin = enum_this;
944	int lookfor;
945
946	for (lookfor = 2; ; ++lookfor) {
947	ParagraphTree *next_enum = lookin->extractEmbeddedEnum(lookfor);
948	if (!next_enum) break;
949
950	embeddedCounter++;
951	next_enum->brother = lookin->brother;
952	lookin->brother = next_enum;
953	lookin = next_enum;
954	}
955	}
956
957	return this;
958	}
959
960	// no enumerations found
961	return this;
962	}
963
964	ParagraphTree* ParagraphTree::format_indentations() {
965	if (brother) {
966	if (is_enumerated) {
967	if (brother) brother = brother->format_indentations();
968	if (son) son = son->format_indentations();
969	}
970	else {
971	ParagraphTree *same_indent = brother->firstWithSameOrLowerIndent(indentation);
972	if (same_indent) {
973	if (same_indent == brother) {
974	brother = brother->format_indentations();
975	if (son) son = son->format_indentations();
976	}
977	else {
978	ParagraphTree *children = brother->removeTill(same_indent);
979	brother = same_indent->format_indentations();
980	h2x_assert(!son);
981	son = children->format_indentations();
982	}
983	}
984	else { // none with same indent
985	h2x_assert(!son);
986	son = brother->format_indentations();
987	brother = 0;
988	}
989	}
990	}
991	else { // no brother
992	if (son) son = son->format_indentations();
993	}
994	return this;
995	}
996
997	// -----------------
998	// LinkType
999
// Kinds of link targets. Each kind occupies its own bit,
// so several kinds can be tested at once using a bit mask.
enum LinkType {
    LT_UNKNOWN = 0,
    LT_HTTP    = 1,
    LT_FTP     = 2,
    LT_FILE    = 4,
    LT_EMAIL   = 8,
    LT_HLP     = 16,
    LT_PS      = 32,
    LT_PDF     = 64
};

// Type ids written into the XML output.
// Index 0 belongs to LT_UNKNOWN, index N (N>0) to the LinkType with value 1<<(N-1).
static const char *link_id[] = {
    "unknown", // LT_UNKNOWN
    "www",     // LT_HTTP
    "www",     // LT_FTP
    "www",     // LT_FILE
    "email",   // LT_EMAIL
    "hlp",     // LT_HLP
    "ps",      // LT_PS
    "pdf",     // LT_PDF
};

// Returns the type id (see link_id) for 'type'.
static std::string LinkType2id(LinkType type) {
    // the index is the number of right shifts needed until the value becomes 0
    int idx = 0;
    for (int bits = type; bits >= 1; bits >>= 1) {
        ++idx;
    }
    return link_id[idx];
}
1030
// Returns the part of 'name' behind its last '.' or NULL if 'name' contains no '.'.
// The result points into 'name' (so it is only valid as long as 'name' is).
inline const char *getExtension(const std::string& name) {
    const size_t dot = name.rfind('.');
    return dot == std::string::npos ? NULL : name.c_str()+dot+1;
}
1038
1039	static LinkType detectLinkType(const string& link_target) {
1040	LinkType type = LT_UNKNOWN;
1041	const char *ext = getExtension(link_target);
1042
1043	if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP;
1044	else if (link_target.find("http://") == 0) type = LT_HTTP;
1045	else if (link_target.find("ftp://") == 0) type = LT_FTP;
1046	else if (link_target.find("file://") == 0) type = LT_FILE;
1047	else if (link_target.find('@') != string::npos) type = LT_EMAIL;
1048	else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS;
1049	else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF;
1050
1051	return type;
1052	}
1053
1054	// --------------------------------------------------------------------------------
1055
1056
1057
// Searches for 'helpname' in the helpfile locations "oldhelp/" and "genhelp/"
// (in that order; both are relative paths).
// Returns the name including the location for the first location where stat()
// succeeds or an empty string if the file was found in none of them.
static std::string locate_helpfile(const std::string& helpname) {
    static const char *const locations[] = { "oldhelp/", "genhelp/" };
    const size_t location_count = sizeof(locations)/sizeof(locations[0]);

    for (size_t l = 0; l != location_count; ++l) {
        const std::string candidate = locations[l]+helpname;

        struct stat st;
        if (stat(candidate.c_str(), &st) == 0) {
            return candidate;
        }
    }
    return "";
}
1074
1075	static string locate_document(const string& docname) {
1076	// search for 'docname' or 'docname.gz' in various helpfile locations
1077
1078	string located = locate_helpfile(docname);
1079	if (located.empty()) {
1080	located = locate_helpfile(docname+".gz");
1081	}
1082	return located;
1083	}
1084
1085	static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) {
1086	if (type == LT_UNKNOWN) {
1087	string msg = string("Invalid link (dest='")+dest+"')";
1088	throw LineAttachedMessage(msg, source_line);
1089	}
1090
1091	link.add_attribute("dest", dest);
1092	link.add_attribute("type", LinkType2id(type));
1093	link.add_attribute("source_line", source_line);
1094
1095	if (type&(LT_HLP\|LT_PDF\|LT_PS)) { // other links (www, email) cannot be checked for existence here
1096	string fullhelp = ((type&LT_HLP) ? locate_helpfile : locate_document)(dest);
1097	if (fullhelp.empty()) {
1098	link.add_attribute("missing", "1");
1099	string warning = strf("Dead link to '%s'", dest.c_str());
1100	h2x_assert(source_line<1000); // illegal line number ?
1101	add_warning(warning, source_line);
1102	}
1103	}
1104	}
1105
1106	static void print_XML_Text_expanding_links(const string& text, size_t lineNo) {
1107	size_t found = text.find("LINK{", 0);
1108	if (found != string::npos) {
1109	size_t inside_link = found+5;
1110	size_t close = text.find('}', inside_link);
1111
1112	if (close != string::npos) {
1113	string link_target = text.substr(inside_link, close-inside_link);
1114	LinkType type = detectLinkType(link_target);
1115	string dest = link_target;
1116
1117	// cppcheck-suppress unusedScopedObject
1118	XML_Text(text.substr(0, found));
1119
1120	{
1121	XML_Tag link("LINK");
1122	link.set_on_extra_line(false);
1123	add_link_attributes(link, type, dest, lineNo);
1124	}
1125
1126	return print_XML_Text_expanding_links(text.substr(close+1), lineNo);
1127	}
1128	}
1129
1130	XML_Text t(text);
1131	}
1132
1133	void ParagraphTree::xml_write(bool ignore_enumerated, bool write_as_entry) {
1134	if (is_enumerated && !ignore_enumerated) {
1135	XML_Tag e("ENUM");
1136	xml_write(true);
1137	}
1138	else {
1139	{
1140	XML_Tag para(is_enumerated \|\| write_as_entry ? "ENTRY" : "P");
1141	{
1142	XML_Tag textblock("T");
1143	textblock.add_attribute("reflow", reflow ? "1" : "0");
1144	{
1145	string usedText;
1146	if (reflow) {
1147	usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level);
1148	}
1149	else {
1150	usedText = text;
1151	}
1152	print_XML_Text_expanding_links(usedText, lineNo);
1153	}
1154	}
1155	if (son) {
1156	if (!son->is_enumerated && son->brother) {
1157	XML_Tag sontag("LIST");
1158	son->xml_write(false, true);
1159	}
1160	else {
1161	son->xml_write(false);
1162	}
1163	}
1164	}
1165	if (brother) brother->xml_write(ignore_enumerated, write_as_entry);
1166	}
1167	}
1168
1169	static void create_top_links(const Links& links, const char *tag) {
1170	for (Links::const_iterator s = links.begin(); s != links.end(); ++s) {
1171	XML_Tag link(tag);
1172	add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno());
1173	}
1174	}
1175
1176	void Helpfile::writeXML(FILE *out, const string& page_name) {
1177	#if defined(DUMP_DATA)
1178	display(uplinks, "Uplinks", stdout);
1179	display(references, "References", stdout);
1180	display(auto_references, "Auto-References", stdout);
1181	display(title, "Title", stdout);
1182	display(sections, "Sections", stdout);
1183	#endif // DUMP_DATA
1184
1185	XML_Document xml("PAGE", "arb_help.dtd", out);
1186
1187	xml.skip_empty_tags = true;
1188	xml.indentation_per_level = 2;
1189
1190	xml.getRoot().add_attribute("name", page_name);
1191	#if defined(DEBUG)
1192	xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version
1193	#else
1194	xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version
1195	#endif // DEBUG
1196
1197	{
1198	// cppcheck-suppress unusedScopedObject
1199	XML_Comment(string("automatically generated from ../")+inputfile+' ');
1200	}
1201
1202	create_top_links(uplinks, "UP");
1203	create_top_links(references, "SUB");
1204	create_top_links(auto_references, "SUB");
1205
1206	{
1207	XML_Tag title_tag("TITLE");
1208	Strings& T = title.Content();
1209	for (Strings::const_iterator s = T.begin(); s != T.end(); ++s) {
1210	if (s != T.begin()) { XML_Text text("\n"); }
1211	XML_Text text(*s);
1212	}
1213	}
1214
1215	for (NamedSections::const_iterator named_sec = sections.begin(); named_sec != sections.end(); ++named_sec) {
1216	XML_Tag section_tag("SECTION");
1217	section_tag.add_attribute("name", named_sec->getName());
1218
1219	ParagraphTree ptree = ParagraphTree::buildParagraphTree(named_sec);
1220
1221	ParagraphTree::embeddedCounter = 0;
1222
1223	#if defined(DEBUG)
1224	size_t textnodes = ptree->countTextNodes();
1225	#endif // DEBUG
1226
1227	ptree = ptree->format_enums();
1228
1229	#if defined(DEBUG)
1230	size_t textnodes2 = ptree->countTextNodes();
1231	h2x_assert(textnodes2 == (textnodes+ParagraphTree::embeddedCounter)); // if this occurs format_enums has an error
1232	#endif // DEBUG
1233
1234	ptree = ptree->format_indentations();
1235
1236	#if defined(DEBUG)
1237	size_t textnodes3 = ptree->countTextNodes();
1238	h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error
1239	#endif // DEBUG
1240
1241	ptree->xml_write();
1242
1243	delete ptree;
1244	}
1245	}
1246
1247	void Helpfile::extractInternalLinks() {
1248	for (NamedSections::const_iterator named_sec = sections.begin(); named_sec != sections.end(); ++named_sec) {
1249	const Section& sec = named_sec->getSection();
1250	try {
1251	const Strings& s = sec.Content();
1252
1253	for (Strings::const_iterator li = s.begin(); li != s.end(); ++li) {
1254	const string& line = *li;
1255	size_t start = 0;
1256
1257	while (1) {
1258	size_t found = line.find("LINK{", start);
1259	if (found == string::npos) break;
1260	found += 5;
1261	size_t close = line.find('}', found);
1262	if (close == string::npos) break;
1263
1264	string link_target = line.substr(found, close-found);
1265
1266	if (link_target.find("http://") == string::npos &&
1267	link_target.find("ftp://") == string::npos &&
1268	link_target.find("file://") == string::npos &&
1269	link_target.find('@') == string::npos)
1270	{
1271	try {
1272	check_duplicates(link_target, "SUB", references, true); // check only sublinks here
1273	check_duplicates(link_target, "UP", uplinks, false); // check only sublinks here
1274	check_duplicates(link_target, "AUTO-SUB", auto_references, false); // check only sublinks here
1275	auto_references.push_back(Link(link_target, sec.StartLineno()));
1276	}
1277	catch (string& err) {
1278	; // silently ignore inlined
1279	}
1280	}
1281	start = close+1;
1282	}
1283	}
1284	}
1285	catch (string& err) {
1286	throw LineAttachedMessage(string("'"+err+"' while scanning LINK{} in SECTION '"+named_sec->getName()+'\''),
1287	sec.StartLineno());
1288	}
1289	}
1290	}
1291
static void show_err(const string& err, size_t lineno, const string& helpfile) {
    // Prints 'err' (followed by a newline) to stderr.
    //
    // If 'err' does not already contain "<helpfile>:", the message is
    // prefixed with "<helpfile>:<lineno>: ". A 'lineno' of -1U means
    // "line unknown": line 1 is reported together with a hint.

    const bool has_location = err.find(helpfile+':') != string::npos;

    if (!has_location) {
        cerr << helpfile;
        if (lineno == -1U) {
            cerr << ":1: [in unknown line] ";
        }
        else {
            cerr << ":" << lineno << ": ";
        }
    }
    cerr << err << '\n';
}
1304	static void show_err(const LineAttachedMessage& line_err, const string& helpfile) {
1305	show_err(line_err.Message(), line_err.Lineno(), helpfile);
1306	}
1307
1308	static void show_warnings(const string& helpfile) {
1309	for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) {
1310	show_err(*wi, helpfile);
1311	}
1312	}
1313
1314	static void show_warnings_and_error(const LineAttachedMessage& error, const string& helpfile) {
1315	show_warnings(helpfile);
1316	show_err(error, helpfile);
1317	}
1318
1319	static void show_warnings_and_error(const string& error, const string& helpfile) {
1320	show_warnings_and_error(LineAttachedMessage(error, -1U), helpfile);
1321	}
1322
1323	int ARB_main(int argc, const char *argv[]) {
1324	Helpfile help;
1325	string arb_help;
1326
1327	try {
1328	if (argc != 3) {
1329	cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n";
1330	return EXIT_FAILURE;
1331	}
1332
1333	arb_help = argv[1];
1334	string xml_output = argv[2];
1335
1336	{
1337	ifstream in(arb_help.c_str());
1338	help.readHelp(in, arb_help);
1339	}
1340
1341	help.extractInternalLinks();
1342
1343	{
1344	FILE *out = std::fopen(xml_output.c_str(), "wt");
1345	if (!out) throw string("Can't open '")+xml_output+'\'';
1346
1347	try {
1348	// arb_help contains 'oldhelp/name.hlp'
1349	size_t slash = arb_help.find('/');
1350	size_t dot = arb_help.find_last_of('.');
1351
1352	if (slash == string::npos \|\| dot == string::npos) {
1353	throw string("parameter <ARB helpfile> has to be in format 'oldhelp/name.hlp' (not '"+arb_help+"')");
1354	}
1355
1356	string page_name(arb_help, slash+1, dot-slash-1);
1357	help.writeXML(out, page_name);
1358	fclose(out);
1359	}
1360	catch (...) {
1361	fclose(out);
1362	remove(xml_output.c_str());
1363	throw;
1364	}
1365	}
1366
1367	show_warnings(arb_help);
1368
1369	return EXIT_SUCCESS;
1370	}
1371	catch (LineAttachedMessage& err) { show_warnings_and_error(err, arb_help); }
1372	catch (string& err) { show_warnings_and_error(err, arb_help); }
1373	catch (const char * err) { show_warnings_and_error(err, arb_help); }
1374	catch (...) { show_warnings_and_error("unknown exception in arb_help2xml", arb_help); }
1375
1376	return EXIT_FAILURE;
1377	}
1378
1379
1380	// --------------------------------------------------------------------------------
1381
1382	#ifdef UNIT_TESTS
1383	#include <test_unit.h>
1384
1385	void TEST_hlp2xml_conversion() {
1386	// oldhelp/ad_align.hlp
1387	string arb_help = "../../HELP_SOURCE/oldhelp/ad_align.hlp";
1388	ifstream in(arb_help.c_str());
1389
1390	Helpfile help;
1391	help.readHelp(in, arb_help);
1392
1393	Section title = help.get_title();
1394	const Strings& strings = title.Content();
1395
1396	TEST_ASSERT_EQUAL(strings.front().c_str(), "Alignment Administration");
1397	TEST_ASSERT(strings.size() == 1);
1398	}
1399
1400	#endif // UNIT_TESTS

Note: See TracBrowser for help on using the repository browser.

Download in other formats: