1 | #include "fun.h" |
---|
2 | #include "global.h" |
---|
3 | |
---|
4 | #include <time.h> |
---|
5 | #include <sys/time.h> |
---|
6 | |
---|
#define SIZE 128 // default buffer size for generated dates

// Placeholder result of genbank_date() for dates which cannot be converted.
// (The question marks are escaped to avoid forming trigraph sequences.)
static const char *ERROR_DATE = "\?\?-\?\?\?-\?\?\?\?";

// Three-letter month abbreviations (index 0 = January).
// Used to recognize month names (ismonth) and to print genbank dates.
static const char *const MON[12] = {
    "JAN", "FEB", "MAR",
    "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP",
    "OCT", "NOV", "DEC"
};

// Full month names (index 0 = January), used to print gcg dates.
static const char *const Month[12] = {
    "January", "February", "March",
    "April",   "May",      "June",
    "July",    "August",   "September",
    "October", "November", "December"
};

// Highest accepted day number per month, indexed by month number [1..12].
// Index 0 is an unused filler. February is listed with 29 days,
// i.e. the table does not distinguish leap years from other years.
static const unsigned char days_in_month[12+1] = {
    0xFF,
    31, 29, 31, 30, 31, 30,
    31, 31, 30, 31, 30, 31
};
---|
30 | |
---|
inline bool two_char(const char *str, char determ) {
    // Return true if 'str' contains exactly two occurrences of the
    // determinator char 'determ', i.e. if 'determ' splits 'str' into three fields.
    //
    // Note: 'determ' is expected to be a real separator character (not '\0').
    int count = 0;
    for (const char *d = strchr(str, determ); d; d = strchr(d+1, determ)) {
        count++;
    }
    // Comparing with 2 is essential: returning the plain count would accept
    // any string containing at least one determinator.
    return count == 2;
}
---|
37 | |
---|
38 | inline int ismonth(const char *str) { |
---|
39 | // Return [1..12] if the char Str is one of 12 months. Case insensitive. |
---|
40 | for (int i = 0; i<12; i++) { |
---|
41 | if (str_iequal(str, MON[i])) { |
---|
42 | return i+1; |
---|
43 | } |
---|
44 | } |
---|
45 | return 0; |
---|
46 | } |
---|
47 | |
---|
48 | |
---|
49 | __ATTR__USERESULT static bool find_date(const char *date_string, int *month, int *day, int *year) { |
---|
50 | // Find day, month, year from date Str. |
---|
51 | char determ = ' '; |
---|
52 | if (two_char(date_string, '.')) determ = '.'; |
---|
53 | else if (two_char(date_string, '/')) determ = '/'; |
---|
54 | else if (two_char(date_string, '-')) determ = '-'; |
---|
55 | |
---|
56 | if (determ == ' ') return false; |
---|
57 | |
---|
58 | char token[20]; |
---|
59 | int nums[3] = { 0, 0, 0 }; |
---|
60 | int count = 0; |
---|
61 | int index = 0; |
---|
62 | |
---|
63 | int len = str0len(date_string); |
---|
64 | for (int indi = 0; indi <= len; indi++) { |
---|
65 | if (date_string[indi] == determ || indi == len) { |
---|
66 | token[index++] = '\0'; |
---|
67 | if (count == 1) { |
---|
68 | nums[count++] = ismonth(token); |
---|
69 | } |
---|
70 | else { |
---|
71 | nums[count++] = atoi(token); |
---|
72 | } |
---|
73 | index = 0; |
---|
74 | } |
---|
75 | else { |
---|
76 | token[index++] = date_string[indi]; |
---|
77 | } |
---|
78 | } |
---|
79 | *day = nums[0]; |
---|
80 | *month = nums[1]; |
---|
81 | *year = nums[2]; |
---|
82 | |
---|
83 | return true; |
---|
84 | } |
---|
85 | |
---|
86 | static int isdatenum(char *Str) { |
---|
87 | // Return number of day or year the Str represents. |
---|
88 | // If not day or year, return 0. |
---|
89 | int length, num, indi; |
---|
90 | |
---|
91 | length = str0len(Str); |
---|
92 | if (length > 4 || length < 1) |
---|
93 | return 0; |
---|
94 | for (indi = 0, num = 1; indi < length && num == 1; indi++) { |
---|
95 | if (!isdigit(Str[indi])) { |
---|
96 | num = 0; |
---|
97 | } |
---|
98 | } |
---|
99 | if (num == 1) |
---|
100 | num = atoi(Str); |
---|
101 | return num; |
---|
102 | } |
---|
103 | |
---|
104 | class SetOnce { |
---|
105 | int num_; |
---|
106 | bool set_; |
---|
107 | bool is_set() const { return set_; } |
---|
108 | public: |
---|
109 | SetOnce() : num_(-1), set_(false) {} |
---|
110 | |
---|
111 | bool operator!() const { return !set_; } |
---|
112 | |
---|
113 | int value() const { ca_assert(is_set()); return num_; } |
---|
114 | void set(int val) { ca_assert(!is_set()); num_ = val; set_ = true; } |
---|
115 | void replace(int val) { ca_assert(is_set()); num_ = val; } |
---|
116 | }; |
---|
117 | |
---|
118 | __ATTR__USERESULT static bool find_date_long_form(const char *date_string, int *monthPtr, int *dayPtr, int *yearPtr) { |
---|
119 | // Find day, month, year in the long term date Str like day-of-week, month, day, time, year. |
---|
120 | |
---|
121 | int length = str0len(date_string); |
---|
122 | SetOnce day, month, year; |
---|
123 | |
---|
124 | char token[SIZE]; |
---|
125 | for (int indi = 0, index = 0; index <= length; index++) { |
---|
126 | if (index == length || isspace(date_string[index]) || strchr("(),", date_string[index])) { |
---|
127 | if (indi == 0) continue; // empty token |
---|
128 | token[indi] = '\0'; |
---|
129 | |
---|
130 | int num = ismonth(token); |
---|
131 | if (num>0) { |
---|
132 | if (!month) month.set(num); |
---|
133 | else if (!day) { |
---|
134 | day.set(month.value()); // day has been misinterpreted as month |
---|
135 | month.replace(num); |
---|
136 | } |
---|
137 | } |
---|
138 | else if ((num = isdatenum(token)) > 0) { |
---|
139 | if (!month && num <= 12) { month.set(num); } |
---|
140 | else if (!day && num <= 31) { day.set(num); } |
---|
141 | else if (!year) { year.set(num); } |
---|
142 | } |
---|
143 | indi = 0; |
---|
144 | } |
---|
145 | else token[indi++] = date_string[index]; |
---|
146 | } |
---|
147 | |
---|
148 | if (!day || !month || !year || |
---|
149 | day.value()>days_in_month[month.value()]) return false; |
---|
150 | |
---|
151 | *monthPtr = month.value(); |
---|
152 | *dayPtr = day.value(); |
---|
153 | *yearPtr = year.value(); |
---|
154 | |
---|
155 | return true; |
---|
156 | } |
---|
157 | |
---|
158 | inline bool is_genbank_date(const char *str) { |
---|
159 | // Return true if it is genbank form of date, |
---|
160 | // which is day(2 digits)-MONTH(in letters)-year(4 digits). |
---|
161 | return str0len(str) >= 11 && str[2] == '-' && str[6] == '-'; |
---|
162 | } |
---|
163 | |
---|
164 | const char *genbank_date(const char *other_date) { |
---|
165 | // Convert the date to be in genbank date form. |
---|
166 | const char *result; |
---|
167 | int length = str0len(other_date); |
---|
168 | |
---|
169 | if (other_date[length - 1] == '\n') { |
---|
170 | char *dup = nulldup(other_date); |
---|
171 | dup[--length] = '\0'; |
---|
172 | result = genbank_date(dup); |
---|
173 | free(dup); |
---|
174 | } |
---|
175 | else { |
---|
176 | static char gdate[SIZE]; |
---|
177 | gdate[0] = 0; |
---|
178 | |
---|
179 | int day = -1, month = -1, year = -1; |
---|
180 | bool ok = false; |
---|
181 | if (length > 10) { |
---|
182 | if (is_genbank_date(other_date)) { |
---|
183 | strncpy(gdate, other_date, 11); |
---|
184 | gdate[11] = 0; |
---|
185 | ok = true; |
---|
186 | } |
---|
187 | else ok = find_date_long_form(other_date, &month, &day, &year); |
---|
188 | } |
---|
189 | |
---|
190 | if (!ok) ok = find_date(other_date, &month, &day, &year); |
---|
191 | |
---|
192 | if (!ok) { |
---|
193 | warningf(146, "Unknown date format: %s, cannot convert.", other_date); |
---|
194 | strcpy(gdate, ERROR_DATE); |
---|
195 | } |
---|
196 | |
---|
197 | if (!gdate[0]) { |
---|
198 | if (day <= 0 || month <= 0 || year <= 0 || month > 12 || day > days_in_month[month]) { |
---|
199 | warningf(147, "Wrong date format: %s", other_date); |
---|
200 | strcpy(gdate, ERROR_DATE); |
---|
201 | } |
---|
202 | else { |
---|
203 | if (year<100) year += 1900; |
---|
204 | sprintf(gdate, "%02d-%s-%d", day, MON[month - 1], year); |
---|
205 | } |
---|
206 | } |
---|
207 | |
---|
208 | ca_assert(gdate[0]); |
---|
209 | result = gdate; |
---|
210 | } |
---|
211 | return result; |
---|
212 | } |
---|
213 | |
---|
214 | const char *today_date() { |
---|
215 | // Get today's date. |
---|
216 | static char line[SIZE] = ""; |
---|
217 | if (!line[0]) { |
---|
218 | struct timeval tp; |
---|
219 | struct timezone tzp; |
---|
220 | (void)gettimeofday(&tp, &tzp); |
---|
221 | |
---|
222 | strcpy(line, ctime(&(tp.tv_sec))); |
---|
223 | |
---|
224 | int len = strlen(line); |
---|
225 | if (line[len-1] == '\n') { |
---|
226 | line[len-1] = 0; |
---|
227 | } |
---|
228 | } |
---|
229 | return line; |
---|
230 | } |
---|
231 | |
---|
232 | const char *gcg_date(const char *input) { |
---|
233 | // Create gcg format of date. |
---|
234 | static char date[2*SIZE]; |
---|
235 | |
---|
236 | ca_assert(strlen(input) >= 8); |
---|
237 | |
---|
238 | const int MONTH_POS = 4; |
---|
239 | const int MONTH_LEN = 3; |
---|
240 | const int DAY_POS = MONTH_POS+MONTH_LEN+1; |
---|
241 | |
---|
242 | const char *monthname = ""; |
---|
243 | { |
---|
244 | char part[MONTH_LEN+1]; |
---|
245 | memcpy(part, input+MONTH_POS, MONTH_LEN); |
---|
246 | part[MONTH_LEN] = 0; |
---|
247 | |
---|
248 | int month = ismonth(part); |
---|
249 | if (month) monthname = Month[month-1]; |
---|
250 | } |
---|
251 | |
---|
252 | char time[SIZE]; |
---|
253 | int day, year; |
---|
254 | IF_ASSERTION_USED(int scanned = ) |
---|
255 | sscanf(input+DAY_POS, "%d %s %d", &day, time, &year); |
---|
256 | ca_assert(scanned == 3); |
---|
257 | |
---|
258 | sprintf(date, "%s %d, %d %s", monthname, day, year, time); |
---|
259 | return date; |
---|
260 | } |
---|
261 | |
---|
262 | // -------------------------------------------------------------------------------- |
---|
263 | |
---|
264 | #ifdef UNIT_TESTS |
---|
265 | #include <test_unit.h> |
---|
266 | |
---|
// Apply the conversion function CONVERT to 'input' and
// compare its result with 'expect' using the test macro ASSERTION.
#define TEST_EXPECT_CONVERT(input,expect,CONVERT,ASSERTION) ASSERTION(CONVERT(input), expect);

// Expect genbank_date() resp. gcg_date() to convert 'input' into 'expect'.
// The '__BROKEN' variants use TEST_EXPECT_EQUAL__BROKEN for the comparison.
#define TEST_EXPECT_GENBANK_DATE(input,expect) TEST_EXPECT_CONVERT(input, expect, genbank_date, TEST_EXPECT_EQUAL)
#define TEST_EXPECT_GENBANK_DATE__BROKEN(input,expect) TEST_EXPECT_CONVERT(input, expect, genbank_date, TEST_EXPECT_EQUAL__BROKEN)
#define TEST_EXPECT_GCG_DATE(input,expect) TEST_EXPECT_CONVERT(input, expect, gcg_date, TEST_EXPECT_EQUAL)
#define TEST_EXPECT_GCG_DATE__BROKEN(input,expect) TEST_EXPECT_CONVERT(input, expect, gcg_date, TEST_EXPECT_EQUAL__BROKEN)

// Expect the date parser 'finder' (called like find_date / find_date_long_form)
// to return false for 'input'.
#define TEST_EXPECT_INVALID_ANYDATE(input,finder)               \
    do {                                                        \
        int day_, month_, year_;                                \
        ASSERT_RESULT(bool, false,                              \
                      finder(input, &month_, &day_, &year_));   \
    } while(0)

#define TEST_EXPECT_INVALID_LONGDATE(input) TEST_EXPECT_INVALID_ANYDATE(input, find_date_long_form)

// Expect the date parser 'finder' to succeed for 'input' and
// to detect day 'd', month 'm' and year 'y'.
// The parser is called with a heap copy of 'input'.
#define TEST_EXPECT_FIND_ANYDATE(input,d,m,y,finder)            \
    do {                                                        \
        char *dup_ = ARB_strdup(input);                         \
        int day_, month_, year_;                                \
        TEST_EXPECT(finder(dup_, &month_, &day_, &year_));      \
        TEST_EXPECT_EQUAL(day_, d);                             \
        TEST_EXPECT_EQUAL(month_, m);                           \
        TEST_EXPECT_EQUAL(year_, y);                            \
        free(dup_);                                             \
    } while (0)

// Shortcuts for both date parsers (short form / long form).
#define TEST_EXPECT_FIND_____DATE(input,d,m,y) TEST_EXPECT_FIND_ANYDATE(input, d, m, y, find_date)
#define TEST_EXPECT_FIND_LONGDATE(input,d,m,y) TEST_EXPECT_FIND_ANYDATE(input, d, m, y, find_date_long_form)
---|
296 | |
---|
297 | // #define TEST_EXPECT_FIND_DATE(str,d,m,y) TEST_EXPECT_FIND_DATE_IMPL(str,d,m,y,TEST_EXPECT_EQUAL) |
---|
298 | |
---|
__ATTR__REDUCED_OPTIMIZE void TEST_BASIC_conv_date() {
    // Tests the date parsers (find_date, find_date_long_form) and
    // the date converters (genbank_date, gcg_date).

    TEST_EXPECT_EQUAL(ismonth("Apr"), 4); // month names are detected case insensitive

    // short form dates with 2-digit year
    TEST_EXPECT_FIND_____DATE("19-APR-99", 19, 4, 99);
    TEST_EXPECT_FIND_____DATE("22-JUN-65", 22, 6, 65);
    TEST_EXPECT_FIND_____DATE("5-SEP-10", 5, 9, 10);
    TEST_EXPECT_FIND_____DATE("05-SEP-10", 5, 9, 10);

    // short form dates with 4-digit year
    TEST_EXPECT_FIND_____DATE("19-APR-1999", 19, 4, 1999);
    TEST_EXPECT_FIND_____DATE("22-JUN-1965", 22, 6, 1965); // test date before epoch
    TEST_EXPECT_FIND_____DATE("5-SEP-2010", 5, 9, 2010);
    TEST_EXPECT_FIND_____DATE("05-SEP-2010", 5, 9, 2010);

    // --------------------
    // long form dates (tokens which are neither month nor number are ignored, e.g. the time)

    TEST_EXPECT_FIND_LONGDATE("05 Sep 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Sep, 05 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Sep 05 2010", 5, 9, 2010);

    TEST_EXPECT_FIND_LONGDATE("Mon Apr 19 25:46:19 CEST 99", 19, 4, 99);
    TEST_EXPECT_FIND_LONGDATE("Tue Jun 22 05:11:00 CEST 65", 22, 6, 65);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 5 19:46:25 CEST 10", 5, 9, 10);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 CEST 10", 5, 9, 10);

    TEST_EXPECT_FIND_LONGDATE("Mon Apr 19 25:46:19 CEST 1999", 19, 4, 1999);
    TEST_EXPECT_FIND_LONGDATE("Tue Jun 22 05:11:00 CEST 1965", 22, 6, 1965);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 5 19:46:25 CEST 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 CEST 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 2010", 5, 9, 2010);

    TEST_EXPECT_FIND_LONGDATE("Sun Oct 31 08:37:14 2010", 31, 10, 2010);

    // --------------------
    // conversion into genbank format

    TEST_EXPECT_GENBANK_DATE("19 Apr 1999", "19-APR-1999");
    TEST_EXPECT_GENBANK_DATE("19-APR-1999", "19-APR-1999");
    TEST_EXPECT_GENBANK_DATE("22-JUN-1965", "22-JUN-1965");
    TEST_EXPECT_GENBANK_DATE("5-SEP-2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("05-SEP-2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("crap", ERROR_DATE);

    TEST_EXPECT_GENBANK_DATE("Mon Apr 19 25:46:19 CEST 1999", "19-APR-1999");
    TEST_EXPECT_GENBANK_DATE("Tue Jun 22 05:11:00 CEST 1965", "22-JUN-1965");
    TEST_EXPECT_GENBANK_DATE("Wed Sep 5 19:46:25 CEST 2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("Wed Sep 05 19:46:25 CEST 2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("Wed Sep 31 19:46:25 CEST 2010", ERROR_DATE); // September has 30 days only

    // month given as name or as number (in either order with the day)
    TEST_EXPECT_GENBANK_DATE("Sun Oct 31 08:37:14 2010", "31-OCT-2010");
    TEST_EXPECT_GENBANK_DATE("Sun 10 31 08:37:14 2010", "31-OCT-2010");
    TEST_EXPECT_GENBANK_DATE("Sun 31 10 08:37:14 2010", "31-OCT-2010");
    TEST_EXPECT_GENBANK_DATE("Sun Oct 32 08:37:14 2010", ERROR_DATE);

    TEST_EXPECT_GENBANK_DATE("Fri Dec 31 08:37:14 2010", "31-DEC-2010");
    TEST_EXPECT_GENBANK_DATE("Fri 12 31 08:37:14 2010", "31-DEC-2010");
    TEST_EXPECT_GENBANK_DATE("Fri 31 12 08:37:14 2010", "31-DEC-2010");
    TEST_EXPECT_GENBANK_DATE("Fri 13 31 08:37:14 2010", ERROR_DATE);
    TEST_EXPECT_GENBANK_DATE("Fri 31 13 08:37:14 2010", ERROR_DATE);

    TEST_EXPECT_GENBANK_DATE("Tue Feb 28 08:37:14 2011", "28-FEB-2011");
    TEST_EXPECT_GENBANK_DATE("Tue Feb 29 08:37:14 2011", "29-FEB-2011"); // accepted although 2011 was no leap year (existence of Feb 29 is not checked)
    TEST_EXPECT_GENBANK_DATE("Tue Feb 30 08:37:14 2011", ERROR_DATE); // rejected: exceeds the 29 days listed for February in days_in_month

    TEST_EXPECT_DIFFERENT(genbank_date(today_date()), ERROR_DATE);

    // --------------------
    // conversion into gcg format

    TEST_EXPECT_GCG_DATE("Mon Apr 19 25:46:19 99", "April 19, 99 25:46:19");

    TEST_EXPECT_GCG_DATE("Mon Apr 19 25:46:19 1999", "April 19, 1999 25:46:19");
    TEST_EXPECT_GCG_DATE("Tue Jun 22 05:11:00 1965", "June 22, 1965 05:11:00");
    TEST_EXPECT_GCG_DATE("Wed Sep 5 19:46:25 2010", "September 5, 2010 19:46:25");
    TEST_EXPECT_GCG_DATE("Wed Sep 05 19:46:25 2010", "September 5, 2010 19:46:25");

    TEST_REJECT_NULL(gcg_date(today_date())); // currently gcg_date is only used like this
}
---|
374 | |
---|
375 | #endif // UNIT_TESTS |
---|