| 1 | #include "fun.h" |
|---|
| 2 | #include "global.h" |
|---|
| 3 | |
|---|
| 4 | #include <time.h> |
|---|
| 5 | #include <sys/time.h> |
|---|
| 6 | |
|---|
#define SIZE 128 // default buffer size for generated dates

// Placeholder returned when a date cannot be converted.
// The question marks are written as \? escapes, which keeps sequences
// such as ??- from being read as trigraphs.
static const char *ERROR_DATE = "\?\?-\?\?\?-\?\?\?\?";

// Upper-case three-letter month abbreviations, indexed by (month number - 1).
static const char *const MON[12] = {
    "JAN", "FEB", "MAR",
    "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP",
    "OCT", "NOV", "DEC"
};

// Full month names, indexed by (month number - 1).
static const char *const Month[12] = {
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December"
};

// Highest accepted day per month, indexed by month number (1..12).
// Slot 0 is a filler so that month numbers can be used as index directly.
// February is listed with 29 days; the table alone cannot tell leap years apart.
static const unsigned char days_in_month[12+1] = {
    0xFF,
    31, 29, 31, 30, 31, 30,
    31, 31, 30, 31, 30, 31
};
|---|
| 30 | |
|---|
inline bool two_char(const char *str, char determ) {
    // Return true if 'str' contains exactly two occurrences of the delimiter 'determ'.
    //
    // A date with three fields (day, month, year) has exactly two delimiters.
    // Returning the raw occurrence count (as done previously) accepted any
    // string containing at least one delimiter.
    //
    // 'str' has to be a valid NUL-terminated string; 'determ' should not be '\0'.
    int count = 0;
    for (const char *d = strchr(str, determ); d; d = strchr(d+1, determ)) {
        if (++count > 2) return false; // third delimiter found -> no need to scan further
    }
    return count == 2;
}
|---|
| 37 | |
|---|
| 38 | inline int ismonth(const char *str) { |
|---|
| 39 | // Return [1..12] if the char Str is one of 12 months. Case insensitive. |
|---|
| 40 | for (int i = 0; i<12; i++) { |
|---|
| 41 | if (str_iequal(str, MON[i])) { |
|---|
| 42 | return i+1; |
|---|
| 43 | } |
|---|
| 44 | } |
|---|
| 45 | return 0; |
|---|
| 46 | } |
|---|
| 47 | |
|---|
| 48 | |
|---|
| 49 | __ATTR__USERESULT static bool find_date(const char *date_string, int *month, int *day, int *year) { |
|---|
| 50 | // Find day, month, year from date Str. |
|---|
| 51 | char determ = ' '; |
|---|
| 52 | if (two_char(date_string, '.')) determ = '.'; |
|---|
| 53 | else if (two_char(date_string, '/')) determ = '/'; |
|---|
| 54 | else if (two_char(date_string, '-')) determ = '-'; |
|---|
| 55 | |
|---|
| 56 | if (determ == ' ') return false; |
|---|
| 57 | |
|---|
| 58 | char token[20]; |
|---|
| 59 | int nums[3] = { 0, 0, 0 }; |
|---|
| 60 | int count = 0; |
|---|
| 61 | int index = 0; |
|---|
| 62 | |
|---|
| 63 | int len = str0len(date_string); |
|---|
| 64 | for (int indi = 0; indi <= len; indi++) { |
|---|
| 65 | if (date_string[indi] == determ || indi == len) { |
|---|
| 66 | token[index++] = '\0'; |
|---|
| 67 | if (count == 1) { |
|---|
| 68 | nums[count++] = ismonth(token); |
|---|
| 69 | } |
|---|
| 70 | else { |
|---|
| 71 | nums[count++] = atoi(token); |
|---|
| 72 | } |
|---|
| 73 | index = 0; |
|---|
| 74 | } |
|---|
| 75 | else { |
|---|
| 76 | token[index++] = date_string[indi]; |
|---|
| 77 | } |
|---|
| 78 | } |
|---|
| 79 | *day = nums[0]; |
|---|
| 80 | *month = nums[1]; |
|---|
| 81 | *year = nums[2]; |
|---|
| 82 | |
|---|
| 83 | return true; |
|---|
| 84 | } |
|---|
| 85 | |
|---|
static int isdatenum(const char *Str) {
    // Return the number represented by 'Str' if it may be a day or a year,
    // i.e. if it consists of 1 to 4 decimal digits and nothing else.
    // Otherwise (NULL, empty, too long, or any non-digit) return 0.
    //
    // Note: a string consisting of zeros only (e.g. "00") also yields 0.
    if (!Str) return 0;

    int length = 0;
    for (; Str[length]; ++length) {
        // <ctype.h> functions require a value representable as unsigned char
        if (length >= 4 || !isdigit((unsigned char)Str[length])) return 0;
    }
    if (length < 1) return 0;

    return atoi(Str);
}
|---|
| 103 | |
|---|
| 104 | class SetOnce { |
|---|
| 105 | int num_; |
|---|
| 106 | bool set_; |
|---|
| 107 | bool is_set() const { return set_; } |
|---|
| 108 | public: |
|---|
| 109 | SetOnce() : num_(-1), set_(false) {} |
|---|
| 110 | |
|---|
| 111 | bool operator!() const { return !set_; } |
|---|
| 112 | |
|---|
| 113 | int value() const { ca_assert(is_set()); return num_; } |
|---|
| 114 | void set(int val) { ca_assert(!is_set()); num_ = val; set_ = true; } |
|---|
| 115 | void replace(int val) { ca_assert(is_set()); num_ = val; } |
|---|
| 116 | }; |
|---|
| 117 | |
|---|
| 118 | __ATTR__USERESULT static bool find_date_long_form(const char *date_string, int *monthPtr, int *dayPtr, int *yearPtr) { |
|---|
| 119 | // Find day, month, year in the long term date Str like day-of-week, month, day, time, year. |
|---|
| 120 | |
|---|
| 121 | int length = str0len(date_string); |
|---|
| 122 | SetOnce day, month, year; |
|---|
| 123 | |
|---|
| 124 | char token[SIZE]; |
|---|
| 125 | for (int indi = 0, index = 0; index <= length; index++) { |
|---|
| 126 | if (index == length || isspace(date_string[index]) || strchr("(),", date_string[index])) { |
|---|
| 127 | if (indi == 0) continue; // empty token |
|---|
| 128 | token[indi] = '\0'; |
|---|
| 129 | |
|---|
| 130 | int num = ismonth(token); |
|---|
| 131 | if (num>0) { |
|---|
| 132 | if (!month) month.set(num); |
|---|
| 133 | else if (!day) { |
|---|
| 134 | day.set(month.value()); // day has been misinterpreted as month |
|---|
| 135 | month.replace(num); |
|---|
| 136 | } |
|---|
| 137 | } |
|---|
| 138 | else if ((num = isdatenum(token)) > 0) { |
|---|
| 139 | if (!month && num <= 12) { month.set(num); } |
|---|
| 140 | else if (!day && num <= 31) { day.set(num); } |
|---|
| 141 | else if (!year) { year.set(num); } |
|---|
| 142 | } |
|---|
| 143 | indi = 0; |
|---|
| 144 | } |
|---|
| 145 | else token[indi++] = date_string[index]; |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | if (!day || !month || !year || |
|---|
| 149 | day.value()>days_in_month[month.value()]) return false; |
|---|
| 150 | |
|---|
| 151 | *monthPtr = month.value(); |
|---|
| 152 | *dayPtr = day.value(); |
|---|
| 153 | *yearPtr = year.value(); |
|---|
| 154 | |
|---|
| 155 | return true; |
|---|
| 156 | } |
|---|
| 157 | |
|---|
inline bool is_genbank_date(const char *str) {
    // Return true if 'str' starts with a date in genbank form, which is
    //     day(2 digits)-MONTH(3 letters)-year(4 digits)
    // Characters behind these 11 characters are not inspected.
    //
    // Only the character classes are checked here; neither the month
    // abbreviation nor the day range are validated.
    if (!str) return false;

    static const char shape[] = "dd-aaa-dddd"; // d=digit, a=letter, other=literal
    for (int i = 0; shape[i]; ++i) {
        // A terminating NUL in 'str' fails each of the checks below,
        // so nothing behind the end of a short string is read.
        const unsigned char c = (unsigned char)str[i];
        switch (shape[i]) {
            case 'd': if (!isdigit(c)) return false; break;
            case 'a': if (!isalpha(c)) return false; break;
            default:  if (c != (unsigned char)shape[i]) return false; break;
        }
    }
    return true;
}
|---|
| 163 | |
|---|
| 164 | const char *genbank_date(const char *other_date) { |
|---|
| 165 | // Convert the date to be in genbank date form. |
|---|
| 166 | const char *result; |
|---|
| 167 | int length = str0len(other_date); |
|---|
| 168 | |
|---|
| 169 | if (other_date[length - 1] == '\n') { |
|---|
| 170 | char *dup = nulldup(other_date); |
|---|
| 171 | dup[--length] = '\0'; |
|---|
| 172 | result = genbank_date(dup); |
|---|
| 173 | free(dup); |
|---|
| 174 | } |
|---|
| 175 | else { |
|---|
| 176 | static char gdate[SIZE]; |
|---|
| 177 | gdate[0] = 0; |
|---|
| 178 | |
|---|
| 179 | int day = -1, month = -1, year = -1; |
|---|
| 180 | bool ok = false; |
|---|
| 181 | if (length > 10) { |
|---|
| 182 | if (is_genbank_date(other_date)) { |
|---|
| 183 | strncpy(gdate, other_date, 11); |
|---|
| 184 | gdate[11] = 0; |
|---|
| 185 | ok = true; |
|---|
| 186 | } |
|---|
| 187 | else ok = find_date_long_form(other_date, &month, &day, &year); |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | if (!ok) ok = find_date(other_date, &month, &day, &year); |
|---|
| 191 | |
|---|
| 192 | if (!ok) { |
|---|
| 193 | warningf(146, "Unknown date format: %s, cannot convert.", other_date); |
|---|
| 194 | strcpy(gdate, ERROR_DATE); |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | if (!gdate[0]) { |
|---|
| 198 | if (day <= 0 || month <= 0 || year <= 0 || month > 12 || day > days_in_month[month]) { |
|---|
| 199 | warningf(147, "Wrong date format: %s", other_date); |
|---|
| 200 | strcpy(gdate, ERROR_DATE); |
|---|
| 201 | } |
|---|
| 202 | else { |
|---|
| 203 | if (year<100) year += 1900; |
|---|
| 204 | sprintf(gdate, "%02d-%s-%d", day, MON[month - 1], year); |
|---|
| 205 | } |
|---|
| 206 | } |
|---|
| 207 | |
|---|
| 208 | ca_assert(gdate[0]); |
|---|
| 209 | result = gdate; |
|---|
| 210 | } |
|---|
| 211 | return result; |
|---|
| 212 | } |
|---|
| 213 | |
|---|
| 214 | const char *today_date() { |
|---|
| 215 | // Get today's date. |
|---|
| 216 | static char line[SIZE] = ""; |
|---|
| 217 | if (!line[0]) { |
|---|
| 218 | struct timeval tp; |
|---|
| 219 | struct timezone tzp; |
|---|
| 220 | (void)gettimeofday(&tp, &tzp); |
|---|
| 221 | |
|---|
| 222 | strcpy(line, ctime(&(tp.tv_sec))); |
|---|
| 223 | |
|---|
| 224 | int len = strlen(line); |
|---|
| 225 | if (line[len-1] == '\n') { |
|---|
| 226 | line[len-1] = 0; |
|---|
| 227 | } |
|---|
| 228 | } |
|---|
| 229 | return line; |
|---|
| 230 | } |
|---|
| 231 | |
|---|
| 232 | const char *gcg_date(const char *input) { |
|---|
| 233 | // Create gcg format of date. |
|---|
| 234 | static char date[2*SIZE]; |
|---|
| 235 | |
|---|
| 236 | ca_assert(strlen(input) >= 8); |
|---|
| 237 | |
|---|
| 238 | const int MONTH_POS = 4; |
|---|
| 239 | const int MONTH_LEN = 3; |
|---|
| 240 | const int DAY_POS = MONTH_POS+MONTH_LEN+1; |
|---|
| 241 | |
|---|
| 242 | const char *monthname = ""; |
|---|
| 243 | { |
|---|
| 244 | char part[MONTH_LEN+1]; |
|---|
| 245 | memcpy(part, input+MONTH_POS, MONTH_LEN); |
|---|
| 246 | part[MONTH_LEN] = 0; |
|---|
| 247 | |
|---|
| 248 | int month = ismonth(part); |
|---|
| 249 | if (month) monthname = Month[month-1]; |
|---|
| 250 | } |
|---|
| 251 | |
|---|
| 252 | char time[SIZE]; |
|---|
| 253 | int day, year; |
|---|
| 254 | IF_ASSERTION_USED(int scanned = ) |
|---|
| 255 | sscanf(input+DAY_POS, "%d %s %d", &day, time, &year); |
|---|
| 256 | ca_assert(scanned == 3); |
|---|
| 257 | |
|---|
| 258 | sprintf(date, "%s %d, %d %s", monthname, day, year, time); |
|---|
| 259 | return date; |
|---|
| 260 | } |
|---|
| 261 | |
|---|
| 262 | // -------------------------------------------------------------------------------- |
|---|
| 263 | |
|---|
| 264 | #ifdef UNIT_TESTS |
|---|
| 265 | #include <test_unit.h> |
|---|
| 266 | |
|---|
// Apply converter function CONVERT to 'input' and hand its result together
// with 'expect' to the test macro ASSERTION.
#define TEST_EXPECT_CONVERT(input,expect,CONVERT,ASSERTION) ASSERTION(CONVERT(input), expect);

// Expectations for genbank_date() and gcg_date().
// The __BROKEN variants forward to TEST_EXPECT_EQUAL__BROKEN instead of TEST_EXPECT_EQUAL.
#define TEST_EXPECT_GENBANK_DATE(input,expect) TEST_EXPECT_CONVERT(input, expect, genbank_date, TEST_EXPECT_EQUAL)
#define TEST_EXPECT_GENBANK_DATE__BROKEN(input,expect) TEST_EXPECT_CONVERT(input, expect, genbank_date, TEST_EXPECT_EQUAL__BROKEN)
#define TEST_EXPECT_GCG_DATE(input,expect) TEST_EXPECT_CONVERT(input, expect, gcg_date, TEST_EXPECT_EQUAL)
#define TEST_EXPECT_GCG_DATE__BROKEN(input,expect) TEST_EXPECT_CONVERT(input, expect, gcg_date, TEST_EXPECT_EQUAL__BROKEN)

// Call date-finder 'finder' (signature: input, &month, &day, &year) on 'input'
// and pass its bool result to ASSERT_RESULT together with 'false'
// (presumably this expects the finder to reject the input - see test_unit.h).
#define TEST_EXPECT_INVALID_ANYDATE(input,finder) \
    do { \
        int day_, month_, year_; \
        ASSERT_RESULT(bool, false, \
                      finder(input, &month_, &day_, &year_)); \
    } while(0)

#define TEST_EXPECT_INVALID_LONGDATE(input) TEST_EXPECT_INVALID_ANYDATE(input, find_date_long_form)

// Call date-finder 'finder' on a heap copy of 'input', expect it to succeed
// and to report day 'd', month 'm' and year 'y'.
// Note the argument order: the finders take month before day.
#define TEST_EXPECT_FIND_ANYDATE(input,d,m,y,finder) \
    do { \
        char *dup_ = ARB_strdup(input); \
        int day_, month_, year_; \
        TEST_EXPECT(finder(dup_, &month_, &day_, &year_)); \
        TEST_EXPECT_EQUAL(day_, d); \
        TEST_EXPECT_EQUAL(month_, m); \
        TEST_EXPECT_EQUAL(year_, y); \
        free(dup_); \
    } while (0)

// Shortcuts for the two finders: find_date() (short dates) and find_date_long_form() (long dates).
#define TEST_EXPECT_FIND_____DATE(input,d,m,y) TEST_EXPECT_FIND_ANYDATE(input, d, m, y, find_date)
#define TEST_EXPECT_FIND_LONGDATE(input,d,m,y) TEST_EXPECT_FIND_ANYDATE(input, d, m, y, find_date_long_form)

// #define TEST_EXPECT_FIND_DATE(str,d,m,y) TEST_EXPECT_FIND_DATE_IMPL(str,d,m,y,TEST_EXPECT_EQUAL)
|---|
| 298 | |
|---|
// Unit test for the date helpers of this file:
// ismonth, find_date, find_date_long_form, genbank_date, today_date and gcg_date.
__ATTR__REDUCED_OPTIMIZE void TEST_BASIC_conv_date() {
    TEST_EXPECT_EQUAL(ismonth("Apr"), 4); // month lookup ignores case

    // short dates with 2-digit year (find_date reports the year as written)
    TEST_EXPECT_FIND_____DATE("19-APR-99", 19, 4, 99);
    TEST_EXPECT_FIND_____DATE("22-JUN-65", 22, 6, 65);
    TEST_EXPECT_FIND_____DATE("5-SEP-10", 5, 9, 10);
    TEST_EXPECT_FIND_____DATE("05-SEP-10", 5, 9, 10);

    // short dates with 4-digit year
    TEST_EXPECT_FIND_____DATE("19-APR-1999", 19, 4, 1999);
    TEST_EXPECT_FIND_____DATE("22-JUN-1965", 22, 6, 1965); // test date b4 epoch
    TEST_EXPECT_FIND_____DATE("5-SEP-2010", 5, 9, 2010);
    TEST_EXPECT_FIND_____DATE("05-SEP-2010", 5, 9, 2010);

    // --------------------
    // long dates: day and month may occur in either order

    TEST_EXPECT_FIND_LONGDATE("05 Sep 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Sep, 05 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Sep 05 2010", 5, 9, 2010);

    // weekday, time and timezone are skipped (the time itself is not validated, see "25:46:19")
    TEST_EXPECT_FIND_LONGDATE("Mon Apr 19 25:46:19 CEST 99", 19, 4, 99);
    TEST_EXPECT_FIND_LONGDATE("Tue Jun 22 05:11:00 CEST 65", 22, 6, 65);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 5 19:46:25 CEST 10", 5, 9, 10);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 CEST 10", 5, 9, 10);

    TEST_EXPECT_FIND_LONGDATE("Mon Apr 19 25:46:19 CEST 1999", 19, 4, 1999);
    TEST_EXPECT_FIND_LONGDATE("Tue Jun 22 05:11:00 CEST 1965", 22, 6, 1965);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 5 19:46:25 CEST 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 CEST 2010", 5, 9, 2010);
    TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 2010", 5, 9, 2010);

    TEST_EXPECT_FIND_LONGDATE("Sun Oct 31 08:37:14 2010", 31, 10, 2010);

    // --------------------
    // conversion into genbank form (DD-MON-YYYY)

    TEST_EXPECT_GENBANK_DATE("19 Apr 1999", "19-APR-1999");
    TEST_EXPECT_GENBANK_DATE("19-APR-1999", "19-APR-1999");
    TEST_EXPECT_GENBANK_DATE("22-JUN-1965", "22-JUN-1965");
    TEST_EXPECT_GENBANK_DATE("5-SEP-2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("05-SEP-2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("crap", ERROR_DATE);

    TEST_EXPECT_GENBANK_DATE("Mon Apr 19 25:46:19 CEST 1999", "19-APR-1999");
    TEST_EXPECT_GENBANK_DATE("Tue Jun 22 05:11:00 CEST 1965", "22-JUN-1965");
    TEST_EXPECT_GENBANK_DATE("Wed Sep 5 19:46:25 CEST 2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("Wed Sep 05 19:46:25 CEST 2010", "05-SEP-2010");
    TEST_EXPECT_GENBANK_DATE("Wed Sep 31 19:46:25 CEST 2010", ERROR_DATE); // September has 30 days

    // numeric months are accepted in long dates; day and month are told apart by their range
    TEST_EXPECT_GENBANK_DATE("Sun Oct 31 08:37:14 2010", "31-OCT-2010");
    TEST_EXPECT_GENBANK_DATE("Sun 10 31 08:37:14 2010", "31-OCT-2010");
    TEST_EXPECT_GENBANK_DATE("Sun 31 10 08:37:14 2010", "31-OCT-2010");
    TEST_EXPECT_GENBANK_DATE("Sun Oct 32 08:37:14 2010", ERROR_DATE);

    TEST_EXPECT_GENBANK_DATE("Fri Dec 31 08:37:14 2010", "31-DEC-2010");
    TEST_EXPECT_GENBANK_DATE("Fri 12 31 08:37:14 2010", "31-DEC-2010");
    TEST_EXPECT_GENBANK_DATE("Fri 31 12 08:37:14 2010", "31-DEC-2010");
    TEST_EXPECT_GENBANK_DATE("Fri 13 31 08:37:14 2010", ERROR_DATE); // no valid month
    TEST_EXPECT_GENBANK_DATE("Fri 31 13 08:37:14 2010", ERROR_DATE); // no valid month

    TEST_EXPECT_GENBANK_DATE("Tue Feb 28 08:37:14 2011", "28-FEB-2011");
    TEST_EXPECT_GENBANK_DATE("Tue Feb 29 08:37:14 2011", "29-FEB-2011"); // accepted although 2011 is no leap year (days_in_month allows 29 for february)
    TEST_EXPECT_GENBANK_DATE("Tue Feb 30 08:37:14 2011", ERROR_DATE); // rejected: exceeds the 29 days allowed for february

    TEST_EXPECT_DIFFERENT(genbank_date(today_date()), ERROR_DATE);

    // --------------------
    // conversion into gcg form ("Month day, year time")

    TEST_EXPECT_GCG_DATE("Mon Apr 19 25:46:19 99", "April 19, 99 25:46:19");

    TEST_EXPECT_GCG_DATE("Mon Apr 19 25:46:19 1999", "April 19, 1999 25:46:19");
    TEST_EXPECT_GCG_DATE("Tue Jun 22 05:11:00 1965", "June 22, 1965 05:11:00");
    TEST_EXPECT_GCG_DATE("Wed Sep 5 19:46:25 2010", "September 5, 2010 19:46:25");
    TEST_EXPECT_GCG_DATE("Wed Sep 05 19:46:25 2010", "September 5, 2010 19:46:25");

    TEST_REJECT_NULL(gcg_date(today_date())); // currently gcg_date is only used like this
}
|---|
| 374 | |
|---|
| 375 | #endif // UNIT_TESTS |
|---|