1 /**
2   * datefmt provides parsing and formatting for std.datetime objects.
3   *
4   * The format is taken from strftime:
5   *    %a     The abbreviated name of the day of the week.
6   *    %A     The full name of the day of the week.
7   *    %b     The abbreviated month name.
8   *    %B     The full month name.
9   *    %C     The century number (year/100) as a 2-digit integer.
10   *    %d     The day of the month as a decimal number (range 01 to 31).
11   *    %e     Like %d, the day of the month as a decimal number, but space padded.
12   *    %F     Equivalent to %Y-%m-%d (the ISO 8601 date format).
13   *    %h     The hour as a decimal number using a 12-hour clock (range 01 to 12).
14   *    %H     The hour as a decimal number using a 24-hour clock (range 00 to 23).
  *    %I     The hour as a decimal number using a 12-hour clock (range 01 to 12).
16   *    %j     The day of the year as a decimal number (range 001 to 366).
17   *    %k     The hour (24-hour clock) as a decimal number (range 0 to 23), space padded.
18   *    %l     The hour (12-hour clock) as a decimal number (range 1 to 12), space padded.
19   *    %m     The month as a decimal number (range 01 to 12).
20   *    %M     The minute as a decimal number (range 00 to 59).
21   *    %p     "AM" / "PM" (midnight is AM; noon is PM).
22   *    %P     "am" / "pm" (midnight is AM; noon is PM).
23   *    %r     Equivalent to "%I:%M:%S %p".
24   *    %R     Equivalent to "%H:%M".
25   *    %s     The number of seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
26   *    %S     The second as a decimal number (range 00 to 60).
27   *    %T     Equivalent to "%H:%M:%S".
28   *    %u     The day of the week as a decimal, range 1 to 7, Monday being 1 (formatting only).
29   *    %V     The ISO 8601 week number (formatting only).
30   *    %w     The day of the week as a decimal, range 0 to 6, Sunday being 0 (formatting only).
31   *    %y     The year as a decimal number without a century (range 00 to 99).
32   *    %Y     The year as a decimal number including the century, minimum 4 digits.
33   *    %z     The +hhmm or -hhmm numeric timezone (that is, the hour and minute offset from UTC).
34   *    %Z     The timezone name or abbreviation. Formatting only.
35   *    %%     A literal '%' character.
36   */
37 module datefmt;
38 
39 import core.time;
40 import std.array;
41 import std.conv;
42 import std.datetime;
43 import std.string;
44 import std.utf : codeLength;
45 alias to = std.conv.to;
46 
47 
/**
 * Render a SysTime as text according to a strftime-style pattern.
 *
 * The pattern is scanned one UTF-8 code unit at a time. A '%' marks the next
 * code unit as a directive, which is expanded by interpretIntoString; every
 * other code unit is copied to the output unchanged. A lone '%' at the very
 * end of the pattern produces no output.
 *
 * Params:
 *   dt = the time to render
 *   formatString = the strftime-style pattern
 *
 * Returns: the formatted text
 *
 * Throws: Exception if the pattern contains a directive that
 *         interpretIntoString rejects.
 */
string format(SysTime dt, string formatString)
{
    Appender!string output;
    bool directivePending = false;
    foreach (char unit; formatString)
    {
        if (directivePending)
        {
            // The previous code unit was '%': this one selects the directive.
            directivePending = false;
            interpretIntoString(output, dt, unit);
            continue;
        }
        if (unit == '%')
        {
            directivePending = true;
            continue;
        }
        output ~= unit;
    }
    return output.data;
}
73 
74 
/**
 * Parse a date/time string according to a strftime-style pattern.
 *
 * This tries rather hard to produce a reasonable result. If the pattern does
 * not describe an unambiguous point in time, the result is a date that
 * satisfies the inputs and should generally be the earliest such date.
 * However, that is not guaranteed.
 *
 * For instance:
 * ---
 * SysTime time = parse("21", "%d");
 * writeln(time);  // 0000-01-21T00:00:00Z
 * ---
 *
 * Params:
 *   data = the text to parse
 *   formatString = the strftime-style pattern describing data
 *   defaultTimeZone = time zone handed to the interpreter; null is passed
 *                     through as-is
 *   allowTrailingData = when false, input left over after the pattern has
 *                       been matched is an error
 *
 * Returns: the parsed time
 *
 * Throws: Exception when the interpreter reports an error, or when
 *         unconsumed input remains and allowTrailingData is false.
 */
SysTime parse(
        string data,
        string formatString,
        immutable(TimeZone) defaultTimeZone = null,
        bool allowTrailingData = false)
{
    auto interpreter = Interpreter(data);
    auto outcome = interpreter.parse(formatString, defaultTimeZone);

    if (outcome.error)
        throw new Exception(outcome.error ~ " around " ~ outcome.remaining);

    immutable hasLeftovers = outcome.remaining.length > 0;
    if (hasLeftovers && !allowTrailingData)
        throw new Exception("trailing data: " ~ outcome.remaining);

    return outcome.dt;
}
106 
/**
 * Try to parse the input string according to the given pattern.
 *
 * Unlike parse, malformed input is reported through the return value rather
 * than an exception: besides the interpreter's own error results, a
 * DateTimeException (field out of range, e.g. month 13) or ConvException
 * (non-numeric or overflowing number) raised while parsing is also treated
 * as a failed parse.
 *
 * Input left over after the pattern has been matched is not a failure.
 *
 * Params:
 *   data = the text to parse
 *   formatString = the strftime-style pattern describing data
 *   dt = receives the parsed time on success; left as SysTime.init on failure
 *   defaultTimeZone = time zone handed to the interpreter; null is passed
 *                     through as-is
 *
 * Returns: true to indicate success; false to indicate failure
 *
 * Throws: Exception if formatString itself contains an unrecognized
 *         directive (a programming error rather than bad input).
 */
bool tryParse(
        string data,
        string formatString,
        out SysTime dt,
        immutable(TimeZone) defaultTimeZone = null)
{
    try
    {
        auto a = Interpreter(data);
        auto res = a.parse(formatString, defaultTimeZone);
        if (res.error)
        {
            return false;
        }
        dt = res.dt;
        return true;
    }
    catch (DateTimeException)
    {
        // A parsed field was out of range for a calendar date or time.
        return false;
    }
    catch (ConvException)
    {
        // A numeric field could not be converted.
        return false;
    }
}
127 
/// Pattern for RFC 1123 dates, e.g. "Thu, 04 Sep 2014 06:42:22 GMT".
enum RFC1123FORMAT = "%a, %d %b %Y %H:%M:%S GMT";

/**
 * Parse an RFC1123 date.
 *
 * Params:
 *   data = the text to parse
 *   allowTrailingData = forwarded to parse; when false, leftover input is an
 *                       error
 *
 * Returns: the parsed time, with UTC passed as the default time zone
 */
SysTime parseRFC1123(string data, bool allowTrailingData = false)
{
    return data.parse(RFC1123FORMAT, UTC(), allowTrailingData);
}
135 
/**
 * Produce an RFC1123 date string from a SysTime.
 *
 * The time is converted to UTC first, because the pattern ends in a literal
 * "GMT".
 */
string toRFC1123(SysTime date)
{
    auto inUtc = date.toUTC();
    return inUtc.format(RFC1123FORMAT);
}
141 
142 private:
143 
144 
/// Cached UTC time zone instance.
immutable(TimeZone) utc;

// An immutable module-level variable is shared between threads, so it must be
// initialised exactly once per process: `shared static this`, not the
// per-thread `static this` (which would re-assign it for every new thread).
shared static this() { utc = UTC(); }
147 
// The name tables are `immutable` arrays rather than `enum` manifest
// constants: an enum array literal is re-created (GC-allocated) at every use
// site, whereas an immutable array is stored once in static data.

/// Full weekday names, indexed by day of week with Sunday at index 0.
immutable string[] weekdayNames = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday"
];

/// Three-letter weekday abbreviations, in the same order as weekdayNames.
immutable string[] weekdayAbbrev = [
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat"
];

/// Full month names; index 0 is January.
immutable string[] monthNames = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];

/// Three-letter month abbreviations; index 0 is January.
immutable string[] monthAbbrev = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
];
197 
/**
 * Outcome of a parse attempt.
 *
 * Field order matters: instances are built positionally as
 * Result(dt, error, remaining[, remainingFormat]).
 *
 *   dt              = the parsed time
 *   error           = error description; null when parsing succeeded
 *   remaining       = input text associated with the result (unconsumed input)
 *   remainingFormat = portion of the format string at the point of failure
 */
struct Result
{
    SysTime dt;
    string error, remaining, remainingFormat;
}
205 
206 // TODO support wstring, dstring
/**
 * Stateful parser that consumes `data` from the front while walking a
 * strftime-style format string, collecting the individual date/time fields,
 * and finally assembling them into a SysTime.
 *
 * Several fields (dayOfWeek, dayOfYear, isoWeek, tzName, ...) are recorded
 * while parsing but do not influence the resulting SysTime.
 */
struct Interpreter
{
    this(string data)
    {
        this.data = data;
    }

    /// Input that has not been consumed yet.
    string data;

    int year;
    int century;
    int yearOfCentury;
    Month month;
    int dayOfWeek;
    // Defaults to the first of the month so that formats without a day
    // directive still describe a valid date (day 0 is rejected by DateTime).
    int dayOfMonth = 1;
    int dayOfYear;
    int isoWeek;
    int hour12;
    int hour24;
    int hour;
    int minute;
    int second;
    int nanosecond;
    int weekNumber;
    Duration tzOffset;
    string tzAbbreviation;
    string tzName;
    long epochSecond;
    enum AMPM { AM, PM, None }
    AMPM amPm = AMPM.None;

    /// Set once %z has supplied an explicit UTC offset.
    private bool sawTzOffset;
    /// Set once %s has supplied an absolute timestamp.
    private bool sawEpochSecond;

    /**
     * Match `data` against formatString and build the resulting time.
     *
     * Params:
     *   formatString = strftime-style pattern
     *   defaultTimeZone = zone used when the input carries no %z offset;
     *                     null means UTC
     *
     * Returns: a Result whose `error` is null on success. On success
     *          `remaining` holds the unconsumed input; on failure it holds the
     *          input at the point of failure (shortened to an excerpt for
     *          "unexpected value") and `remainingFormat` the format tail.
     *
     * Throws: Exception for an unrecognized directive; DateTimeException if
     *         the collected fields do not form a valid date/time.
     */
    Result parse(string formatString, immutable(TimeZone) defaultTimeZone)
    {
        bool inPercent;
        foreach (size_t i, dchar c; formatString)
        {
            if (inPercent)
            {
                inPercent = false;
                if (!interpretFromString(c))
                {
                    return Result(SysTime.init, "unexpected value", excerpt(), formatString[i..$]);
                }
            }
            else if (c == '%')
            {
                inPercent = true;
            }
            else if (!pop(c))
            {
                // Also covers input that ran out before the literal.
                return Result(SysTime.init, "unexpected literal", data, formatString[i..$]);
            }
        }

        auto tz = effectiveTimeZone(defaultTimeZone);

        if (sawEpochSecond)
        {
            // An absolute timestamp fully determines the instant.
            return Result(SysTime.fromUnixTime(epochSecond, tz), null, data);
        }

        if (!year)
        {
            year = century * 100 + yearOfCentury;
        }
        if (hour12)
        {
            final switch (amPm)
            {
                case AMPM.AM:
                    // 12 AM is midnight.
                    hour24 = hour12 % 12;
                    break;
                case AMPM.PM:
                    // 12 PM is noon; 1..11 PM map to 13..23.
                    hour24 = hour12 % 12 + 12;
                    break;
                case AMPM.None:
                    hour24 = hour12;
                    break;
            }
        }
        auto dt = SysTime(
                DateTime(year, month, dayOfMonth, hour24, minute, second),
                tz);
        return Result(dt, null, data);
    }

    /**
     * Consume the value for a single directive character from the front of
     * `data` and record it.
     *
     * Returns: true if the input matched the directive; false otherwise.
     * Throws: Exception if the directive is not recognized.
     */
    bool interpretFromString(dchar c)
    {
        switch (c)
        {
            case 'a':
            {
                size_t index;
                if (!popName(weekdayAbbrev, index)) return false;
                dayOfWeek = cast(int)index;
                return true;
            }
            case 'A':
            {
                size_t index;
                if (!popName(weekdayNames, index)) return false;
                dayOfWeek = cast(int)index;
                return true;
            }
            case 'b':
            {
                size_t index;
                if (!popName(monthAbbrev, index)) return false;
                month = cast(Month)(index + 1);
                return true;
            }
            case 'B':
            {
                size_t index;
                if (!popName(monthNames, index)) return false;
                month = cast(Month)(index + 1);
                return true;
            }
            case 'C':
                return parseInt!(x => century = x)(data);
            case 'd':
            case 'e':
                return parseInt!(x => dayOfMonth = x)(data);
            case 'F':
            {
                auto dash = data.indexOf('-');
                if (dash <= 0) return false;
                int parsedYear;
                try
                {
                    parsedYear = data[0 .. dash].to!int;
                }
                catch (ConvException)
                {
                    return false;
                }
                auto rest = data[dash + 1 .. $];
                // Month is 2 digits; day is 2 digits; dash between
                if (rest.length < 5 || rest[2] != '-') return false;
                year = parsedYear;
                data = rest;
                if (!parseInt!(x => month = cast(Month)x)(data)) return false;
                if (!pop('-')) return false;
                return parseInt!(x => dayOfMonth = x)(data);
            }
            case 'H':
            case 'k':
                return parseInt!(x => hour24 = x)(data);
            case 'h':
            case 'I':
            case 'l':
                return parseInt!(x => hour12 = x)(data);
            case 'j':
                return parseInt!(x => dayOfYear = x, 3)(data);
            case 'm':
                return parseInt!(x => month = cast(Month)x)(data);
            case 'M':
                return parseInt!(x => minute = x)(data);
            case 'p':
                return popAmPm("AM", "PM");
            case 'P':
                return popAmPm("am", "pm");
            case 'r':
                return interpretFromString('I') &&
                    pop(':') &&
                    interpretFromString('M') &&
                    pop(':') &&
                    interpretFromString('S') &&
                    pop(' ') &&
                    interpretFromString('p');
            case 'R':
                return interpretFromString('H') &&
                    pop(':') &&
                    interpretFromString('M');
            case 's':
                if (!popDigits(epochSecond)) return false;
                sawEpochSecond = true;
                return true;
            case 'S':
                return parseInt!(x => second = x)(data);
            case 'T':
                return interpretFromString('H') &&
                    pop(':') &&
                    interpretFromString('M') &&
                    pop(':') &&
                    interpretFromString('S');
            case 'u':
                // Single digit 1..7 with Monday = 1; 7 (Sunday) folds to 0.
                return parseInt!(x => dayOfWeek = x % 7, 1)(data);
            case 'w':
                // Single digit 0..6 with Sunday = 0.
                return parseInt!(x => dayOfWeek = x, 1)(data);
            case 'V':
                return parseInt!(x => isoWeek = x)(data);
            case 'y':
                return parseInt!(x => yearOfCentury = x)(data);
            case 'Y':
                return popDigits(year);
            case 'z':
            {
                int sign = 0;
                if (pop('-'))
                {
                    sign = -1;
                }
                else if (pop('+'))
                {
                    sign = 1;
                }
                else
                {
                    return false;
                }
                int offsetHours, offsetMinutes;
                if (!parseInt!(x => offsetHours = x)(data)) return false;
                if (!parseInt!(x => offsetMinutes = x)(data)) return false;
                if (offsetHours < 0 || offsetHours > 23) return false;
                if (offsetMinutes < 0 || offsetMinutes > 59) return false;
                tzOffset = dur!"minutes"(sign * (offsetMinutes + 60 * offsetHours));
                sawTzOffset = true;
                return true;
            }
            case 'Z':
            {
                // This could be something like America/Los_Angeles, UTC or
                // EST5EDT, and nothing marks where it ends. Assume the name
                // runs up to the next ASCII whitespace (or the end of input).
                // The name is recorded but not interpreted.
                size_t end = 0;
                while (end < data.length &&
                        data[end] != ' ' && data[end] != '\t' &&
                        data[end] != '\r' && data[end] != '\n')
                {
                    ++end;
                }
                if (end == 0) return false;
                tzName = data[0 .. end];
                data = data[end .. $];
                return true;
            }
            case '%':
                return pop('%');
            default:
                throw new Exception("unrecognized control character %" ~ c.to!string);
        }
    }

    /// Consume `c` from the front of `data` if it is the next character.
    bool pop(dchar c)
    {
        if (data.startsWith(c))
        {
            data = data[c.codeLength!char .. $];
            return true;
        }
        return false;
    }

    /// Consume the first entry of `names` that prefixes `data`, reporting its position.
    private bool popName(const string[] names, ref size_t index)
    {
        foreach (i, name; names)
        {
            if (data.startsWith(name))
            {
                index = i;
                data = data[name.length .. $];
                return true;
            }
        }
        return false;
    }

    /// Consume an AM/PM marker spelled as `am` or `pm` and record it.
    private bool popAmPm(string am, string pm)
    {
        if (data.startsWith(am))
        {
            amPm = AMPM.AM;
            data = data[am.length .. $];
            return true;
        }
        if (data.startsWith(pm))
        {
            amPm = AMPM.PM;
            data = data[pm.length .. $];
            return true;
        }
        return false;
    }

    /// Consume a run of ASCII digits (which may extend to the end of input) into `value`.
    private bool popDigits(T)(ref T value)
    {
        size_t end = 0;
        while (end < data.length && data[end] >= '0' && data[end] <= '9')
        {
            ++end;
        }
        if (end == 0) return false;
        T parsed;
        try
        {
            parsed = data[0 .. end].to!T;
        }
        catch (ConvException)
        {
            // Too many digits for T.
            return false;
        }
        value = parsed;
        data = data[end .. $];
        return true;
    }

    /// Time zone for the result: an explicit %z offset wins over the default.
    private immutable(TimeZone) effectiveTimeZone(immutable(TimeZone) fallback)
    {
        if (sawTzOffset)
        {
            // A zero offset maps to the shared UTC instance.
            if (tzOffset == Duration.zero) return utc;
            return new immutable SimpleTimeZone(tzOffset);
        }
        return fallback is null ? utc : fallback;
    }

    /// At most 15 bytes of the unconsumed input, cut on a code point boundary.
    private string excerpt()
    {
        enum maxLength = 15;
        if (data.length <= maxLength) return data;
        size_t cut = maxLength;
        // Back up over UTF-8 continuation bytes so no character is split.
        while (cut > 0 && (data[cut] & 0xC0) == 0x80)
        {
            --cut;
        }
        return data[0 .. cut];
    }
}
515 
/**
 * Parse a fixed-width integer field from the front of `data`.
 *
 * The first `length` bytes are taken, surrounding whitespace is stripped (so
 * space-padded fields such as " 5" are accepted) and the rest is converted to
 * an int, which is handed to `setter`.
 *
 * Params:
 *   setter = callable invoked with the parsed value on success
 *   length = width of the field in bytes
 *   data = input; advanced past the field only on success
 *
 * Returns: true if a value was parsed; false if the input is too short or the
 *          field is not a valid integer, in which case `data` is unchanged.
 */
bool parseInt(alias setter, int length = 2)(ref string data)
{
    if (data.length < length)
    {
        return false;
    }
    auto field = data[0..length];
    foreach (char unit; field)
    {
        // A non-ASCII byte cannot belong to a number, and the fixed-width
        // slice may have cut a multi-byte character in half.
        if (unit >= 0x80)
        {
            return false;
        }
    }
    int v;
    try
    {
        v = field.strip.to!int;
    }
    catch (ConvException)
    {
        return false;
    }
    // Consume the field only once it is known to be valid.
    data = data[length..$];
    cast(void)setter(v);
    return true;
}
537 
/**
 * Append the expansion of a single strftime-style directive to `ap`.
 *
 * Params:
 *   ap = output buffer
 *   dt = the time being formatted
 *   c = the directive character (the one following '%')
 *
 * Throws: Exception for %g and %G (not implemented) and for any
 *         unrecognized directive.
 */
void interpretIntoString(ref Appender!string ap, SysTime dt, char c)
{
    switch (c)
    {
        case 'a':
            ap ~= weekdayAbbrev[cast(size_t)dt.dayOfWeek];
            return;
        case 'A':
            ap ~= weekdayNames[cast(size_t)dt.dayOfWeek];
            return;
        case 'b':
            // Month is 1-based (January == 1); the name tables are 0-based.
            ap ~= monthAbbrev[cast(size_t)dt.month - 1];
            return;
        case 'B':
            ap ~= monthNames[cast(size_t)dt.month - 1];
            return;
        case 'C':
            ap.pad((dt.year / 100).to!string, '0', 2);
            return;
        case 'd':
            ap.pad(dt.day.to!string, '0', 2);
            return;
        case 'e':
            ap.pad(dt.day.to!string, ' ', 2);
            return;
        case 'F':
            interpretIntoString(ap, dt, 'Y');
            ap ~= '-';
            interpretIntoString(ap, dt, 'm');
            ap ~= '-';
            interpretIntoString(ap, dt, 'd');
            return;
        case 'g':
            // TODO what is this?
            throw new Exception("%g not yet implemented");
        case 'G':
            // TODO what is this?
            throw new Exception("%G not yet implemented");
        case 'h':
        case 'I':
            auto h = dt.hour;
            if (h == 0)
            {
                // Midnight is 12 on a 12-hour clock.
                h = 12;
            }
            else if (h > 12)
            {
                h -= 12;
            }
            ap.pad(h.to!string, '0', 2);
            return;
        case 'H':
            ap.pad(dt.hour.to!string, '0', 2);
            return;
        case 'j':
            ap.pad(dt.dayOfYear.to!string, '0', 3);
            return;
        case 'k':
            ap.pad(dt.hour.to!string, ' ', 2);
            return;
        case 'l':
            auto h = dt.hour;
            if (h == 0)
            {
                h = 12;
            }
            else if (h > 12)
            {
                h -= 12;
            }
            ap.pad(h.to!string, ' ', 2);
            return;
        case 'm':
            uint m = cast(uint)dt.month;
            ap.pad(m.to!string, '0', 2);
            return;
        case 'M':
            ap.pad(dt.minute.to!string, '0', 2);
            return;
        case 'p':
            if (dt.hour >= 12)
            {
                ap ~= "PM";
            }
            else
            {
                ap ~= "AM";
            }
            return;
        case 'P':
            if (dt.hour >= 12)
            {
                ap ~= "pm";
            }
            else
            {
                ap ~= "am";
            }
            return;
        case 'r':
            interpretIntoString(ap, dt, 'I');
            ap ~= ':';
            interpretIntoString(ap, dt, 'M');
            ap ~= ':';
            interpretIntoString(ap, dt, 'S');
            ap ~= ' ';
            interpretIntoString(ap, dt, 'p');
            return;
        case 'R':
            interpretIntoString(ap, dt, 'H');
            ap ~= ':';
            interpretIntoString(ap, dt, 'M');
            return;
        case 's':
            auto delta = dt - SysTime(DateTime(1970, 1, 1), UTC());
            ap ~= delta.total!"seconds"().to!string;
            return;
        case 'S':
            ap.pad(dt.second.to!string, '0', 2);
            return;
        case 'T':
            interpretIntoString(ap, dt, 'H');
            ap ~= ':';
            interpretIntoString(ap, dt, 'M');
            ap ~= ':';
            interpretIntoString(ap, dt, 'S');
            return;
        case 'u':
            // ISO numbering: Monday = 1 ... Sunday = 7.
            auto dow = cast(uint)dt.dayOfWeek;
            if (dow == 0) dow = 7;
            ap ~= dow.to!string;
            return;
        case 'V':
            ap.pad(dt.isoWeek.to!string, '0', 2);
            return;
        case 'w':
            ap ~= (cast(uint)dt.dayOfWeek).to!string;
            return;
        case 'y':
            ap.pad((dt.year % 100).to!string, '0', 2);
            return;
        case 'Y':
            ap.pad(dt.year.to!string, '0', 4);
            return;
        case 'z':
            import std.math : abs;
            auto d = dt.utcOffset;
            if (d < dur!"seconds"(0))
            {
                ap ~= '-';
            }
            else
            {
                ap ~= '+';
            }
            auto minutes = abs(d.total!"minutes");
            ap.pad((minutes / 60).to!string, '0', 2);
            ap.pad((minutes % 60).to!string, '0', 2);
            return;
        case 'Z':
            auto zone = dt.timezone;
            if (zone is null)
            {
                ap ~= 'Z';
            }
            else if (dt.dstInEffect)
            {
                // Daylight saving time is active: use the DST name.
                ap ~= zone.dstName;
            }
            else
            {
                ap ~= zone.stdName;
            }
            return;
        case '%':
            ap ~= '%';
            return;
        default:
            throw new Exception("format element %" ~ c ~ " not recognized");
    }
}
725 
/**
 * Append `s` to `ap`, left-padded with `pad` up to a minimum width.
 *
 * Params:
 *   ap = output buffer
 *   s = the text to append
 *   pad = fill character placed in front of s
 *   length = minimum width in code units; longer text is appended unchanged
 */
void pad(ref Appender!string ap, string s, char pad, uint length)
{
    size_t missing = 0;
    if (s.length < length)
    {
        missing = length - s.length;
    }
    foreach (_; 0 .. missing)
    {
        ap ~= pad;
    }
    ap ~= s;
}
739 
// Round trip: format a UTC time with an ISO-like pattern, then parse the
// result back and expect the same instant and time zone.
unittest
{
    auto dt = SysTime(
            DateTime(2017, 5, 3, 14, 31, 57),
            UTC());
    auto isoishFmt = "%Y-%m-%d %H:%M:%S %z";
    auto isoish = dt.format(isoishFmt);
    assert(isoish == "2017-05-03 14:31:57 +0000", isoish);
    auto parsed = isoish.parse(isoishFmt);
    assert(parsed.timezone !is null);
    assert(parsed.timezone == UTC());
    assert(parsed == dt, parsed.format(isoishFmt));
}
757 
// An RFC 1123 date parses to the matching UTC time.
unittest
{
    auto expected = SysTime(DateTime(2014, 9, 4, 6, 42, 22), UTC());
    auto actual = parseRFC1123("Thu, 04 Sep 2014 06:42:22 GMT");
    assert(actual == expected, actual.toISOString());
}