Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(208)

Side by Side Diff: base/third_party/nspr/prtime.cc

Issue 266193002: Extend PR_ParseTimeString() to accept some ISO 8601 formats to fix timezone parsing in SyslogParser. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: New approach: Extend PR_ParseTimeString() to handle some ISO 8601 formats. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* Portions are Copyright (C) 2011 Google Inc */ 1 /* Portions are Copyright (C) 2011 Google Inc */
2 /* ***** BEGIN LICENSE BLOCK ***** 2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * 4 *
5 * The contents of this file are subject to the Mozilla Public License Version 5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with 6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at 7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/ 8 * http://www.mozilla.org/MPL/
9 * 9 *
10 * Software distributed under the License is distributed on an "AS IS" basis, 10 * Software distributed under the License is distributed on an "AS IS" basis,
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
54 * PR_GMTParameters 54 * PR_GMTParameters
55 * PR_ImplodeTime 55 * PR_ImplodeTime
56 * This was modified to use the Win32 SYSTEMTIME/FILETIME structures 56 * This was modified to use the Win32 SYSTEMTIME/FILETIME structures
57 * and the timezone offsets are applied to the FILETIME structure. 57 * and the timezone offsets are applied to the FILETIME structure.
58 * All types and macros are defined in the base/third_party/prtime.h file. 58 * All types and macros are defined in the base/third_party/prtime.h file.
59 * These have been copied from the following nspr files. We have only copied 59 * These have been copied from the following nspr files. We have only copied
60 * over the types we need. 60 * over the types we need.
61 * 1. prtime.h 61 * 1. prtime.h
62 * 2. prtypes.h 62 * 2. prtypes.h
63 * 3. prlong.h 63 * 3. prlong.h
64 *
65 * Unit tests are in base/time/pr_time_unittest.cc.
wtc 2014/05/08 21:04:19 Maybe this entire comment block should be moved to
Thiemo Nagel 2014/05/09 16:19:00 I don't have an opinion about that...
64 */ 66 */
65 67
66 #include "base/logging.h" 68 #include "base/logging.h"
67 #include "base/third_party/nspr/prtime.h" 69 #include "base/third_party/nspr/prtime.h"
68 #include "build/build_config.h" 70 #include "build/build_config.h"
69 71
70 #if defined(OS_WIN) 72 #if defined(OS_WIN)
71 #include <windows.h> 73 #include <windows.h>
72 #elif defined(OS_MACOSX) 74 #elif defined(OS_MACOSX)
73 #include <CoreFoundation/CoreFoundation.h> 75 #include <CoreFoundation/CoreFoundation.h>
(...skipping 424 matching lines...) Expand 10 before | Expand all | Expand 10 after
498 * Mon Jan 16 16:12 +0130 1989 500 * Mon Jan 16 16:12 +0130 1989
499 * 6 May 1992 16:41-JST (Wednesday) 501 * 6 May 1992 16:41-JST (Wednesday)
500 * 22-AUG-1993 10:59:12.82 502 * 22-AUG-1993 10:59:12.82
501 * 22-AUG-1993 10:59pm 503 * 22-AUG-1993 10:59pm
502 * 22-AUG-1993 12:59am 504 * 22-AUG-1993 12:59am
503 * 22-AUG-1993 12:59 PM 505 * 22-AUG-1993 12:59 PM
504 * Friday, August 04, 1995 3:54 PM 506 * Friday, August 04, 1995 3:54 PM
505 * 06/21/95 04:24:34 PM 507 * 06/21/95 04:24:34 PM
506 * 20/06/95 21:07 508 * 20/06/95 21:07
507 * 95-06-08 19:32:48 EDT 509 * 95-06-08 19:32:48 EDT
510 * 1995-06-17T23:11:25.342156Z
508 * 511 *
509 * If the input string doesn't contain a description of the timezone, 512 * If the input string doesn't contain a description of the timezone,
510 * we consult the `default_to_gmt' to decide whether the string should 513 * we consult the `default_to_gmt' to decide whether the string should
511 * be interpreted relative to the local time zone (PR_FALSE) or GMT (PR_TRUE). 514 * be interpreted relative to the local time zone (PR_FALSE) or GMT (PR_TRUE).
512 * The correct value for this argument depends on what standard specified 515 * The correct value for this argument depends on what standard specified
513 * the time string which you are parsing. 516 * the time string which you are parsing.
514 */ 517 */
515 518
516 PRStatus 519 PRStatus
517 PR_ParseTimeString( 520 PR_ParseTimeString(
518 const char *string, 521 const char *string,
519 PRBool default_to_gmt, 522 PRBool default_to_gmt,
520 PRTime *result_imploded) 523 PRTime *result_imploded)
521 { 524 {
522 PRExplodedTime tm; 525 PRExplodedTime tm;
523 PRExplodedTime *result = &tm; 526 PRExplodedTime *result = &tm;
524 TIME_TOKEN dotw = TT_UNKNOWN; 527 TIME_TOKEN dotw = TT_UNKNOWN;
525 TIME_TOKEN month = TT_UNKNOWN; 528 TIME_TOKEN month = TT_UNKNOWN;
526 TIME_TOKEN zone = TT_UNKNOWN; 529 TIME_TOKEN zone = TT_UNKNOWN;
527 int zone_offset = -1; 530 int zone_offset = -1;
528 int dst_offset = 0; 531 int dst_offset = 0;
529 int date = -1; 532 int date = -1;
530 PRInt32 year = -1; 533 PRInt32 year = -1;
531 int hour = -1; 534 int hour = -1;
532 int min = -1; 535 int min = -1;
533 int sec = -1; 536 int sec = -1;
537 int usec = -1;
534 538
535 const char *rest = string; 539 const char *rest = string;
536 540
537 int iterations = 0; 541 int iterations = 0;
538 542
539 PR_ASSERT(string && result); 543 PR_ASSERT(string && result);
540 if (!string || !result) return PR_FAILURE; 544 if (!string || !result) return PR_FAILURE;
541 545
542 while (*rest) 546 while (*rest)
543 { 547 {
(...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after
767 zone = TT_GMT; 771 zone = TT_GMT;
768 break; 772 break;
769 } 773 }
770 774
771 case '0': case '1': case '2': case '3': case '4': 775 case '0': case '1': case '2': case '3': case '4':
772 case '5': case '6': case '7': case '8': case '9': 776 case '5': case '6': case '7': case '8': case '9':
773 { 777 {
774 int tmp_hour = -1; 778 int tmp_hour = -1;
775 int tmp_min = -1; 779 int tmp_min = -1;
776 int tmp_sec = -1; 780 int tmp_sec = -1;
781 int tmp_usec = -1;
777 const char *end = rest + 1; 782 const char *end = rest + 1;
778 while (*end >= '0' && *end <= '9') 783 while (*end >= '0' && *end <= '9')
779 end++; 784 end++;
780 785
781 /* end is now the first character after a range of digits. */ 786 /* end is now the first character after a range of digits. */
782 787
783 if (*end == ':') 788 if (*end == ':')
784 { 789 {
785 if (hour >= 0 && min >= 0) /* already got it */ 790 if (hour >= 0 && min >= 0) /* already got it */
786 break; 791 break;
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
826 ; 831 ;
827 else if ((end - rest) > 2) 832 else if ((end - rest) > 2)
828 /* it is [0-9][0-9][0-9]+: */ 833 /* it is [0-9][0-9][0-9]+: */
829 break; 834 break;
830 else if ((end - rest) == 2) 835 else if ((end - rest) == 2)
831 tmp_sec = ((rest[0]-'0')*10 + 836 tmp_sec = ((rest[0]-'0')*10 +
832 (rest[1]-'0')); 837 (rest[1]-'0'));
833 else 838 else
834 tmp_sec = (rest[0]-'0'); 839 tmp_sec = (rest[0]-'0');
835 840
841 /* fractional second */
842 rest = end;
843 if (*rest == '.') {
844 rest++;
845 end++;
846 tmp_usec = 0;
847 /* use up to 6 digits, skip over the rest */
848 while (*end >= '0' && *end <= '9') {
849 if (end - rest < 6) {
850 tmp_usec = tmp_usec * 10 + *end - '0';
851 } else if (end - rest == 6) {
852 if ('5' <= *end && *end <= '9') tmp_usec++ ; /* round to nearest */
wtc 2014/05/08 21:04:19 I suggest we always truncate. The reason is that r
Thiemo Nagel 2014/05/09 16:19:00 I wouldn't consider that surprising... ;-) Anywa
853 }
854 end++;
855 }
856 int ndigits = end - rest;
857 while (ndigits++ < 6) tmp_usec *= 10;
wtc 2014/05/08 21:04:19 Nit: put tmp_usec *= 10; on a separate line.
Thiemo Nagel 2014/05/09 16:19:00 Done.
858 }
859
860 rest = end;
861 if (*rest == 'Z') {
wtc 2014/05/08 21:04:19 Should we only allow 'Z' if 'T' was used as a deli
Thiemo Nagel 2014/05/09 16:19:00 I wouldn't do that since according to Wikipedia "i
862 zone = TT_GMT;
863 end++;
wtc 2014/05/08 21:04:19 It seems that we should skip the AM/PM processing
Thiemo Nagel 2014/05/09 16:19:00 Done.
864 }
865
836 /* If we made it here, we've parsed hour and min, 866 /* If we made it here, we've parsed hour and min,
837 and possibly sec, so it worked as a unit. */ 867 and possibly sec, so it worked as a unit. */
838 868
839 /* skip over whitespace and see if there's an AM or PM 869 /* skip over whitespace and see if there's an AM or PM
840 directly following the time. 870 directly following the time.
841 */ 871 */
842 if (tmp_hour <= 12) 872 if (tmp_hour <= 12)
843 { 873 {
844 const char *s = end; 874 const char *s = end;
845 while (*s && (*s == ' ' || *s == '\t')) 875 while (*s && (*s == ' ' || *s == '\t'))
846 s++; 876 s++;
847 if ((s[0] == 'p' || s[0] == 'P') && 877 if ((s[0] == 'p' || s[0] == 'P') &&
848 (s[1] == 'm' || s[1] == 'M')) 878 (s[1] == 'm' || s[1] == 'M'))
849 /* 10:05pm == 22:05, and 12:05pm == 12:05 */ 879 /* 10:05pm == 22:05, and 12:05pm == 12:05 */
850 tmp_hour = (tmp_hour == 12 ? 12 : tmp_ hour + 12); 880 tmp_hour = (tmp_hour == 12 ? 12 : tmp_ hour + 12);
851 else if (tmp_hour == 12 && 881 else if (tmp_hour == 12 &&
852 (s[0] == 'a' || s[0] == 'A') && 882 (s[0] == 'a' || s[0] == 'A') &&
853 (s[1] == 'm' || s[1] == 'M')) 883 (s[1] == 'm' || s[1] == 'M'))
854 /* 12:05am == 00:05 */ 884 /* 12:05am == 00:05 */
855 tmp_hour = 0; 885 tmp_hour = 0;
856 } 886 }
857 887
858 hour = tmp_hour; 888 hour = tmp_hour;
859 min = tmp_min; 889 min = tmp_min;
860 sec = tmp_sec; 890 sec = tmp_sec;
891 usec = tmp_usec;
861 rest = end; 892 rest = end;
862 break; 893 break;
863 } 894 }
864 else if ((*end == '/' || *end == '-') && 895 else if ((*end == '/' || *end == '-') &&
865 end[1] >= '0' && end[1] <= '9') 896 end[1] >= '0' && end[1] <= '9')
866 { 897 {
867 /* Perhaps this is 6/16/95, 16/6/95, 6-16-95, or 16-6-95 898 /* Perhaps this is 6/16/95, 16/6/95, 6-16-95, or 16-6-95
868 or even 95-06-05... 899 or even 95-06-05 or 1995-06-22.
869 #### But it doesn't handle 1995-06-22.
870 */ 900 */
871 int n1, n2, n3; 901 int n1, n2, n3;
872 const char *s; 902 const char *s;
873 903
874 if (month != TT_UNKNOWN) 904 if (month != TT_UNKNOWN)
875 /* if we saw a month name, this can't be. */ 905 /* if we saw a month name, this can't be. */
876 break; 906 break;
877 907
878 s = rest; 908 s = rest;
879 909
880 n1 = (*s++ - '0'); /* first 1 or 2 digits */ 910 n1 = (*s++ - '0'); /* first 1, 2 or 4 digits */
881 if (*s >= '0' && *s <= '9') 911 if (*s >= '0' && *s <= '9')
882 n1 = n1*10 + (*s++ - '0'); 912 n1 = n1*10 + (*s++ - '0');
883 913
914 if (*s >= '0' && *s <= '9') /* option al digits 3 and 4 */
915 {
916 n1 = n1*10 + (*s++ - '0');
917 if (*s < '0' || *s > '9')
918 break;
919 n1 = n1*10 + (*s++ - '0');
920 }
921
884 if (*s != '/' && *s != '-') /* sl ash */ 922 if (*s != '/' && *s != '-') /* sl ash */
885 break; 923 break;
886 s++; 924 s++;
887 925
888 if (*s < '0' || *s > '9') /* seco nd 1 or 2 digits */ 926 if (*s < '0' || *s > '9') /* seco nd 1 or 2 digits */
889 break; 927 break;
890 n2 = (*s++ - '0'); 928 n2 = (*s++ - '0');
891 if (*s >= '0' && *s <= '9') 929 if (*s >= '0' && *s <= '9')
892 n2 = n2*10 + (*s++ - '0'); 930 n2 = n2*10 + (*s++ - '0');
893 931
(...skipping 10 matching lines...) Expand all
904 if (*s >= '0' && *s <= '9') /* option al digits 3, 4, and 5 */ 942 if (*s >= '0' && *s <= '9') /* option al digits 3, 4, and 5 */
905 { 943 {
906 n3 = n3*10 + (*s++ - '0'); 944 n3 = n3*10 + (*s++ - '0');
907 if (*s < '0' || *s > '9') 945 if (*s < '0' || *s > '9')
908 break; 946 break;
909 n3 = n3*10 + (*s++ - '0'); 947 n3 = n3*10 + (*s++ - '0');
910 if (*s >= '0' && *s <= '9') 948 if (*s >= '0' && *s <= '9')
911 n3 = n3*10 + (*s++ - '0'); 949 n3 = n3*10 + (*s++ - '0');
912 } 950 }
913 951
914 if ((*s >= '0' && *s <= '9') || /* follow ed by non-alphanum */ 952 if (((*s >= '0' && *s <= '9') || /* follo wed by non-alphanum */
915 (*s >= 'A' && *s <= 'Z') || 953 (*s >= 'A' && *s <= 'Z') ||
916 (*s >= 'a' && *s <= 'z')) 954 (*s >= 'a' && *s <= 'z')) &&
955 !(*s == 'T' && '0' <= s[1] && s[1] <= '9')) /* allow ISO 8601 T delimiter */
wtc 2014/05/08 21:04:19 I think we should skip over 'T' right here, so tha
Thiemo Nagel 2014/05/09 16:19:00 We could skip over the 'T' here, but we cannot eli
917 break; 956 break;
918 957
919 /* Ok, we parsed three 1-2 digit numbers, with / or - 958 /* Ok, we parsed three 1-2 digit numbers, with / or -
wtc 2014/05/08 21:04:19 This comment needs to be updated because "three 1-
Thiemo Nagel 2014/05/09 16:19:00 Done.
920 between them. Now decide what the hell they are 959 between them. Now decide what the hell they are
921 (DD/MM/YY or MM/DD/YY or YY/MM/DD.) 960 (DD/MM/YY or MM/DD/YY or YY/MM/DD.)
wtc 2014/05/08 21:04:19 The last item should be YY[YY]/MM/DD or [YY]YY/MM/
Thiemo Nagel 2014/05/09 16:19:00 Done.
922 */ 961 */
923 962
924 if (n1 > 31 || n1 == 0) /* must be YY/MM/DD */ 963 if (n1 > 31 || n1 == 0) /* must be YY[YY]/MM/DD */
wtc 2014/05/08 21:04:19 Nit: [YY]YY/MM/DD seems more accurate. Not very su
Thiemo Nagel 2014/05/09 16:19:00 Absolutely!
925 { 964 {
926 if (n2 > 12) break; 965 if (n2 > 12) break;
927 if (n3 > 31) break; 966 if (n3 > 31) break;
928 year = n1; 967 year = n1;
929 if (year < 70) 968 if (year < 70)
930 year += 2000; 969 year += 2000;
931 else if (year < 100) 970 else if (year < 100)
932 year += 1900; 971 year += 1900;
933 month = (TIME_TOKEN)(n2 + ((int)TT_JAN) - 1); 972 month = (TIME_TOKEN)(n2 + ((int)TT_JAN) - 1);
934 date = n3; 973 date = n3;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
1019 1058
1020 /* Skip to the end of this token, whether we parsed it or not. 1059 /* Skip to the end of this token, whether we parsed it or not.
1021 Tokens are delimited by whitespace, or ,;-/ 1060 Tokens are delimited by whitespace, or ,;-/
1022 But explicitly not :+-. 1061 But explicitly not :+-.
1023 */ 1062 */
1024 while (*rest && 1063 while (*rest &&
1025 *rest != ' ' && *rest != '\t' && 1064 *rest != ' ' && *rest != '\t' &&
1026 *rest != ',' && *rest != ';' && 1065 *rest != ',' && *rest != ';' &&
1027 *rest != '-' && *rest != '+' && 1066 *rest != '-' && *rest != '+' &&
1028 *rest != '/' && 1067 *rest != '/' &&
1029 *rest != '(' && *rest != ')' && *rest != '[' && *rest ! = ']') 1068 *rest != '(' && *rest != ')' && *rest != '[' && *rest ! = ']' &&
1069 !(*rest == 'T' && '0' <= rest[1] && rest[1] <= '9') /* T precedes time in ISO 8601 */
1070 )
1030 rest++; 1071 rest++;
1031 /* skip over uninteresting chars. */ 1072 /* skip over uninteresting chars. */
1032 SKIP_MORE: 1073 SKIP_MORE:
1033 while (*rest && 1074 while (*rest &&
1034 (*rest == ' ' || *rest == '\t' || 1075 (*rest == ' ' || *rest == '\t' ||
1035 *rest == ',' || *rest == ';' || *rest == '/' || 1076 *rest == ',' || *rest == ';' || *rest == '/' ||
1036 *rest == '(' || *rest == ')' || *rest == '[' || *rest == ']')) 1077 *rest == '(' || *rest == ')' || *rest == '[' || *rest == ']' ||
1078 (*rest == 'T' && '0' <= rest[1] && rest[1] <= '9') /* T precedes time in ISO 8601 */
1079 )
1080 )
1037 rest++; 1081 rest++;
1038 1082
1039 /* "-" is ignored at the beginning of a token if we have not yet 1083 /* "-" is ignored at the beginning of a token if we have not yet
1040 parsed a year (e.g., the second "-" in "30-AUG-1966"), or if 1084 parsed a year (e.g., the second "-" in "30-AUG-1966"), or if
1041 the character after the dash is not a digit. */ 1085 the character after the dash is not a digit. */
1042 if (*rest == '-' && ((rest > string && 1086 if (*rest == '-' && ((rest > string &&
1043 isalpha((unsigned char)rest[-1]) && year < 0) || 1087 isalpha((unsigned char)rest[-1]) && year < 0) ||
1044 rest[1] < '0' || rest[1] > '9')) 1088 rest[1] < '0' || rest[1] > '9'))
1045 { 1089 {
1046 rest++; 1090 rest++;
1047 goto SKIP_MORE; 1091 goto SKIP_MORE;
1048 } 1092 }
1049 1093
1050 } 1094 } /* while */
1051 1095
1052 if (zone != TT_UNKNOWN && zone_offset == -1) 1096 if (zone != TT_UNKNOWN && zone_offset == -1)
1053 { 1097 {
1054 switch (zone) 1098 switch (zone)
1055 { 1099 {
1056 case TT_PST: zone_offset = -8 * 60; break; 1100 case TT_PST: zone_offset = -8 * 60; break;
1057 case TT_PDT: zone_offset = -8 * 60; dst_offset = 1 * 60; break; 1101 case TT_PDT: zone_offset = -8 * 60; dst_offset = 1 * 60; break;
1058 case TT_MST: zone_offset = -7 * 60; break; 1102 case TT_MST: zone_offset = -7 * 60; break;
1059 case TT_MDT: zone_offset = -7 * 60; dst_offset = 1 * 60; break; 1103 case TT_MDT: zone_offset = -7 * 60; dst_offset = 1 * 60; break;
1060 case TT_CST: zone_offset = -6 * 60; break; 1104 case TT_CST: zone_offset = -6 * 60; break;
(...skipping 14 matching lines...) Expand all
1075 } 1119 }
1076 1120
1077 /* If we didn't find a year, month, or day-of-the-month, we can't 1121 /* If we didn't find a year, month, or day-of-the-month, we can't
1078 possibly parse this, and in fact, mktime() will do something random 1122 possibly parse this, and in fact, mktime() will do something random
1079 (I'm seeing it return "Tue Feb 5 06:28:16 2036", which is no doubt 1123 (I'm seeing it return "Tue Feb 5 06:28:16 2036", which is no doubt
1080 a numerologically significant date... */ 1124 a numerologically significant date... */
1081 if (month == TT_UNKNOWN || date == -1 || year == -1 || year > PR_INT16_MAX) 1125 if (month == TT_UNKNOWN || date == -1 || year == -1 || year > PR_INT16_MAX)
1082 return PR_FAILURE; 1126 return PR_FAILURE;
1083 1127
1084 memset(result, 0, sizeof(*result)); 1128 memset(result, 0, sizeof(*result));
1129 if (usec != -1)
1130 result->tm_usec = usec;
1085 if (sec != -1) 1131 if (sec != -1)
1086 result->tm_sec = sec; 1132 result->tm_sec = sec;
1087 if (min != -1) 1133 if (min != -1)
1088 result->tm_min = min; 1134 result->tm_min = min;
1089 if (hour != -1) 1135 if (hour != -1)
1090 result->tm_hour = hour; 1136 result->tm_hour = hour;
1091 if (date != -1) 1137 if (date != -1)
1092 result->tm_mday = date; 1138 result->tm_mday = date;
1093 if (month != TT_UNKNOWN) 1139 if (month != TT_UNKNOWN)
1094 result->tm_month = (((int)month) - ((int)TT_JAN)); 1140 result->tm_month = (((int)month) - ((int)TT_JAN));
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
1197 + 60 * localTime.tm_hour 1243 + 60 * localTime.tm_hour
1198 + 1440 * (localTime.tm_mday - 2); 1244 + 1440 * (localTime.tm_mday - 2);
1199 } 1245 }
1200 1246
1201 result->tm_params.tp_gmt_offset = zone_offset * 60; 1247 result->tm_params.tp_gmt_offset = zone_offset * 60;
1202 result->tm_params.tp_dst_offset = dst_offset * 60; 1248 result->tm_params.tp_dst_offset = dst_offset * 60;
1203 1249
1204 *result_imploded = PR_ImplodeTime(result); 1250 *result_imploded = PR_ImplodeTime(result);
1205 return PR_SUCCESS; 1251 return PR_SUCCESS;
1206 } 1252 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698