@@ -355,8 +355,6 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
355355 * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
356356 * + Accepts special values "NaT" (not a time), "Today", (current
357357 * day according to local time) and "Now" (current time in UTC).
358- * + ':' separator between hours, minutes, and seconds is optional. When
359- * omitted, each component must be 2 digits if it appears. (GH-10041)
360358 *
361359 * 'str' must be a NULL-terminated string, and 'len' must be its length.
362360 * 'unit' should contain -1 if the unit is unknown, or the unit
@@ -396,21 +394,15 @@ parse_iso_8601_datetime(char *str, int len,
396394 char * substr , sublen ;
397395 PANDAS_DATETIMEUNIT bestunit ;
398396
399- /* If year-month-day are separated by a valid separator,
400- * months/days without leading zeroes will be parsed
397+ /* If date components are separated by one of the valid separators,
398+ * months/days without leading 0s will be parsed
401399 * (though not iso8601). If the components aren't separated,
402- * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
403- * forbidden here (but parsed as YYMMDD elsewhere).
400+ * an error code will be returned because the date is ambiguous
404401 */
405- int has_ymd_sep = 0 ;
406- char ymd_sep = '\0' ;
407- char valid_ymd_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
408- int valid_ymd_sep_len = sizeof (valid_ymd_sep );
409-
410- /* hour-minute-second may or may not separated by ':'. If not, then
411- * each component must be 2 digits. */
412- int has_hms_sep = 0 ;
413- int hour_was_2_digits = 0 ;
402+ int has_sep = 0 ;
403+ char sep = '\0' ;
404+ char valid_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
405+ int valid_sep_len = 5 ;
414406
415407 /* Initialize the output to all zeros */
416408 memset (out , 0 , sizeof (pandas_datetimestruct ));
@@ -558,58 +550,67 @@ parse_iso_8601_datetime(char *str, int len,
558550 /* Check whether it's a leap-year */
559551 year_leap = is_leapyear (out -> year );
560552
561- /* Next character must be a separator, start of month, or end of string */
553+ /* Next character must be a separator, start of month or end */
562554 if (sublen == 0 ) {
563555 if (out_local != NULL ) {
564556 * out_local = 0 ;
565557 }
566558 bestunit = PANDAS_FR_Y ;
567559 goto finish ;
568560 }
569-
570- if (!isdigit (* substr )) {
571- for (i = 0 ; i < valid_ymd_sep_len ; ++ i ) {
572- if (* substr == valid_ymd_sep [i ]) {
561+ else if (!isdigit (* substr )) {
562+ for (i = 0 ; i < valid_sep_len ; ++ i ) {
563+ if (* substr == valid_sep [i ]) {
564+ has_sep = 1 ;
565+ sep = valid_sep [i ];
566+ ++ substr ;
567+ -- sublen ;
573568 break ;
574569 }
575570 }
576- if (i == valid_ymd_sep_len ) {
577- goto parse_error ;
578- }
579- has_ymd_sep = 1 ;
580- ymd_sep = valid_ymd_sep [i ];
581- ++ substr ;
582- -- sublen ;
583- /* Cannot have trailing separator */
584- if (sublen == 0 || !isdigit (* substr )) {
571+ if (i == valid_sep_len ) {
585572 goto parse_error ;
586573 }
587574 }
588575
589- /* PARSE THE MONTH */
590- /* First digit required */
591- out -> month = (* substr - '0' );
592- ++ substr ;
593- -- sublen ;
594- /* Second digit optional if there was a separator */
595- if (isdigit (* substr )) {
596- out -> month = 10 * out -> month + (* substr - '0' );
576+ /* Can't have a trailing sep */
577+ if (sublen == 0 ) {
578+ goto parse_error ;
579+ }
580+
581+
582+ /* PARSE THE MONTH (2 digits, or 1 digit if a separator was used) */
583+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
584+ || (sublen == 1 && isdigit (substr [0 ])))) {
585+ out -> month = (substr [0 ] - '0' );
586+
587+ if (out -> month < 1 ) {
588+ PyErr_Format (PyExc_ValueError ,
589+ "Month out of range in datetime string \"%s\"" , str );
590+ goto error ;
591+ }
597592 ++ substr ;
598593 -- sublen ;
599594 }
600- else if (!has_ymd_sep ) {
601- goto parse_error ;
595+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
596+ out -> month = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
597+
598+ if (out -> month < 1 || out -> month > 12 ) {
599+ PyErr_Format (PyExc_ValueError ,
600+ "Month out of range in datetime string \"%s\"" , str );
601+ goto error ;
602+ }
603+ substr += 2 ;
604+ sublen -= 2 ;
602605 }
603- if (out -> month < 1 || out -> month > 12 ) {
604- PyErr_Format (PyExc_ValueError ,
605- "Month out of range in datetime string \"%s\"" , str );
606- goto error ;
606+ else {
607+ goto parse_error ;
607608 }
608609
609- /* Next character must be the separator, start of day, or end of string */
609+ /* Next character must be the separator, start of day, or end of string */
610611 if (sublen == 0 ) {
611- /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
612- if (!has_ymd_sep ) {
612+ /* dates of form YYYYMM are not valid */
613+ if (!has_sep ) {
613614 goto parse_error ;
614615 }
615616 if (out_local != NULL ) {
@@ -618,39 +619,46 @@ parse_iso_8601_datetime(char *str, int len,
618619 bestunit = PANDAS_FR_M ;
619620 goto finish ;
620621 }
621-
622- if (has_ymd_sep ) {
623- /* Must have separator, but cannot be trailing */
624- if (* substr != ymd_sep || sublen == 1 ) {
625- goto parse_error ;
626- }
622+ else if (has_sep && * substr == sep ) {
627623 ++ substr ;
628624 -- sublen ;
629625 }
626+ else if (!isdigit (* substr )) {
627+ goto parse_error ;
628+ }
630629
631- /* PARSE THE DAY */
632- /* First digit required */
633- if (!isdigit (* substr )) {
634- goto parse_error ;
630+ /* Can't have a trailing separator */
631+ if (sublen == 0 ) {
632+ goto parse_error ;
635633 }
636- out -> day = (* substr - '0' );
637- ++ substr ;
638- -- sublen ;
639- /* Second digit optional if there was a separator */
640- if (isdigit (* substr )) {
641- out -> day = 10 * out -> day + (* substr - '0' );
634+
635+ /* PARSE THE DAY (2 digits, or 1 digit if a separator was used) */
636+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
637+ || (sublen == 1 && isdigit (substr [0 ])))) {
638+ out -> day = (substr [0 ] - '0' );
639+
640+ if (out -> day < 1 ) {
641+ PyErr_Format (PyExc_ValueError ,
642+ "Day out of range in datetime string \"%s\"" , str );
643+ goto error ;
644+ }
642645 ++ substr ;
643646 -- sublen ;
644647 }
645- else if (!has_ymd_sep ) {
646- goto parse_error ;
648+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
649+ out -> day = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
650+
651+ if (out -> day < 1 ||
652+ out -> day > days_per_month_table [year_leap ][out -> month - 1 ]) {
653+ PyErr_Format (PyExc_ValueError ,
654+ "Day out of range in datetime string \"%s\"" , str );
655+ goto error ;
656+ }
657+ substr += 2 ;
658+ sublen -= 2 ;
647659 }
648- if (out -> day < 1 ||
649- out -> day > days_per_month_table [year_leap ][out -> month - 1 ])
650- {
651- PyErr_Format (PyExc_ValueError ,
652- "Day out of range in datetime string \"%s\"" , str );
653- goto error ;
660+ else {
661+ goto parse_error ;
654662 }
655663
656664 /* Next character must be a 'T', ' ', or end of string */
@@ -661,119 +669,104 @@ parse_iso_8601_datetime(char *str, int len,
661669 bestunit = PANDAS_FR_D ;
662670 goto finish ;
663671 }
664-
665- if ((* substr != 'T' && * substr != ' ' ) || sublen == 1 ) {
666- goto parse_error ;
667- }
668- ++ substr ;
669- -- sublen ;
670-
671- /* PARSE THE HOURS */
672- /* First digit required */
673- if (!isdigit (* substr )) {
672+ else if (* substr != 'T' && * substr != ' ' ) {
674673 goto parse_error ;
675674 }
676- out -> hour = (* substr - '0' );
677- ++ substr ;
678- -- sublen ;
679- /* Second digit optional */
680- if (isdigit (* substr )) {
681- hour_was_2_digits = 1 ;
682- out -> hour = 10 * out -> hour + (* substr - '0' );
675+ else {
683676 ++ substr ;
684677 -- sublen ;
678+ }
679+
680+ /* PARSE THE HOURS (2 digits) */
681+ if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
682+ out -> hour = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
683+
685684 if (out -> hour >= 24 ) {
686685 PyErr_Format (PyExc_ValueError ,
687686 "Hours out of range in datetime string \"%s\"" , str );
688687 goto error ;
689688 }
689+ substr += 2 ;
690+ sublen -= 2 ;
690691 }
691-
692- /* Next character must be a ':' or the end of the string */
693- if (sublen == 0 ) {
694- if (!hour_was_2_digits ) {
695- goto parse_error ;
696- }
697- bestunit = PANDAS_FR_h ;
698- goto finish ;
692+ else if (sublen >= 1 && isdigit (substr [0 ])) {
693+ out -> hour = substr [0 ] - '0' ;
694+ ++ substr ;
695+ -- sublen ;
696+ }
697+ else {
698+ goto parse_error ;
699699 }
700700
701- if ( * substr == ':' ) {
702- has_hms_sep = 1 ;
701+ /* Next character must be a ':' or the end of the string */
702+ if ( sublen > 0 && * substr == ':' ) {
703703 ++ substr ;
704704 -- sublen ;
705- /* Cannot have a trailing separator */
706- if (sublen == 0 || !isdigit (* substr )) {
707- goto parse_error ;
708- }
709705 }
710- else if (!isdigit (* substr )) {
711- if (!hour_was_2_digits ) {
712- goto parse_error ;
713- }
706+ else {
714707 bestunit = PANDAS_FR_h ;
715708 goto parse_timezone ;
716709 }
717710
718- /* PARSE THE MINUTES */
719- /* First digit required */
720- out -> min = (* substr - '0' );
721- ++ substr ;
722- -- sublen ;
723- /* Second digit optional if there was a separator */
724- if (isdigit (* substr )) {
725- out -> min = 10 * out -> min + (* substr - '0' );
726- ++ substr ;
727- -- sublen ;
711+ /* Can't have a trailing ':' */
712+ if (sublen == 0 ) {
713+ goto parse_error ;
714+ }
715+
716+ /* PARSE THE MINUTES (2 digits) */
717+ if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
718+ out -> min = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
719+
728720 if (out -> min >= 60 ) {
729721 PyErr_Format (PyExc_ValueError ,
730- "Minutes out of range in datetime string \"%s\"" , str );
722+ "Minutes out of range in datetime string \"%s\"" , str );
731723 goto error ;
732724 }
725+ substr += 2 ;
726+ sublen -= 2 ;
733727 }
734- else if (!has_hms_sep ) {
735- goto parse_error ;
728+ else if (sublen >= 1 && isdigit (substr [0 ])) {
729+ out -> min = substr [0 ] - '0' ;
730+ ++ substr ;
731+ -- sublen ;
736732 }
737-
738- if (sublen == 0 ) {
739- bestunit = PANDAS_FR_m ;
740- goto finish ;
733+ else {
734+ goto parse_error ;
741735 }
742736
743- /* If we make it through this condition block, then the next
744- * character is a digit. */
745- if (has_hms_sep && * substr == ':' ) {
737+ /* Next character must be a ':' or the end of the string */
738+ if (sublen > 0 && * substr == ':' ) {
746739 ++ substr ;
747740 -- sublen ;
748- /* Cannot have a trailing ':' */
749- if (sublen == 0 || !isdigit (* substr )) {
750- goto parse_error ;
751- }
752- }
753- else if (!has_hms_sep && isdigit (* substr )) {
754741 }
755742 else {
756743 bestunit = PANDAS_FR_m ;
757744 goto parse_timezone ;
758745 }
759746
760- /* PARSE THE SECONDS */
761- /* First digit required */
762- out -> sec = (* substr - '0' );
763- ++ substr ;
764- -- sublen ;
765- /* Second digit optional if there was a separator */
766- if (isdigit (* substr )) {
767- out -> sec = 10 * out -> sec + (* substr - '0' );
768- ++ substr ;
769- -- sublen ;
747+ /* Can't have a trailing ':' */
748+ if (sublen == 0 ) {
749+ goto parse_error ;
750+ }
751+
752+ /* PARSE THE SECONDS (2 digits) */
753+ if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
754+ out -> sec = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
755+
770756 if (out -> sec >= 60 ) {
771757 PyErr_Format (PyExc_ValueError ,
772- "Seconds out of range in datetime string \"%s\"" , str );
758+ "Seconds out of range in datetime string \"%s\"" , str );
773759 goto error ;
774760 }
761+ substr += 2 ;
762+ sublen -= 2 ;
763+ }
764+ else if (sublen >= 1 && isdigit (substr [0 ])) {
765+ out -> sec = substr [0 ] - '0' ;
766+ ++ substr ;
767+ -- sublen ;
775768 }
776- else if (! has_hms_sep ) {
769+ else {
777770 goto parse_error ;
778771 }
779772
0 commit comments