@@ -54,9 +54,15 @@ namespace pcs // i.e. "pythonic c++ strings"
/** \brief Returns true if character ch gets ASCII code, or false otherwise. */
template<class CharT>
inline const bool is_ascii(const CharT ch) noexcept;

/** \brief Returns true if character ch is a continuing char for identifiers, or false otherwise. */
template<class CharT>
inline const bool is_id_continue(const CharT ch) noexcept;

/** \brief Returns true if character ch is a decimal digit, or false otherwise. */
template<class CharT>
inline const bool is_decimal(const CharT ch) noexcept;

/** \brief Returns true if character ch is a starting char for identifiers, or false otherwise. */
template<class CharT>
inline const bool is_id_start(const CharT ch) noexcept;

/** \brief Returns true if character ch is punctuation, or false otherwise. */
template<class CharT>
inline const bool is_punctuation(const CharT ch) noexcept;
6268
@@ -383,7 +389,7 @@ namespace pcs // i.e. "pythonic c++ strings"
383389 /* * \brief Returns true if all characters in the string are alphabetic and there is at least one character, or false otherwise. */
384390 inline const bool isalpha () const noexcept
385391 {
386- return this ->size () > 0 && std::all_of (this ->cbegin (), this ->cend (), [](const value_type ch) { return pcs::is_alpha<CharT>(ch); });
392+ return ! this ->empty () && std::all_of (this ->cbegin (), this ->cend (), [](const value_type ch) { return pcs::is_alpha<CharT>(ch); });
387393 }
388394
389395
@@ -394,7 +400,7 @@ namespace pcs // i.e. "pythonic c++ strings"
394400 #endif
395401 inline const bool isascii () const noexcept
396402 {
397- return this ->size () == 0 || std::all_of (this ->cbegin (), this ->cend (), pcs::is_ascii<CharT>);
403+ return this ->empty () || std::all_of (this ->cbegin (), this ->cend (), pcs::is_ascii<CharT>);
398404 }
399405
400406
@@ -407,7 +413,7 @@ namespace pcs // i.e. "pythonic c++ strings"
407413 */
408414 inline const bool isdecimal () const noexcept
409415 {
410- return this ->size () > 0 && std::all_of (this ->cbegin (), this ->cend (), pcs::is_decimal<CharT>);
416+ return ! this ->empty () && std::all_of (this ->cbegin (), this ->cend (), pcs::is_decimal<CharT>);
411417 }
412418
413419
@@ -418,7 +424,8 @@ namespace pcs // i.e. "pythonic c++ strings"
418424 * handling, such as the compatibility superscript digits. This
419425 * covers digits which cannot be used to form numbers in base 10,
420426 * like the Kharosthi numbers. Formally, a digit is a character
 * that has the property value Numeric_Type=Digit or
 * Numeric_Type=Decimal.
422429 *
423430 * CAUTION: current implementation of library cpp-strings does
424431 * not implement above algorithm. It just returns the same result
@@ -430,6 +437,32 @@ namespace pcs // i.e. "pythonic c++ strings"
430437 }
431438
432439
440+ // --- isidentifier() ----------------------------------
441+ /* * \brief Returns true if the string is not empty and is a valid identifier according to the language definition, or false otherwise.
442+ *
443+ * CAUTION: the current implementation of this method does not deal with the proper c++
444+ * defintiion of identifiers (see https://en.cppreference.com/w/cpp/language/identifiers
445+ * and https://www.unicode.org/reports/tr31/#Table_Lexical_Classes_for_Identifiers).
446+ *
447+ * While the specification of identifiers in c++ is this one:
448+ *
449+ * identifier ::= XID_Start XID_Continue*
450+ * XID_Start ::= ID_Start XID_Continue*
451+ * ID_Start ::= <characters derived from the Unicode General_Category of uppercase letters, lowercase letters, titlecase letters, modifier letters, other letters, letter numbers, plus Other_ID_Start, minus Pattern_Syntax and Pattern_White_Space code points>
452+ * XID_Continue ::= <characters derived from ID_Continue as per Unicode specs Section 5.1, NFKC Modifications (https://www.unicode.org/reports/tr31/#NFKC_Modifications)>
453+ * ID_Continue ::= ID_Start | <characters having the Unicode General_Category of nonspacing marks, spacing combining marks, decimal number, connector punctuation, plus Other_ID_Continue, minus Pattern_Syntax and Pattern_White_Space code points>
454+ *
455+ * the currently implemented rule is this simpler one:
456+ *
457+ * identifier ::= ID_Start id_continue*
458+ * id_continue ::= ID_Start | <decimal number>
459+ */
460+ inline const bool isidentifier () const noexcept
461+ {
462+ return !this ->empty () && pcs::is_id_start ((*this )[0 ]) && (this ->size () == 1 || std::all_of (this ->cbegin () + 1 , this ->cend (), pcs::is_id_continue));
463+ }
464+
465+
433466 // --- isnumeric() -------------------------------------
434467 inline const bool isnumeric () const noexcept
435468 {
@@ -449,10 +482,7 @@ namespace pcs // i.e. "pythonic c++ strings"
449482 /* * \brief Returns true if there are only whitespace characters in the string and there is at least one character, or false otherwise. */
450483 inline const bool isspace () const noexcept
451484 {
452- if (this ->size () == 0 )
453- return false ;
454- else
455- return std::all_of (this ->cbegin (), this ->cend (), pcs::is_space<CharT>);
485+ return !this ->empty () && std::all_of (this ->cbegin (), this ->cend (), pcs::is_space<CharT>);
456486 }
457487
458488
@@ -688,23 +718,31 @@ namespace pcs // i.e. "pythonic c++ strings"
/** \brief SHOULD NEVER BE USED. Use next specializations instead.
 *
 * Generic fallback selected for character types that have no dedicated
 * specialization: it always answers false.
 */
template<class CharT>
inline const bool is_decimal(const CharT ch) noexcept
{
    return false;
}

/** \brief Returns true if character is a decimal digit, or false otherwise (char specialization). */
template<>
inline const bool is_decimal<char>(const char ch) noexcept
{
    // The value is converted to unsigned char before being handed to std::isdigit().
    const int is_digit = std::isdigit(static_cast<unsigned char>(ch));
    return is_digit != 0;
}

/** \brief Returns true if character is a decimal digit, or false otherwise (wchar_t specialization). */
template<>
inline const bool is_decimal<wchar_t>(const wchar_t ch) noexcept
{
    // Wide characters go through the wide-character classifier.
    const int is_digit = std::iswdigit(ch);
    return is_digit != 0;
}
732+
733+
734+ // --- is_id_continue() ------------------------------------
735+ /* * \brief Returns true if character is a continuing char for identifiers, or false otherwise. */
736+ template <class CharT >
737+ inline const bool is_id_continue (const CharT ch) noexcept
738+ { return pcs::is_id_start (ch) || pcs::is_decimal (ch); }
739+
740+
741+ // --- is_id_start() ---------------------------------------
742+ /* * \brief Returns true if character is a starting char for identifiers, or false otherwise. */
743+ template <class CharT >
744+ inline const bool is_id_start (const CharT ch) noexcept
745+ { return pcs::is_alpha (ch) || ch == CharT (' _' ); }
708746
709747
710748 // --- is_punctuation() ------------------------------------
0 commit comments