Skip to content

Commit 27edefc

Browse files
committed
#26 - Implement method CppStringT::isidentifier()
Completed. A few fixes to string emptiness evaluation plus one code simplification.
1 parent 0bd3d0a commit 27edefc

File tree

1 file changed

+55
-17
lines changed

1 file changed

+55
-17
lines changed

cpp-strings/cppstrings.h

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,15 @@ namespace pcs // i.e. "pythonic c++ strings"
5454
template<class CharT>
5555
inline const bool is_ascii(const CharT ch) noexcept; //!< Returns true if character ch gets ASCII code, or false otherwise.
5656

57+
template<class CharT>
58+
inline const bool is_id_continue(const CharT ch) noexcept; //!< Returns true if character is a continuing char for identifiers, or false otherwise.
59+
5760
template<class CharT>
5861
inline const bool is_decimal(const CharT ch) noexcept; //!< Returns true if character is a decimal digit, or false otherwise.
5962

63+
template<class CharT>
64+
inline const bool is_id_start(const CharT ch) noexcept; //!< Returns true if character is a starting char for identifiers, or false otherwise.
65+
6066
template<class CharT>
6167
inline const bool is_punctuation(const CharT ch) noexcept; //!< Returns true if character ch is punctuation, or false otherwise.
6268

@@ -383,7 +389,7 @@ namespace pcs // i.e. "pythonic c++ strings"
383389
/** \brief Returns true if all characters in the string are alphabetic and there is at least one character, or false otherwise. */
384390
// The emptiness guard enforces the "at least one character" part of the contract:
// std::all_of alone yields true for an empty range.
inline const bool isalpha() const noexcept
385391
{
386-
return this->size() > 0 && std::all_of(this->cbegin(), this->cend(), [](const value_type ch) { return pcs::is_alpha<CharT>(ch); });
392+
return !this->empty() && std::all_of(this->cbegin(), this->cend(), [](const value_type ch) { return pcs::is_alpha<CharT>(ch); });
387393
}
388394

389395

@@ -394,7 +400,7 @@ namespace pcs // i.e. "pythonic c++ strings"
394400
#endif
395401
// An empty string is reported as ASCII: the emptiness test short-circuits to true,
// otherwise every character must satisfy pcs::is_ascii<CharT>.
inline const bool isascii() const noexcept
396402
{
397-
return this->size() == 0 || std::all_of(this->cbegin(), this->cend(), pcs::is_ascii<CharT>);
403+
return this->empty() || std::all_of(this->cbegin(), this->cend(), pcs::is_ascii<CharT>);
398404
}
399405

400406

@@ -407,7 +413,7 @@ namespace pcs // i.e. "pythonic c++ strings"
407413
*/
408414
// Returns false for an empty string; otherwise true only when every character
// satisfies pcs::is_decimal<CharT>.
inline const bool isdecimal() const noexcept
409415
{
410-
return this->size() > 0 && std::all_of(this->cbegin(), this->cend(), pcs::is_decimal<CharT>);
416+
return !this->empty() && std::all_of(this->cbegin(), this->cend(), pcs::is_decimal<CharT>);
411417
}
412418

413419

@@ -418,7 +424,8 @@ namespace pcs // i.e. "pythonic c++ strings"
418424
* handling, such as the compatibility superscript digits. This
419425
* covers digits which cannot be used to form numbers in base 10,
420426
* like the Kharosthi numbers. Formally, a digit is a character
421-
* that has the property value Numeric_Type=Digit or Numeric_Type=Decimal.
427+
* that has the property value Numeric_Type=Digit or Numeric_Type
428+
* =Decimal.
422429
*
423430
* CAUTION: current implementation of library cpp-strings does
424431
* not implement above algorithm. It just returns the same result
@@ -430,6 +437,32 @@ namespace pcs // i.e. "pythonic c++ strings"
430437
}
431438

432439

440+
//--- isidentifier() ----------------------------------
441+
/** \brief Returns true if the string is not empty and is a valid identifier according to the language definition, or false otherwise.
442+
*
443+
* CAUTION: the current implementation of this method does not deal with the proper c++
444+
* definition of identifiers (see https://en.cppreference.com/w/cpp/language/identifiers
445+
* and https://www.unicode.org/reports/tr31/#Table_Lexical_Classes_for_Identifiers).
446+
*
447+
* While the specification of identifiers in c++ is this one:
448+
*
449+
* identifier ::= XID_Start XID_Continue*
450+
* XID_Start ::= ID_Start XID_Continue*
451+
* ID_Start ::= <characters derived from the Unicode General_Category of uppercase letters, lowercase letters, titlecase letters, modifier letters, other letters, letter numbers, plus Other_ID_Start, minus Pattern_Syntax and Pattern_White_Space code points>
452+
* XID_Continue ::= <characters derived from ID_Continue as per Unicode specs Section 5.1, NFKC Modifications (https://www.unicode.org/reports/tr31/#NFKC_Modifications)>
453+
* ID_Continue ::= ID_Start | <characters having the Unicode General_Category of nonspacing marks, spacing combining marks, decimal number, connector punctuation, plus Other_ID_Continue, minus Pattern_Syntax and Pattern_White_Space code points>
454+
*
455+
* the currently implemented rule is this simpler one:
456+
*
457+
* identifier ::= ID_Start id_continue*
458+
* id_continue ::= ID_Start | <decimal number>
459+
*/
460+
inline const bool isidentifier() const noexcept
461+
{
462+
return !this->empty() && pcs::is_id_start((*this)[0]) && (this->size() == 1 || std::all_of(this->cbegin() + 1, this->cend(), pcs::is_id_continue));
463+
}
464+
465+
433466
//--- isnumeric() -------------------------------------
434467
inline const bool isnumeric() const noexcept
435468
{
@@ -449,10 +482,7 @@ namespace pcs // i.e. "pythonic c++ strings"
449482
/** \brief Returns true if there are only whitespace characters in the string and there is at least one character, or false otherwise. */
450483
// Returns false for an empty string; otherwise true only when every character
// satisfies pcs::is_space<CharT>. Both the removed if/else form and the added
// single-expression form implement this same rule.
inline const bool isspace() const noexcept
451484
{
452-
if (this->size() == 0)
453-
return false;
454-
else
455-
return std::all_of(this->cbegin(), this->cend(), pcs::is_space<CharT>);
485+
return !this->empty() && std::all_of(this->cbegin(), this->cend(), pcs::is_space<CharT>);
456486
}
457487

458488

@@ -688,23 +718,31 @@ namespace pcs // i.e. "pythonic c++ strings"
688718
/** \brief SHOULD NEVER BE USED. Use next specializations instead. */
689719
// Primary template: unconditionally returns false for any character type
// that has no dedicated specialization.
template<class CharT>
690720
inline const bool is_decimal(const CharT ch) noexcept
691-
{
692-
return false;
693-
}
721+
{ return false; }
694722

695723
/** \brief Returns true if character is a decimal digit, or false otherwise. */
696724
// Specialization for char. The value is cast to unsigned char before the call
// because std::isdigit has undefined behavior for arguments that are neither
// representable as unsigned char nor equal to EOF (e.g. negative plain chars).
template<>
697725
inline const bool is_decimal<char>(const char ch) noexcept
698-
{
699-
return std::isdigit(static_cast<unsigned char>(ch));
700-
}
726+
{ return std::isdigit(static_cast<unsigned char>(ch)); }
701727

702728
/** \brief Returns true if character is a decimal digit, or false otherwise. */
703729
// Specialization for wchar_t. The added line uses std::iswdigit, the
// wide-character classification function, in place of std::isdigit, which is
// only specified for unsigned char values and EOF.
template<>
704730
inline const bool is_decimal<wchar_t>(const wchar_t ch) noexcept
705-
{
706-
return std::isdigit(ch);
707-
}
731+
{ return std::iswdigit(ch); }
732+
733+
734+
//--- is_id_continue() ------------------------------------
735+
/** \brief Returns true if character is a continuing char for identifiers, or false otherwise. */
736+
template<class CharT>
737+
inline const bool is_id_continue(const CharT ch) noexcept
738+
{ return pcs::is_id_start(ch) || pcs::is_decimal(ch); }
739+
740+
741+
//--- is_id_start() ---------------------------------------
742+
/** \brief Returns true if character is a starting char for identifiers, or false otherwise. */
743+
template<class CharT>
744+
inline const bool is_id_start(const CharT ch) noexcept
745+
{ return pcs::is_alpha(ch) || ch == CharT('_'); }
708746

709747

710748
//--- is_punctuation() ------------------------------------

0 commit comments

Comments
 (0)