Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
Comment thread
dmytrokirpa marked this conversation as resolved.
"type": "patch",
"comment": "fix: support initials calculation for GB18030-2022 extension characters",
"packageName": "@fluentui/react-avatar",
"email": "dmytrokirpa@microsoft.com",
"dependentChangeType": "patch"
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,26 @@ describe('getInitials', () => {
expect(result).toEqual('');
});

it('calculates initials for GB18030-2022 extension characters (CJK Ext B-I)', () => {
  // Every sample below is encoded as a surrogate pair in UTF-16; the expectation is that the
  // complete character (not half of the pair) comes back as the initial.
  const samples: string[] = [
    '𬸚', // GFZB-196
    '𢃾', // CJK Ext B
    '𪜀', // CJK Ext C
    '𫜴', // CJK Ext C
    '𫟰', // CJK Ext D
    '𬺠', // CJK Ext E
    '𮓇', // CJK Ext F
    '𪛝', // BX
    '𰉖', // GX
    '𱘍', // HX
    '𮯰', // IX
  ];

  for (const sample of samples) {
    expect(getInitials(sample, false)).toEqual(sample);
  }
});

it('calculates initials for mixed strings starting with GB18030-2022 extension characters', () => {
  // The input mixes supplementary characters, BMP CJK characters, digits and Latin letters;
  // only its leading code point is expected to be used as the initial.
  const mixedName = '𫚭齅䶱5𮯠灋𬘭r𫟼蝌龯𪛒𪛛㊣𫜹⾢Z𱔟𫍲𮴋䶺𰆬a';
  const expectedInitial = '𫚭';

  const initials = getInitials(mixedName, false);

  expect(initials).toEqual(expectedInitial);
});

it('calculates an expected initials for Japanese names', () => {
let result = getInitials('松田', false);
expect(result).toEqual('');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ const UNWANTED_ENCLOSURES_REGEX: RegExp = /[\(\[\{][^\)\]\}]*[\)\]\}]/g;

/**
* Regular expression matching special ASCII characters except space, plus some unicode special characters.
* Applies after unwanted enclosures have been removed
* Applies after unwanted enclosures have been removed.
* Note: the range starts at \uE000 (not \uD800) to avoid matching surrogate code units, which would break
* supplementary Unicode characters (encoded as surrogate pairs in UTF-16) such as GB18030-2022 extension characters.
*/
const UNWANTED_CHARS_REGEX: RegExp = /[\0-\u001F\!-/:-@\[-`\{-\u00BF\u0250-\u036F\uD800-\uFFFF]/g;
const UNWANTED_CHARS_REGEX: RegExp = /[\0-\u001F\!-/:-@\[-`\{-\u00BF\u0250-\u036F\uE000-\uFFFF]/g;

/**
* Regular expression matching phone numbers. Applied after chars matching UNWANTED_CHARS_REGEX have been removed
Expand All @@ -28,30 +30,43 @@ const MULTIPLE_WHITESPACES_REGEX: RegExp = /\s+/g;
* Arabic: Arabic, Arabic Supplement, Arabic Extended-A.
* Korean: Hangul Jamo, Hangul Compatibility Jamo, Hangul Jamo Extended-A, Hangul Syllables, Hangul Jamo Extended-B.
* Japanese: Hiragana, Katakana.
* CJK: CJK Unified Ideographs Extension A, CJK Unified Ideographs, CJK Compatibility Ideographs,
* CJK Unified Ideographs Extension B
* CJK: CJK Unified Ideographs Extension A, CJK Unified Ideographs, CJK Compatibility Ideographs.
* Note: Supplementary CJK characters (GB18030-2022 extension characters in Ext B-I) are intentionally not listed
* here so they can be rendered as initials.
*/
const UNSUPPORTED_TEXT_REGEX: RegExp =
/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]|[\uD840-\uD869][\uDC00-\uDED6]/;
/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/;

/**
 * Returns the first Unicode code point of a string, as a string.
 *
 * A supplementary character stored as a UTF-16 surrogate pair is returned whole (two code units),
 * whereas `charAt(0)` would return only its leading surrogate.
 *
 * @param value - The string to read the leading character from.
 * @returns The first code point as a string, or an empty string when `value` is empty/falsy.
 */
function getFirstCodePoint(value: string): string {
  // String iteration yields whole code points rather than UTF-16 code units, so the first item
  // produced is the complete leading character. Falsy input falls back to '' and yields nothing.
  for (const leading of value || '') {
    return leading;
  }

  return '';
}

function getInitialsLatin(displayName: string, isRtl: boolean, firstInitialOnly?: boolean): string {
let initials = '';

const splits: string[] = displayName.split(' ');
if (splits.length !== 0) {
initials += splits[0].charAt(0).toUpperCase();
// Use code point-aware helper to correctly handle supplementary characters (e.g. GB18030-2022 extension chars)
// that are encoded as surrogate pairs; charAt(0) would only return half of such a character.
initials += getFirstCodePoint(splits[0]).toUpperCase();
}

if (!firstInitialOnly) {
if (splits.length === 2) {
initials += splits[1].charAt(0).toUpperCase();
initials += getFirstCodePoint(splits[1]).toUpperCase();
} else if (splits.length === 3) {
initials += splits[2].charAt(0).toUpperCase();
initials += getFirstCodePoint(splits[2]).toUpperCase();
}
}

if (isRtl && initials.length > 1) {
return initials.charAt(1) + initials.charAt(0);
if (isRtl && [...initials].length > 1) {
const chars = [...initials];
return chars[1] + chars[0];
}

return initials;
Expand Down Expand Up @@ -95,9 +110,12 @@ export function getInitials(

displayName = cleanupDisplayName(displayName);

// For names containing CJK characters, and phone numbers, we don't display initials
// Check only the first code point against UNSUPPORTED_TEXT_REGEX so that names starting with a supported
// character (e.g. GB18030-2022 extension characters) produce an initial even when the rest of the string
// contains BMP CJK characters that would otherwise trigger the regex.
const firstCodePoint = getFirstCodePoint(displayName);
if (
UNSUPPORTED_TEXT_REGEX.test(displayName) ||
UNSUPPORTED_TEXT_REGEX.test(firstCodePoint) ||
(!options?.allowPhoneInitials && PHONENUMBER_REGEX.test(displayName))
) {
return '';
Expand Down
Loading