Skip to content

Commit 3b8d670

Browse files
authored
Add unicodedata.east_asian_width (RustPython#4523)
1 parent 8b7158f commit 3b8d670

File tree

5 files changed

+46
-5
lines changed

5 files changed

+46
-5
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_unicodedata.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -243,8 +243,6 @@ def test_issue29456(self):
243243
# For tests of unicodedata.is_normalized / self.db.is_normalized ,
244244
# see test_normalization.py .
245245

246-
# TODO: RUSTPYTHON
247-
@unittest.expectedFailure
248246
def test_east_asian_width(self):
249247
eaw = self.db.east_asian_width
250248
self.assertRaises(TypeError, eaw, b'a')

extra_tests/snippets/builtin_str_unicode.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
assert unicodedata.name('a') == 'LATIN SMALL LETTER A'
1919
assert unicodedata.lookup('LATIN SMALL LETTER A') == 'a'
2020
assert unicodedata.bidirectional('a') == 'L'
21+
assert unicodedata.east_asian_width('\u231a') == 'W'
2122
assert unicodedata.normalize('NFC', 'bla') == 'bla'
2223

2324
# testing unicodedata.ucd_3_2_0 for idna

stdlib/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ unic-ucd-bidi = "0.9.0"
6767
unic-ucd-category = "0.9.0"
6868
unic-ucd-age = "0.9.0"
6969
unic-ucd-ident = "0.9.0"
70+
ucd = "0.1.1"
7071

7172
# compression
7273
adler32 = "1.2.0"

stdlib/src/unicodedata.rs

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,15 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
1010
.into_ref(vm)
1111
.into();
1212

13-
for attr in ["category", "lookup", "name", "bidirectional", "normalize"]
14-
.iter()
15-
.copied()
13+
for attr in [
14+
"category",
15+
"lookup",
16+
"name",
17+
"bidirectional",
18+
"east_asian_width",
19+
"normalize",
20+
]
21+
.into_iter()
1622
{
1723
crate::vm::extend_module!(vm, &module, {
1824
attr => ucd.get_attr(attr, vm).unwrap(),
@@ -29,6 +35,7 @@ mod unicodedata {
2935
VirtualMachine,
3036
};
3137
use itertools::Itertools;
38+
use ucd::{Codepoint, EastAsianWidth};
3239
use unic_char_property::EnumeratedCharProperty;
3340
use unic_normal::StrNormalForm;
3441
use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION};
@@ -113,6 +120,16 @@ mod unicodedata {
113120
Ok(bidi.to_owned())
114121
}
115122

123+
/// NOTE: This function uses 9.0.0 database instead of 3.2.0
124+
#[pymethod]
125+
fn east_asian_width(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
126+
Ok(self
127+
.extract_char(character, vm)?
128+
.map_or(EastAsianWidth::Neutral, |c| c.east_asian_width())
129+
.abbr_name()
130+
.to_owned())
131+
}
132+
116133
#[pymethod]
117134
fn normalize(
118135
&self,
@@ -138,6 +155,23 @@ mod unicodedata {
138155
}
139156
}
140157

158+
trait EastAsianWidthAbbrName {
159+
fn abbr_name(&self) -> &'static str;
160+
}
161+
162+
impl EastAsianWidthAbbrName for EastAsianWidth {
163+
fn abbr_name(&self) -> &'static str {
164+
match self {
165+
EastAsianWidth::Narrow => "Na",
166+
EastAsianWidth::Wide => "W",
167+
EastAsianWidth::Neutral => "N",
168+
EastAsianWidth::Ambiguous => "A",
169+
EastAsianWidth::FullWidth => "F",
170+
EastAsianWidth::HalfWidth => "H",
171+
}
172+
}
173+
}
174+
141175
#[pyattr]
142176
fn ucd_3_2_0(vm: &VirtualMachine) -> PyRef<Ucd> {
143177
Ucd {

0 commit comments

Comments
 (0)