Skip to content

Commit b572b91

Browse files
committed
relocate a few int functions to common
1 parent f21ad9b commit b572b91

File tree

6 files changed

+153
-134
lines changed

6 files changed

+153
-134
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ threading = ["parking_lot"]
1313
[dependencies]
1414
ascii = { workspace = true }
1515
bitflags = { workspace = true }
16+
bstr = { workspace = true }
1617
cfg-if = { workspace = true }
1718
itertools = { workspace = true }
1819
libc = { workspace = true }

common/src/int.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
use bstr::ByteSlice;
2+
use num_bigint::{BigInt, BigUint, Sign};
3+
use num_traits::{ToPrimitive, Zero};
4+
5+
pub fn bytes_to_int(lit: &[u8], mut base: u32) -> Option<BigInt> {
6+
// split sign
7+
let mut lit = lit.trim();
8+
let sign = match lit.first()? {
9+
b'+' => Some(Sign::Plus),
10+
b'-' => Some(Sign::Minus),
11+
_ => None,
12+
};
13+
if sign.is_some() {
14+
lit = &lit[1..];
15+
}
16+
17+
// split radix
18+
let first = *lit.first()?;
19+
let has_radix = if first == b'0' {
20+
match base {
21+
0 => {
22+
if let Some(parsed) = lit.get(1).and_then(detect_base) {
23+
base = parsed;
24+
true
25+
} else {
26+
if let [_first, ref others @ .., last] = lit {
27+
let is_zero =
28+
others.iter().all(|&c| c == b'0' || c == b'_') && *last == b'0';
29+
if !is_zero {
30+
return None;
31+
}
32+
}
33+
return Some(BigInt::zero());
34+
}
35+
}
36+
16 => lit.get(1).map_or(false, |&b| matches!(b, b'x' | b'X')),
37+
2 => lit.get(1).map_or(false, |&b| matches!(b, b'b' | b'B')),
38+
8 => lit.get(1).map_or(false, |&b| matches!(b, b'o' | b'O')),
39+
_ => false,
40+
}
41+
} else {
42+
if base == 0 {
43+
base = 10;
44+
}
45+
false
46+
};
47+
if has_radix {
48+
lit = &lit[2..];
49+
if lit.first()? == &b'_' {
50+
lit = &lit[1..];
51+
}
52+
}
53+
54+
// remove zeroes
55+
let mut last = *lit.first()?;
56+
if last == b'0' {
57+
let mut count = 0;
58+
for &cur in &lit[1..] {
59+
if cur == b'_' {
60+
if last == b'_' {
61+
return None;
62+
}
63+
} else if cur != b'0' {
64+
break;
65+
};
66+
count += 1;
67+
last = cur;
68+
}
69+
let prefix_last = lit[count];
70+
lit = &lit[count + 1..];
71+
if lit.is_empty() && prefix_last == b'_' {
72+
return None;
73+
}
74+
}
75+
76+
// validate
77+
for c in lit {
78+
let c = *c;
79+
if !(c.is_ascii_alphanumeric() || c == b'_') {
80+
return None;
81+
}
82+
83+
if c == b'_' && last == b'_' {
84+
return None;
85+
}
86+
87+
last = c;
88+
}
89+
if last == b'_' {
90+
return None;
91+
}
92+
93+
// parse
94+
let number = if lit.is_empty() {
95+
BigInt::zero()
96+
} else {
97+
let uint = BigUint::parse_bytes(lit, base)?;
98+
BigInt::from_biguint(sign.unwrap_or(Sign::Plus), uint)
99+
};
100+
Some(number)
101+
}
102+
103+
/// Maps a radix-marker byte to the numeric base it denotes:
/// `x`/`X` -> 16, `b`/`B` -> 2, `o`/`O` -> 8.
///
/// Returns `None` for any other byte.
#[inline]
104+
pub fn detect_base(c: &u8) -> Option<u32> {
105+
let base = match c {
106+
b'x' | b'X' => 16,
107+
b'b' | b'B' => 2,
108+
b'o' | b'O' => 8,
109+
_ => return None,
110+
};
111+
Some(base)
112+
}
113+
114+
/// Converts `int` to an `f64`, returning `None` when `to_f64()` returns `None` or when
/// the value it produces is not finite.
// num-bigint now returns Some(inf) for to_f64() in some cases; filtering non-finite
// results keeps the previous behaviour of reporting those as None for now.
115+
#[inline(always)]
116+
pub fn bigint_to_finite_float(int: &BigInt) -> Option<f64> {
117+
int.to_f64().filter(|f| f.is_finite())
118+
}
119+
120+
// Exercises `bytes_to_int` with radix prefixes, a leading sign, underscore separators
// and zero-only literals, including inputs that are expected to yield `None`.
#[test]
121+
fn test_bytes_to_int() {
122+
assert_eq!(bytes_to_int(&b"0b101"[..], 2).unwrap(), BigInt::from(5));
123+
assert_eq!(bytes_to_int(&b"0x_10"[..], 16).unwrap(), BigInt::from(16));
124+
// In base 16 only `x`/`X` counts as a radix marker, so `0b` is read as the hex digits 0xb.
assert_eq!(bytes_to_int(&b"0b"[..], 16).unwrap(), BigInt::from(11));
125+
assert_eq!(bytes_to_int(&b"+0b101"[..], 2).unwrap(), BigInt::from(5));
126+
assert_eq!(bytes_to_int(&b"0_0_0"[..], 10).unwrap(), BigInt::from(0));
127+
// With base 0, a leading zero without a radix marker is only accepted for zero itself.
assert_eq!(bytes_to_int(&b"09_99"[..], 0), None);
128+
assert_eq!(bytes_to_int(&b"000"[..], 0).unwrap(), BigInt::from(0));
129+
// A trailing underscore is rejected.
assert_eq!(bytes_to_int(&b"0_"[..], 0), None);
130+
assert_eq!(bytes_to_int(&b"0_100"[..], 10).unwrap(), BigInt::from(100));
131+
}

common/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ pub mod encodings;
1717
pub mod float_ops;
1818
pub mod format;
1919
pub mod hash;
20+
pub mod int;
2021
pub mod linked_list;
2122
pub mod lock;
2223
pub mod os;

vm/src/builtins/int.rs

Lines changed: 8 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ use crate::{
33
builtins::PyStrRef,
44
bytesinner::PyBytesInner,
55
class::PyClassImpl,
6-
common::{format::FormatSpec, hash},
6+
common::{
7+
format::FormatSpec,
8+
hash,
9+
int::{bigint_to_finite_float, bytes_to_int},
10+
},
711
convert::{IntoPyException, ToPyObject, ToPyResult},
812
function::{
913
ArgByteOrder, ArgIntoBool, OptionalArg, OptionalOption, PyArithmeticValue,
@@ -14,8 +18,7 @@ use crate::{
1418
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult,
1519
TryFromBorrowedObject, VirtualMachine,
1620
};
17-
use bstr::ByteSlice;
18-
use num_bigint::{BigInt, BigUint, Sign};
21+
use num_bigint::{BigInt, Sign};
1922
use num_integer::Integer;
2023
use num_rational::Ratio;
2124
use num_traits::{One, Pow, PrimInt, Signed, ToPrimitive, Zero};
@@ -836,138 +839,16 @@ fn try_int_radix(obj: &PyObject, base: u32, vm: &VirtualMachine) -> PyResult<Big
836839
}
837840
}
838841

839-
pub(crate) fn bytes_to_int(lit: &[u8], mut base: u32) -> Option<BigInt> {
840-
// split sign
841-
let mut lit = lit.trim();
842-
let sign = match lit.first()? {
843-
b'+' => Some(Sign::Plus),
844-
b'-' => Some(Sign::Minus),
845-
_ => None,
846-
};
847-
if sign.is_some() {
848-
lit = &lit[1..];
849-
}
850-
851-
// split radix
852-
let first = *lit.first()?;
853-
let has_radix = if first == b'0' {
854-
match base {
855-
0 => {
856-
if let Some(parsed) = lit.get(1).and_then(detect_base) {
857-
base = parsed;
858-
true
859-
} else {
860-
if let [_first, ref others @ .., last] = lit {
861-
let is_zero =
862-
others.iter().all(|&c| c == b'0' || c == b'_') && *last == b'0';
863-
if !is_zero {
864-
return None;
865-
}
866-
}
867-
return Some(BigInt::zero());
868-
}
869-
}
870-
16 => lit.get(1).map_or(false, |&b| matches!(b, b'x' | b'X')),
871-
2 => lit.get(1).map_or(false, |&b| matches!(b, b'b' | b'B')),
872-
8 => lit.get(1).map_or(false, |&b| matches!(b, b'o' | b'O')),
873-
_ => false,
874-
}
875-
} else {
876-
if base == 0 {
877-
base = 10;
878-
}
879-
false
880-
};
881-
if has_radix {
882-
lit = &lit[2..];
883-
if lit.first()? == &b'_' {
884-
lit = &lit[1..];
885-
}
886-
}
887-
888-
// remove zeroes
889-
let mut last = *lit.first()?;
890-
if last == b'0' {
891-
let mut count = 0;
892-
for &cur in &lit[1..] {
893-
if cur == b'_' {
894-
if last == b'_' {
895-
return None;
896-
}
897-
} else if cur != b'0' {
898-
break;
899-
};
900-
count += 1;
901-
last = cur;
902-
}
903-
let prefix_last = lit[count];
904-
lit = &lit[count + 1..];
905-
if lit.is_empty() && prefix_last == b'_' {
906-
return None;
907-
}
908-
}
909-
910-
// validate
911-
for c in lit {
912-
let c = *c;
913-
if !(c.is_ascii_alphanumeric() || c == b'_') {
914-
return None;
915-
}
916-
917-
if c == b'_' && last == b'_' {
918-
return None;
919-
}
920-
921-
last = c;
922-
}
923-
if last == b'_' {
924-
return None;
925-
}
926-
927-
// parse
928-
Some(if lit.is_empty() {
929-
BigInt::zero()
930-
} else {
931-
let uint = BigUint::parse_bytes(lit, base)?;
932-
BigInt::from_biguint(sign.unwrap_or(Sign::Plus), uint)
933-
})
934-
}
935-
936-
fn detect_base(c: &u8) -> Option<u32> {
937-
match c {
938-
b'x' | b'X' => Some(16),
939-
b'b' | b'B' => Some(2),
940-
b'o' | b'O' => Some(8),
941-
_ => None,
942-
}
943-
}
944-
945842
// Retrieve inner int value:
// returns a reference to the `value` field of the object's `PyInt` payload.
// Panics (via `unwrap`) when `obj.payload::<PyInt>()` yields nothing to unwrap —
// presumably when `obj` is not a `PyInt`; confirm against callers.
946843
pub(crate) fn get_value(obj: &PyObject) -> &BigInt {
947844
&obj.payload::<PyInt>().unwrap().value
948845
}
949846

950847
pub fn try_to_float(int: &BigInt, vm: &VirtualMachine) -> PyResult<f64> {
951-
i2f(int).ok_or_else(|| vm.new_overflow_error("int too large to convert to float".to_owned()))
952-
}
953-
// num-bigint now returns Some(inf) for to_f64() in some cases, so just keep that the same for now
954-
fn i2f(int: &BigInt) -> Option<f64> {
955-
int.to_f64().filter(|f| f.is_finite())
848+
bigint_to_finite_float(int)
849+
.ok_or_else(|| vm.new_overflow_error("int too large to convert to float".to_owned()))
956850
}
957851

958852
/// Calls `PyInt::extend_class` with the context and its `int_type`.
/// NOTE(review): presumably this installs the `PyInt` methods/slots on the built-in
/// `int` type during interpreter setup — confirm against `PyClassImpl::extend_class`.
pub(crate) fn init(context: &Context) {
959853
PyInt::extend_class(context, context.types.int_type);
960854
}
961-
962-
#[test]
963-
fn test_bytes_to_int() {
964-
assert_eq!(bytes_to_int(&b"0b101"[..], 2).unwrap(), BigInt::from(5));
965-
assert_eq!(bytes_to_int(&b"0x_10"[..], 16).unwrap(), BigInt::from(16));
966-
assert_eq!(bytes_to_int(&b"0b"[..], 16).unwrap(), BigInt::from(11));
967-
assert_eq!(bytes_to_int(&b"+0b101"[..], 2).unwrap(), BigInt::from(5));
968-
assert_eq!(bytes_to_int(&b"0_0_0"[..], 10).unwrap(), BigInt::from(0));
969-
assert_eq!(bytes_to_int(&b"09_99"[..], 0), None);
970-
assert_eq!(bytes_to_int(&b"000"[..], 0).unwrap(), BigInt::from(0));
971-
assert_eq!(bytes_to_int(&b"0_"[..], 0), None);
972-
assert_eq!(bytes_to_int(&b"0_100"[..], 10).unwrap(), BigInt::from(100));
973-
}

vm/src/protocol/number.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::{
22
builtins::{
33
int, type_::PointerSlot, PyByteArray, PyBytes, PyComplex, PyFloat, PyInt, PyIntRef, PyStr,
44
},
5+
common::int::bytes_to_int,
56
function::ArgBytesLike,
67
stdlib::warnings,
78
AsObject, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject,
@@ -40,14 +41,17 @@ impl PyObject {
4041
pub fn try_int(&self, vm: &VirtualMachine) -> PyResult<PyIntRef> {
4142
fn try_convert(obj: &PyObject, lit: &[u8], vm: &VirtualMachine) -> PyResult<PyIntRef> {
4243
let base = 10;
43-
match int::bytes_to_int(lit, base) {
44-
Some(i) => Ok(PyInt::from(i).into_ref(vm)),
45-
None => Err(vm.new_value_error(format!(
44+
let i = bytes_to_int(lit, base).ok_or_else(|| {
45+
let repr = match obj.repr(vm) {
46+
Ok(repr) => repr,
47+
Err(err) => return err,
48+
};
49+
vm.new_value_error(format!(
4650
"invalid literal for int() with base {}: {}",
47-
base,
48-
obj.repr(vm)?,
49-
))),
50-
}
51+
base, repr,
52+
))
53+
})?;
54+
Ok(PyInt::from(i).into_ref(vm))
5155
}
5256

5357
if let Some(i) = self.downcast_ref_if_exact::<PyInt>(vm) {

0 commit comments

Comments
 (0)