Skip to content

Commit 7204ab7

Browse files
borisyouknowone
andcommitted
implement _bz2
Co-Authored-By: Jeong YunWon <jeong@youknowone.org>
1 parent 3d5d3fd commit 7204ab7

File tree

4 files changed

+272
-0
lines changed

4 files changed

+272
-0
lines changed

Cargo.lock

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

stdlib/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ unic-ucd-ident = "0.9.0"
4141
adler32 = "1.2.0"
4242
crc32fast = "1.3.2"
4343
flate2 = "1.0.23"
44+
bzip2 = "0.4"
4445

4546
num-complex = "0.4.0"
4647
num-bigint = "0.4.3"

stdlib/src/bz2.rs

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
pub(crate) use _bz2::make_module;
2+
3+
#[pymodule]
4+
mod _bz2 {
5+
use crate::common::lock::PyMutex;
6+
use crate::vm::{
7+
builtins::{PyBytesRef, PyTypeRef},
8+
function::{ArgBytesLike, OptionalArg},
9+
object::{PyPayload, PyResult},
10+
types::Constructor,
11+
VirtualMachine,
12+
};
13+
use bzip2::{write::BzEncoder, Decompress, Status};
14+
use std::{fmt, io::Write};
15+
16+
// const BUFSIZ: i32 = 8192;
17+
18+
struct DecompressorState {
19+
decoder: Decompress,
20+
eof: bool,
21+
needs_input: bool,
22+
// input_buffer: Vec<u8>,
23+
// output_buffer: Vec<u8>,
24+
}
25+
26+
#[pyattr]
27+
#[pyclass(name = "BZ2Decompressor")]
28+
#[derive(PyPayload)]
29+
struct BZ2Decompressor {
30+
state: PyMutex<DecompressorState>,
31+
}
32+
33+
impl fmt::Debug for BZ2Decompressor {
34+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
35+
write!(f, "_bz2.BZ2Decompressor")
36+
}
37+
}
38+
39+
impl Constructor for BZ2Decompressor {
40+
type Args = ();
41+
42+
fn py_new(cls: PyTypeRef, _: Self::Args, vm: &VirtualMachine) -> PyResult {
43+
Self {
44+
state: PyMutex::new(DecompressorState {
45+
decoder: Decompress::new(false),
46+
eof: false,
47+
needs_input: true,
48+
// input_buffer: Vec::new(),
49+
// output_buffer: Vec::new(),
50+
}),
51+
}
52+
.into_ref_with_type(vm, cls)
53+
.map(Into::into)
54+
}
55+
}
56+
57+
#[pyclass(with(Constructor))]
58+
impl BZ2Decompressor {
59+
#[pymethod]
60+
fn decompress(
61+
&self,
62+
data: ArgBytesLike,
63+
// TODO: PyIntRef
64+
max_length: OptionalArg<i32>,
65+
vm: &VirtualMachine,
66+
) -> PyResult<PyBytesRef> {
67+
let max_length = max_length.unwrap_or(-1);
68+
if max_length >= 0 {
69+
return Err(vm.new_not_implemented_error(
70+
"the max_value argument is not implemented yet".to_owned(),
71+
));
72+
}
73+
// let max_length = if max_length < 0 || max_length >= BUFSIZ {
74+
// BUFSIZ
75+
// } else {
76+
// max_length
77+
// };
78+
79+
let mut state = self.state.lock();
80+
let DecompressorState {
81+
decoder,
82+
eof,
83+
..
84+
// needs_input,
85+
// input_buffer,
86+
// output_buffer,
87+
} = &mut *state;
88+
89+
if *eof {
90+
return Err(vm.new_exception_msg(
91+
vm.ctx.exceptions.eof_error.to_owned(),
92+
"End of stream already reached".to_owned(),
93+
));
94+
}
95+
96+
// data.with_ref(|data| input_buffer.extend(data));
97+
98+
// If max_length is negative:
99+
// read the input X bytes at a time, compress it and append it to output.
100+
// Once you're out of input, setting needs_input to true and return the
101+
// output as bytes.
102+
//
103+
// TODO:
104+
// If max_length is non-negative:
105+
// Read the input X bytes at a time, compress it and append it to
106+
// the output. If output reaches `max_length` in size, return
107+
// it (up to max_length), and store the rest of the output
108+
// for later.
109+
110+
// TODO: arbitrary choice, not the right way to do it.
111+
let mut buf = Vec::with_capacity(data.len() * 32);
112+
113+
let before = decoder.total_in();
114+
let res = data.with_ref(|data| decoder.decompress_vec(data, &mut buf));
115+
let _written = (decoder.total_in() - before) as usize;
116+
117+
let res = match res {
118+
Ok(x) => x,
119+
// TODO: error message
120+
_ => return Err(vm.new_os_error("Invalid data stream".to_owned())),
121+
};
122+
123+
if res == Status::StreamEnd {
124+
*eof = true;
125+
}
126+
Ok(vm.ctx.new_bytes(buf.to_vec()))
127+
}
128+
129+
#[pyproperty]
130+
fn eof(&self) -> bool {
131+
let state = self.state.lock();
132+
state.eof
133+
}
134+
135+
#[pyproperty]
136+
fn unused_data(&self, vm: &VirtualMachine) -> PyBytesRef {
137+
// Data found after the end of the compressed stream.
138+
// If this attribute is accessed before the end of the stream
139+
// has been reached, its value will be b''.
140+
vm.ctx.new_bytes(b"".to_vec())
141+
// alternatively, be more honest:
142+
// Err(vm.new_not_implemented_error(
143+
// "unused_data isn't implemented yet".to_owned(),
144+
// ))
145+
//
146+
// TODO
147+
// let state = self.state.lock();
148+
// if state.eof {
149+
// vm.ctx.new_bytes(state.input_buffer.to_vec())
150+
// else {
151+
// vm.ctx.new_bytes(b"".to_vec())
152+
// }
153+
}
154+
155+
#[pyproperty]
156+
fn needs_input(&self) -> bool {
157+
// False if the decompress() method can provide more
158+
// decompressed data before requiring new uncompressed input.
159+
let state = self.state.lock();
160+
state.needs_input
161+
}
162+
163+
// TODO: mro()?
164+
}
165+
166+
struct CompressorState {
167+
flushed: bool,
168+
encoder: Option<BzEncoder<Vec<u8>>>,
169+
}
170+
171+
#[pyattr]
172+
#[pyclass(name = "BZ2Compressor")]
173+
#[derive(PyPayload)]
174+
struct BZ2Compressor {
175+
state: PyMutex<CompressorState>,
176+
}
177+
178+
impl fmt::Debug for BZ2Compressor {
179+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
180+
write!(f, "_bz2.BZ2Compressor")
181+
}
182+
}
183+
184+
impl Constructor for BZ2Compressor {
185+
type Args = (OptionalArg<i32>,);
186+
187+
fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult {
188+
let (compresslevel,) = args;
189+
// TODO: seriously?
190+
// compresslevel.unwrap_or(bzip2::Compression::best().level().try_into().unwrap());
191+
let compresslevel = compresslevel.unwrap_or(9);
192+
let level = match compresslevel {
193+
valid_level @ 1..=9 => bzip2::Compression::new(valid_level as u32),
194+
_ => {
195+
return Err(
196+
vm.new_value_error("compresslevel must be between 1 and 9".to_owned())
197+
)
198+
}
199+
};
200+
201+
Self {
202+
state: PyMutex::new(CompressorState {
203+
flushed: false,
204+
encoder: Some(BzEncoder::new(Vec::new(), level)),
205+
}),
206+
}
207+
.into_ref_with_type(vm, cls)
208+
.map(Into::into)
209+
}
210+
}
211+
212+
// TODO: return partial results from compress() instead of returning everything in flush()
213+
#[pyclass(with(Constructor))]
214+
impl BZ2Compressor {
215+
#[pymethod]
216+
fn compress(&self, data: ArgBytesLike, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
217+
let mut state = self.state.lock();
218+
if state.flushed {
219+
return Err(vm.new_value_error("Compressor has been flushed".to_owned()));
220+
}
221+
222+
// let CompressorState { flushed, encoder } = &mut *state;
223+
let CompressorState { encoder, .. } = &mut *state;
224+
225+
// TODO: handle Err
226+
data.with_ref(|input_bytes| encoder.as_mut().unwrap().write_all(input_bytes).unwrap());
227+
Ok(vm.ctx.new_bytes(Vec::new()))
228+
}
229+
230+
#[pymethod]
231+
fn flush(&self, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
232+
let mut state = self.state.lock();
233+
if state.flushed {
234+
return Err(vm.new_value_error("Repeated call to flush()".to_owned()));
235+
}
236+
237+
// let CompressorState { flushed, encoder } = &mut *state;
238+
let CompressorState { encoder, .. } = &mut *state;
239+
240+
// TODO: handle Err
241+
let out = encoder.take().unwrap().finish().unwrap();
242+
state.flushed = true;
243+
Ok(vm.ctx.new_bytes(out.to_vec()))
244+
}
245+
}
246+
}

stdlib/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ mod random;
2525
mod statistics;
2626
// TODO: maybe make this an extension module, if we ever get those
2727
// mod re;
28+
#[cfg(not(any(target_arch = "wasm32")))]
29+
mod bz2;
2830
#[cfg(not(target_arch = "wasm32"))]
2931
pub mod socket;
3032
#[cfg(unix)]
@@ -115,6 +117,7 @@ pub fn get_module_inits() -> impl Iterator<Item = (Cow<'static, str>, StdlibInit
115117
"select" => select::make_module,
116118
"_socket" => socket::make_module,
117119
"faulthandler" => faulthandler::make_module,
120+
"_bz2" => bz2::make_module,
118121
}
119122
#[cfg(feature = "ssl")]
120123
{

0 commit comments

Comments
 (0)