From 2f516bfeec5f469cf7e1d871236a08c94b8c78fa Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 9 Oct 2024 12:22:05 +0200 Subject: [PATCH 1/2] Use dev version of next Jubako release --- Cargo.lock | 3 +-- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c58333c..1a3df59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -896,8 +896,7 @@ dependencies = [ [[package]] name = "jubako" -version = "0.3.1" -source = "git+https://github.com/jubako/jubako.git#9f7eaee8e01f7096fb724869bd619c9cdb6af4aa" +version = "0.3.2-dev" dependencies = [ "blake3", "bstr", diff --git a/Cargo.toml b/Cargo.toml index f0a4512..1b0dd18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ homepage = "https://github.com/jubako/arx" license = "MIT" [workspace.dependencies] -jbk = { git = "https://github.com/jubako/jubako.git", package = "jubako", version = "0.3.1" } +jbk = { git = "https://github.com/jubako/jubako.git", package = "jubako", version = "0.3.2-dev" } clap = { version = "4.4.5", features = ["derive"] } clap_mangen = "0.2.20" clap_complete = "4.5.0" From 68350f134d7d540f6fbf73bcf3bd32362a7e9d72 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 9 Oct 2024 12:35:51 +0200 Subject: [PATCH 2/2] [Python] Introduce `Stream` as intermediate object to retrieve content Content stored in Arx may be bigger than the addressable size (especially on 32-bit). `get_content()` now returns a `Stream` the user can read from instead of directly returning a `bytes` holding all the content. 
--- python/src/arx.rs | 24 ++++++------------------ python/src/entry.rs | 6 +++--- python/src/lib.rs | 1 + python/src/stream.rs | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 python/src/stream.rs diff --git a/python/src/arx.rs b/python/src/arx.rs index 5ec3d9e..1d4ec68 100644 --- a/python/src/arx.rs +++ b/python/src/arx.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use crate::iterator::EntryIter; +use crate::stream::Stream; use super::content_address::ContentAddress; use super::entry::Entry; @@ -10,7 +11,6 @@ use pyo3::exceptions::PyRuntimeError; use pyo3::exceptions::{PyOSError, PyUnicodeDecodeError, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyUnicode; -use std::io::Read; /// An Arx archive. /// @@ -48,21 +48,13 @@ impl Arx { Self(Arc::new(arx)) } - pub(crate) fn get_content_rust<'py>( + pub(crate) fn get_content_rust( arx: &arx::Arx, - py: Python<'py>, content: jbk::ContentAddress, - ) -> PyResult<&'py pyo3::types::PyBytes> { + ) -> PyResult { let bytes = arx.container.get_bytes(content).unwrap(); match bytes { - MayMissPack::FOUND(bytes) => { - let mut stream = bytes.stream(); - let read = |slice: &mut [u8]| { - stream.read_exact(slice).unwrap(); - Ok(()) - }; - pyo3::types::PyBytes::new_with(py, bytes.size().into_usize(), read) - } + MayMissPack::FOUND(bytes) => Ok(Stream(bytes.stream())), MayMissPack::MISSING(pack_info) => Err(PyOSError::new_err(format!( "Cannot found pack {}", pack_info.uuid @@ -102,12 +94,8 @@ impl Arx { } /// Get the content associated to contentAddress - fn get_content<'py>( - &self, - py: Python<'py>, - content: ContentAddress, - ) -> PyResult<&'py pyo3::types::PyBytes> { - Self::get_content_rust(self, py, content.0) + fn get_content(&self, content: ContentAddress) -> PyResult { + Self::get_content_rust(self, content.0) } fn __iter__(slf: PyRef<'_, Self>) -> PyResult> { diff --git a/python/src/entry.rs b/python/src/entry.rs index 2bef8c9..8afb23b 100644 --- 
a/python/src/entry.rs +++ b/python/src/entry.rs @@ -6,7 +6,7 @@ use pyo3::{ prelude::*, }; -use crate::{content_address::ContentAddress, iterator::EntryIter}; +use crate::{content_address::ContentAddress, iterator::EntryIter, stream::Stream}; /// An entry i an arx archive. /// @@ -148,9 +148,9 @@ impl Entry { /// Get the content of the file entry. /// /// Raise an exception if entry is not a file. - fn get_content<'py>(&self, py: Python<'py>) -> PyResult<&'py pyo3::types::PyBytes> { + fn get_content(&self) -> PyResult { match &self.entry { - arx::Entry::File(f) => super::arx::Arx::get_content_rust(&self.arx, py, f.content()), + arx::Entry::File(f) => super::arx::Arx::get_content_rust(&self.arx, f.content()), _ => Err(PyTypeError::new_err("Not a file")), } } diff --git a/python/src/lib.rs b/python/src/lib.rs index 4325a13..ec64ff6 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -3,6 +3,7 @@ mod content_address; mod creator; mod entry; mod iterator; +mod stream; use pyo3::prelude::*; /// A Python module implemented in Rust. diff --git a/python/src/stream.rs b/python/src/stream.rs new file mode 100644 index 0000000..45f8ecb --- /dev/null +++ b/python/src/stream.rs @@ -0,0 +1,38 @@ +use jbk::reader::ByteStream; +use pyo3::prelude::*; +use std::io::Read; + +#[pyclass] +pub struct Stream(pub ByteStream); + +#[pymethods] +impl Stream { + /// Read `size` bytes from the stream. + /// + /// Returned `bytes` may be shorter than `size` if data left to be read is smaller than requested. + fn read<'py>(&mut self, py: Python<'py>, size: usize) -> PyResult<&'py pyo3::types::PyBytes> { + let size = std::cmp::min(size, self.0.size_left() as usize); + let read_fn = |slice: &mut [u8]| { + self.0.read_exact(slice).unwrap(); + Ok(()) + }; + pyo3::types::PyBytes::new_with(py, size, read_fn) + } + + /// Get the full size of the stream. + fn size(&self) -> u64 { + self.0.size() + } + + /// Get the size of the data left to read. 
+ /// + /// Equivalent to `size() - tell()` + fn size_left(&self) -> u64 { + self.0.size_left() + } + + /// Get the current offset (already read data) of the stream. + fn tell(&self) -> u64 { + self.0.offset() + } +}