If data in create_buffer_with_data isn't a multiple of 4, just round …

…up. (#626) * If data in create_buffer_with_data isn't a multiple of 4, just round up. * If data in create_buffer_with_data isn't a multiple of 4, just round up. * If data in create_buffer_with_data isn't a multiple of 4, just round up. * Address review comments. Add missing test. --------- Co-authored-by: Jaron Rademeyer <[email protected]>
pygfx · Oct 22, 2024 · 014ab1b · 014ab1b
1 parent b86f810
commit 014ab1b
Show file tree

Hide file tree

Showing 3 changed files with 65 additions and 41 deletions.
diff --git a/tests/test_wgpu_native_buffer.py b/tests/test_wgpu_native_buffer.py
@@ -1,7 +1,7 @@
-import random
-import ctypes
 import sys
 
+import pytest
+
 import wgpu.utils
 import numpy as np
 
@@ -137,7 +137,7 @@ def test_buffer_init3():
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
-def test_consequitive_writes1():
+def test_consecutive_writes1():
     # The inefficient way
 
     device = wgpu.utils.get_default_device()
@@ -164,7 +164,7 @@ def test_consequitive_writes1():
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
-def test_consequitive_writes2():
+def test_consecutive_writes2():
     # The efficient way
 
     device = wgpu.utils.get_default_device()
@@ -191,7 +191,27 @@ def test_consequitive_writes2():
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
-def test_consequitive_reads():
+def test_unaligned_writes():
+    device = wgpu.utils.get_default_device()
+
+    # Create buffer
+    buf = device.create_buffer(
+        size=12, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC
+    )
+
+    # Write in parts
+    buf.map_sync("write")
+    buf.write_mapped(b"xyz", 8)
+    buf.write_mapped(b"abcdefghi", 0)
+    buf.unmap()
+
+    # Download from buffer to CPU
+    data = device.queue.read_buffer(buf)
+    assert data == b"abcdefghiyz\0"
+
+
+@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
+def test_consecutive_reads():
     device = wgpu.utils.get_default_device()
 
     # Create buffer
@@ -268,18 +288,17 @@ def test_buffer_mapping_fails():
     buf.write_mapped(b"1" * 12, 0)
     buf.write_mapped(b"1" * 12, 8)
 
+    # These are no okay, too!
+    buf.write_mapped(b"1" * 3)  # not multiple of 4
+    buf.write_mapped(b"1" * 6)  # not multiple of 4
+    buf.write_mapped(b"1" * 9)  # not multiple of 4
+
     with raises(ValueError):
         buf.write_mapped(b"")  # not empty
     with raises(ValueError):
         buf.write_mapped(b"1" * 64)  # too large for buffer
     with raises(ValueError):
         buf.write_mapped(b"1" * 32)  # too large for mapped range
-    with raises(ValueError):
-        buf.write_mapped(b"1" * 3)  # not multiple of 4
-    with raises(ValueError):
-        buf.write_mapped(b"1" * 6)  # not multiple of 4
-    with raises(ValueError):
-        buf.write_mapped(b"1" * 9)  # not multiple of 4
 
     # Can unmap multiple times though!
     buf.unmap()
@@ -436,33 +455,28 @@ def test_write_buffer1():
 def test_write_buffer2():
     device = wgpu.utils.get_default_device()
 
-    nx, ny, nz = 100, 1, 1
-    data0 = (ctypes.c_float * 100)(*[random.random() for i in range(nx * ny * nz)])
-    data1 = (ctypes.c_float * 100)()
-    nbytes = ctypes.sizeof(data1)
+    size = 100
+    data0 = np.random.random(size)  # it's just bits.  f32 or f64 doesn't matter
+    data1 = np.array(data0)
+    nbytes = data1.nbytes
 
     # Create buffer
     buf4 = device.create_buffer(
         size=nbytes, usage=wgpu.BufferUsage.COPY_DST | wgpu.BufferUsage.COPY_SRC
     )
 
-    for i in range(len(data1)):
-        data1[i] = data0[i]
-
     # Upload from CPU to buffer
     device.create_command_encoder()  # we seem to need to create one
     device.queue.write_buffer(buf4, 0, data1)
 
     # We swipe the data. You could also think that we passed something into
     # write_buffer without holding a reference to it. Anyway, write_buffer
     # seems to copy the data at the moment it is called.
-    for i in range(len(data1)):
-        data1[i] = 1
-
+    data1.fill(1.0)
     device.queue.submit([])
 
     # Download from buffer to CPU
-    data2 = data1.__class__.from_buffer(device.queue.read_buffer(buf4))
+    data2 = np.frombuffer(device.queue.read_buffer(buf4), dtype=data0.dtype)
     assert iters_equal(data0, data2)
 
 
@@ -518,5 +532,21 @@ def test_buffer_map_read_and_write():
     assert data1 == data2
 
 
+@pytest.mark.parametrize("size", [4, 11, 20, 22, 10000])
+def test_create_buffer_with_data(size):
+    device = wgpu.utils.get_default_device()
+    data = np.random.bytes(size)
+    buffer = device.create_buffer_with_data(data=data, usage=wgpu.BufferUsage.COPY_SRC)
+
+    # Make sure that the length of the buffer is the next multiple of 4
+    assert buffer._nbytes % 4 == 0
+    assert 0 <= buffer._nbytes - size <= 3
+
+    # Make sure that the contents of the buffer is the data padded with 0s.
+    copy = device.queue.read_buffer(buffer)
+    assert copy[0:size] == data
+    assert copy[size:] == bytes(buffer._nbytes - size)
+
+
 if __name__ == "__main__":
     run_tests(globals())
diff --git a/wgpu/_classes.py b/wgpu/_classes.py
@@ -810,6 +810,9 @@ def create_buffer(
             size (int): The size of the buffer in bytes.
             usage (flags.BufferUsage): The ways in which this buffer will be used.
             mapped_at_creation (bool): Whether the buffer is initially mapped.
+
+        Alignment: the size must be a multiple of 4.
+
         """
         raise NotImplementedError()
 
@@ -826,7 +829,8 @@ def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage")
                 includes bytes, bytearray, ctypes arrays, numpy arrays, etc.).
             usage (flags.BufferUsage): The ways in which this buffer will be used.
 
-        Alignment: the size (in bytes) of the data must be a multiple of 4.
+        Alignment: if the size (in bytes) of data is not a multiple of 4, the buffer
+                   size is rounded up to the nearest multiple of 4.
 
         Also see `GPUBuffer.write_mapped()` and `GPUQueue.write_buffer()`.
         """
@@ -838,7 +842,7 @@ def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage")
 
         # Create a view of known type
         data = memoryview(data).cast("B")
-        size = data.nbytes
+        size = (data.nbytes + 3) & ~3  # round up to a multiple of 4
 
         # Create the buffer and write data
         buf = self.create_buffer(
@@ -1431,7 +1435,7 @@ def read_mapped(self, buffer_offset=None, size=None, *, copy=True):
         raise NotImplementedError()
 
     @apidiff.add("Replacement for get_mapped_range")
-    def write_mapped(self, data, buffer_offset=None, size=None):
+    def write_mapped(self, data, buffer_offset=None):
         """Write mapped buffer data.
 
         This method must only be called when the buffer is in a mapped state.
@@ -1447,11 +1451,9 @@ def write_mapped(self, data, buffer_offset=None, size=None):
             buffer_offset (int): the buffer offset in bytes. Must be at least
                 as large as the offset specified in ``map()``. The default
                 is the offset of the mapped range.
-            size (int): the size to write (in bytes). The default is the size of
-                the data, so this argument can typically be ignored. The
-                resulting range must fit into the range specified in ``map()``.
 
-        Alignment: the buffer offset must be a multiple of 8, the size must be a multiple of 4.
+        Alignment: the buffer offset must be a multiple of 8.
+
 
         Also see `GPUBuffer.read_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
         """

diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py
@@ -2065,7 +2065,7 @@ def _check_range(self, offset, size):
         if size < 1:
             raise ValueError("Mapped size must be larger than zero.")
         if size % 4:
-            raise ValueError("Mapped offset must be a multiple of 4.")
+            raise ValueError("Mapped size must be a multiple of 4.")
         if offset + size > self.size:
             raise ValueError("Mapped range must not extend beyond total buffer size.")
         return offset, size
@@ -2192,7 +2192,7 @@ def read_mapped(self, buffer_offset=None, size=None, *, copy=True):
             self._mapped_memoryviews.append(data)
             return data
 
-    def write_mapped(self, data, buffer_offset=None, size=None):
+    def write_mapped(self, data, buffer_offset=None):
         # Can we even write?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only write to a buffer if its mapped.")
@@ -2204,27 +2204,19 @@ def write_mapped(self, data, buffer_offset=None, size=None):
         # Cast data to a memoryview. This also works for e.g. numpy arrays,
         # and the resulting memoryview will be a view on the data.
         data = memoryview(data).cast("B")
+        size = (data.nbytes + 3) & ~3
 
-        # Check offset and size
-        if size is None:
-            size = data.nbytes
         offset, size = self._check_range(buffer_offset, size)
         if offset < self._mapped_status[0] or (offset + size) > self._mapped_status[1]:
             raise ValueError(
                 "The range for buffer writing is not contained in the currently mapped range."
             )
 
-        # Check data size and given size. If the latter was given, it should match!
-        if data.nbytes != size:  # no-cover
-            raise ValueError(
-                "Data passed to GPUBuffer.write_mapped() does not match the given size."
-            )
-
         # Get mapped memoryview
         # H: void * f(WGPUBuffer buffer, size_t offset, size_t size)
         src_ptr = libf.wgpuBufferGetMappedRange(self._internal, offset, size)
         src_address = int(ffi.cast("intptr_t", src_ptr))
-        src_m = get_memoryview_from_address(src_address, size)
+        src_m = get_memoryview_from_address(src_address, data.nbytes)
 
         # Copy data. If not contiguous, this operation may be slower.
         src_m[:] = data