Modified sycl_timer example to use dpctl.tensor functions

This removes the use of dpnp.matmul from the example, making it self-contained.
IntelPython · Nov 3, 2023 · 097ecf5 · 097ecf5
1 parent 645044a
commit 097ecf5
Showing 1 changed file with 29 additions and 12 deletions.
diff --git a/examples/python/sycl_timer.py b/examples/python/sycl_timer.py
@@ -15,14 +15,27 @@
 # limitations under the License.
 
 
-import dpnp
 import numpy as np
 
 import dpctl
 import dpctl.tensor as dpt
 from dpctl import SyclTimer
 
-n = 4000
+
+def matmul(m1, m2):
+    """Naive matrix product of 2D arrays: broadcast multiply, then sum."""
+    assert m1.ndim == 2
+    assert m2.ndim == 2
+    assert m1.shape[1] == m2.shape[0]
+    m1 = m1[:, dpt.newaxis, :]
+    m2 = dpt.permute_dims(m2, (1, 0))[dpt.newaxis, :, :]
+    # form m_prods[i, j, k] = m1[i, k] * m2[k, j] via broadcasting
+    m_prods = m1 * m2
+    # sum over k (the last axis), leaving the result indexed by (i, j)
+    return dpt.sum(m_prods, axis=-1)
+
+
+n = 500
 
 try:
     q = dpctl.SyclQueue(property="enable_profiling")
@@ -33,32 +46,36 @@
     )
     exit(0)
 
-a = dpt.reshape(dpt.arange(n * n, dtype=np.float32, sycl_queue=q), (n, n))
-b = dpt.reshape(
-    dpt.asarray(np.random.random(n * n), dtype=np.float32, sycl_queue=q), (n, n)
-)
+a_flat = dpt.arange(n * n, dtype=dpt.float32, sycl_queue=q)
+a = dpt.reshape(a_flat, (n, n))
 
-timer = SyclTimer(time_scale=1)
+b_rand = np.random.random(n * n).astype(np.float32)
+b_flat = dpt.asarray(b_rand, dtype=dpt.float32, sycl_queue=q)
+b = dpt.reshape(b_flat, (n, n))
 
 wall_times = []
 device_times = []
+
 print(
-    f"Performing matrix multiplication of two {n} by {n} matrices "
+    f"Computing naive matrix multiplication of two {n} by {n} matrices "
     f"on {q.sycl_device.name}, repeating 5 times."
 )
+print()
 for _ in range(5):
+    timer = SyclTimer(time_scale=1)
     with timer(q):
-        a_matmul_b = dpnp.matmul(a, b)
+        a_matmul_b = matmul(a, b)
     host_time, device_time = timer.dt
     wall_times.append(host_time)
     device_times.append(device_time)
 
-c = dpnp.asnumpy(a_matmul_b)
-cc = np.dot(dpnp.asnumpy(a), dpnp.asnumpy(b))
+c = dpt.asnumpy(a_matmul_b)
+cc = np.dot(dpt.asnumpy(a), dpt.asnumpy(b))
 
 print("Wall time: ", wall_times, "\nDevice time: ", device_times)
+print()
 print(
     "Accuracy test: passed."
     if np.allclose(c, cc)
-    else (f"Accuracy test: failed. Discrepancy {np.max(np.abs(c-cc))}")
+    else (f"Accuracy test: FAILED. \n   Discrepancy = {np.max(np.abs(c-cc))}")
 )