From 9760adc1578473507cdd91e095ed5e6f76b11b51 Mon Sep 17 00:00:00 2001 From: Jhalak Patel Date: Mon, 30 Sep 2024 09:56:10 -0700 Subject: [PATCH] Add initial IR for alloc enqueue --- .../mlir-tensorrt/Conversion/Passes.td | 1 + .../lib/Compiler/StableHloToExecutable.cpp | 10 +- .../include/mlir-executor-c/Runtime/Runtime.h | 16 ++ .../Runtime/Backend/Lua/LuaRuntime.h | 7 + .../executor/lib/CAPI/Runtime/Runtime.cpp | 59 +++++- .../lib/Runtime/Backend/Lua/LuaRuntime.cpp | 168 +++++++++++++++++- .../Lua/Modules/TensorRT/TensorRTModule.cpp | 82 ++++++++- .../python/bindings/Runtime/RuntimePyBind.cpp | 54 +++++- .../ClusteringDynamicShape/alloc_enqueue.mlir | 13 ++ .../test_stablehlo_alloc_enqueue.py | 124 +++++++++++++ 10 files changed, 521 insertions(+), 13 deletions(-) create mode 100644 mlir-tensorrt/test/Target/Lua/IntegrationTests/ClusteringDynamicShape/alloc_enqueue.mlir create mode 100644 mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Conversion/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Conversion/Passes.td index 1eb2f538a..f56fb2f61 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Conversion/Passes.td +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Conversion/Passes.td @@ -192,6 +192,7 @@ def ConvertCUDAToExecutorPass : Pass<"convert-cuda-to-executor", //===----------------------------------------------------------------------===// // ConvertTensorRTRuntimeToExecutorPass //===----------------------------------------------------------------------===// +// TODO: Modify this pass to generate non-DPS style enqueue functions. 
def ConvertTensorRTRuntimeToExecutorPass : Pass<"convert-tensorrt-runtime-to-executor", "::mlir::ModuleOp"> { let summary = "Converts TensorRTRuntime dialect ops to executor dialect operations"; diff --git a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp index 1ed9d8bdd..ce68e4a3d 100644 --- a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp +++ b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp @@ -487,11 +487,13 @@ StableHloToExecutableTask::compileStableHLOToExecutable( runner = pm.get(); } + runner->printAsTextualPipeline(llvm::dbgs()); + // Setup pass manager - if (failed(runner->run(module))) - return getInternalErrorStatus( - "failed to run compilation on module with symbol name: {0}", - module.getName() ? *module.getName() : "no-symbol-name"); + // if (failed(runner->run(module))) + // return getInternalErrorStatus( + // "failed to run compilation on module with symbol name: {0}", + // module.getName() ? *module.getName() : "no-symbol-name"); // Translate to Runtime Executable FailureOr> exeStorage = diff --git a/mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h b/mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h index 345412aee..a72f1c3b0 100644 --- a/mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h +++ b/mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h @@ -289,6 +289,12 @@ static inline bool mtrtRuntimeValueIsNull(MTRT_RuntimeValue value) { return !value.ptr; } +// Returns whether the RuntimeValue is MemRef. +MLIR_CAPI_EXPORTED bool mtrtRuntimeValueIsMemRef(MTRT_RuntimeValue value); + +// Returns whether the RuntimeValue is Scalar. +MLIR_CAPI_EXPORTED bool mtrtRuntimeValueIsScalar(MTRT_RuntimeValue value); + /// Cast a MTRT_MemRefValue to a generic MTRT_RuntimeValue. 
MLIR_CAPI_EXPORTED MTRT_RuntimeValue mtrtMemRefCastToRuntimeValue(MTRT_MemRefValue memref); @@ -383,6 +389,16 @@ MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeSessionExecuteFunction( const MTRT_RuntimeValue *inArgs, size_t numInArgs, const MTRT_RuntimeValue *outArgs, size_t numOutArgs, MTRT_Stream stream); +/// Variant of above function which return results. +MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeSessionExecuteFunctionWithResult( + MTRT_RuntimeSession session, MTRT_RuntimeClient client, + MTRT_StringView name, const MTRT_RuntimeValue *inArgs, size_t numInArgs, + MTRT_RuntimeValue *resultArgs, size_t numResultArgs, MTRT_Stream stream); + +MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeSessionGetNbResults(MTRT_RuntimeSession session, + MTRT_StringView name, + int64_t *numResults); + //===----------------------------------------------------------------------===// // DLPack //===----------------------------------------------------------------------===// diff --git a/mlir-tensorrt/executor/include/mlir-executor/Runtime/Backend/Lua/LuaRuntime.h b/mlir-tensorrt/executor/include/mlir-executor/Runtime/Backend/Lua/LuaRuntime.h index c953172b9..86dc3112e 100644 --- a/mlir-tensorrt/executor/include/mlir-executor/Runtime/Backend/Lua/LuaRuntime.h +++ b/mlir-tensorrt/executor/include/mlir-executor/Runtime/Backend/Lua/LuaRuntime.h @@ -100,6 +100,13 @@ executeFunctionWithLuaBackend(LuaRuntimeSession &session, std::string_view name, llvm::ArrayRef outputArgs, std::optional stream = {}); +/// Execute a named function in the session with the specified input args and return results. 
+StatusOr>> +executeFunctionWithResultWithLuaBackend( + LuaRuntimeSession &session, RuntimeClient &client, std::string_view name, + llvm::ArrayRef inputArgs, + std::optional stream = {}); + } // namespace mlirtrt::runtime #endif // MLIR_TENSORRT_RUNTIME_BACKEND_LUA_LUARUNTIME_H diff --git a/mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp b/mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp index 8b4e208e8..27bbbcb15 100644 --- a/mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp +++ b/mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp @@ -641,6 +641,16 @@ MTRT_ScalarValue mtrtRuntimeValueDynCastToScalar(MTRT_RuntimeValue v) { return wrap(static_cast(x)); } +bool mtrtRuntimeValueIsMemRef(MTRT_RuntimeValue value) { + RuntimeValue *x = unwrap(value); + return x->getKind() == RuntimeValue::Kind::MemRef; +} + +bool mtrtRuntimeValueIsScalar(MTRT_RuntimeValue value) { + RuntimeValue *x = unwrap(value); + return x->getKind() == RuntimeValue::Kind::Scalar; +} + //===----------------------------------------------------------------------===// // MTRT_RuntimeSessionOptions //===----------------------------------------------------------------------===// @@ -697,7 +707,6 @@ MTRT_Status mtrtRuntimeSessionExecuteFunction( llvm::SmallVector outArgValues = llvm::map_to_vector(llvm::ArrayRef(outArgs, numOutArgs), [](MTRT_RuntimeValue arg) { return unwrap(arg); }); - StatusOr>> result = executeFunctionWithLuaBackend( *cppSession, std::string_view(name.data, name.length), inArgValues, @@ -705,11 +714,57 @@ MTRT_Status mtrtRuntimeSessionExecuteFunction( !mtrtStreamIsNull(stream) ? 
std::optional(unwrap(stream)->getRawStream()) : std::nullopt); - if (!result.isOk()) + if (!result.isOk()) { return wrap(result.getStatus()); + } + return mtrtStatusGetOk(); +} + +MTRT_Status mtrtRuntimeSessionExecuteFunctionWithResult( + MTRT_RuntimeSession session, MTRT_RuntimeClient client, + MTRT_StringView name, const MTRT_RuntimeValue *inArgs, size_t numInArgs, + MTRT_RuntimeValue *resultArgs, size_t numResultArgs, + MTRT_Stream stream) { + LuaRuntimeSession *cppSession = + static_cast(unwrap(session)); + + RuntimeClient *cppClient = unwrap(client); + + llvm::SmallVector inArgValues = + llvm::map_to_vector(llvm::ArrayRef(inArgs, numInArgs), + [](MTRT_RuntimeValue arg) { return unwrap(arg); }); + StatusOr>> results = + executeFunctionWithResultWithLuaBackend( + *cppSession, *cppClient, std::string_view(name.data, name.length), + inArgValues, + !mtrtStreamIsNull(stream) + ? std::optional(unwrap(stream)->getRawStream()) + : std::nullopt); + if (!results.isOk()) { + return wrap(results.getStatus()); + } + + assert(results->size() == numResultArgs); + + for (size_t i = 0; i < numResultArgs; ++i) { + resultArgs[i] = wrap((*results)[i].release()); + } return mtrtStatusGetOk(); } + +MTRT_Status mtrtRuntimeSessionGetNbResults(MTRT_RuntimeSession session, + MTRT_StringView name, + int64_t *numResults) { + LuaRuntimeSession *cppSession = + static_cast(unwrap(session)); + *numResults = cppSession->getExecutable() + .getFunction(std::string_view(name.data, name.length)) + .getSignature() + .getNumResults(); + return mtrtStatusGetOk(); +} + //===----------------------------------------------------------------------===// // MTRT_RuntimeClient //===----------------------------------------------------------------------===// diff --git a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/LuaRuntime.cpp b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/LuaRuntime.cpp index 33c15b35c..6bed36c42 100644 --- a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/LuaRuntime.cpp +++ 
b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/LuaRuntime.cpp @@ -423,6 +423,83 @@ static Status pushScalarArgument(sol::state_view &lua, return getOkStatus(); } +// Function to extract shape and stride from sol::object table +std::tuple, std::vector> +extractShapeAndStride(const sol::table &table) { + size_t tableSize = table.size(); + assert(tableSize >= 3 && + "Table does not contain shape and stride information"); + size_t shapeStrideSize = (tableSize - 3) / 2; + std::vector shape; + std::vector stride; + + shape.reserve(shapeStrideSize); + stride.reserve(shapeStrideSize); + + // Extract shape + for (size_t i = 4; i <= 3 + shapeStrideSize; ++i) { + shape.push_back(table[i].get()); + } + + // Extract stride + for (size_t i = 4 + shapeStrideSize; i <= tableSize; ++i) { + stride.push_back(table[i].get()); + } + + return std::make_tuple(shape, stride); +} + +// Convert sol::object to MemRefValue +StatusOr> +solObjectToMemRefValue(RuntimeClient *client, const sol::object &obj) { + assert(obj.is() && "Expected a table for MemRefValue"); + + sol::table memrefTable = obj.as(); + uintptr_t ptr = memrefTable[1].get(); + int64_t offset = memrefTable[3].get(); + + auto [shape, strides] = extractShapeAndStride(memrefTable); + + // TODO: How to extract this information. Should we use function signature to fill in this information later? + mlirtrt::runtime::PointerType addressSpace = + mlirtrt::runtime::PointerType::device; + int64_t bitsPerElement = 32; + std::optional device = + std::nullopt; + std::optional scalarType = ScalarTypeCode::f32; + + return MemRefValue::create(client, addressSpace, bitsPerElement, ptr, offset, + llvm::ArrayRef(shape), + llvm::ArrayRef(strides), device, + scalarType); +} + +// Convert sol::object to ScalarValue +std::unique_ptr solObjectToScalarValue(const sol::object &obj) { + + // TODO: ScalarType is not known. Should we use function signature to fill in + // this information later? Since ScalarValue data type is int64_t. 
Let's cast + // the object value to int64_t for now. + return std::make_unique(obj.as(), ScalarTypeCode::unknown); +} + +// Convert sol::object to RuntimeValue's +llvm::SmallVector> +solObjectToRuntimeValues(RuntimeClient *client, + std::vector const &results) { + llvm::SmallVector> values; + for (sol::object r : results) { + // if (r.is()) { + // Assume it's a MemRefValue if it's a table + values.emplace_back(std::move(*solObjectToMemRefValue(client, r))); + // } else { + // // Assume it's a ScalarValue for all other cases + // values.emplace_back(solObjectToScalarValue(r)); + // } + } + return values; +} + static Status validateArgsTypesAgainstFuncArgs(const RuntimeValue *runArg, const TypeUnionView &sigArg) { if (sigArg.isa()) { @@ -520,11 +597,11 @@ runtime::executeFunctionWithLuaBackend( return getStatusWithMsg(StatusCode::InternalError, "no function named \"", std::string(name), "\" found"); - if (sig.getNumResults() > 0) - return getInvalidArgStatus("functions with {0} results are not supported", - sig.getNumResults()); - // Validate the number of arguments against the signature. + if (sig.getNumResults() != 0) + return getInvalidArgStatus( + "function expects 0 result args but received {0}", + sig.getNumResults()); if (sig.getNumOutputArgs() != outputArgs.size()) return getInvalidArgStatus( "function expects {0} output args (destination args) but received {1}", @@ -600,3 +677,86 @@ runtime::executeFunctionWithLuaBackend( return llvm::SmallVector>{}; } + +StatusOr>> +runtime::executeFunctionWithResultWithLuaBackend( + LuaRuntimeSession &session, + RuntimeClient &client, + std::string_view name, + llvm::ArrayRef inputArgs, + std::optional stream) { + + FunctionView meta = session.getExecutable().getFunction(name); + FunctionSignatureView sig = meta.getSignature(); + + // Call the main function, if present. 
+ sol::state &lua = session.getLuaState(); + AllocTracker &tracker = session.getAllocTracker(); + sol::protected_function funcObj = lua[name]; + if (funcObj.get_type() != sol::type::function) + return getStatusWithMsg(StatusCode::InternalError, "no function named \"", + std::string(name), "\" found"); + + // Validate the number of arguments against the signature. + if (sig.getNumOutputArgs() != 0) + return getInvalidArgStatus( + "function expects 0 output args (destination args) but received {0}", + sig.getNumOutputArgs()); + if (sig.getNumInputArgs() != inputArgs.size()) + return getInvalidArgStatus("function expects {0} input args " + "(non-destination args) but received {1}", + sig.getNumInputArgs(), inputArgs.size()); + + // Validate the inferred Lua function type here against the signature. + for (unsigned i = 0; i < inputArgs.size(); ++i) { + auto status = validateArgsTypesAgainstFuncArgs(inputArgs[i], sig.getArg(i)); + if (!status.isOk()) + return getInvalidArgStatus( + "Input argument {0} validation failed against " + "corresponding function signature arg {0}. Reason: {1}", + i, status.getString()); + } + + // Create the arguments. + llvm::SmallVector args; + args.reserve(inputArgs.size()); + for (auto [idx, rv] : llvm::enumerate(inputArgs)) { + if (MemRefValue *memref = llvm::dyn_cast(rv)) { + MTRT_RETURN_IF_ERROR(pushMemRefTableArg(lua, tracker, args, *memref)); + continue; + } + if (ScalarValue *scalar = llvm::dyn_cast(rv)) { + MTRT_RETURN_IF_ERROR(pushScalarArgument(lua, args, *scalar)); + continue; + } + return getInvalidArgStatus( + "input argument #{0} to function {1} has an unsupported type; " + "arguments must be either MemRefs or scalars", + idx + 1, name); + } + if (stream) + RETURN_STATUS_IF_ERROR(session.setCudaStream(*stream)); + + // If the number of arguments exceed a particular threshold, then + // we pass arguments packed into a table, otherwise we pass as arguments. 
+ sol::protected_function_result result = + sig.getCConv() == CallingConvention::unpacked + ? funcObj(sol::as_args(args)) + : funcObj(args); + + if (!result.valid()) { + sol::error err(result); + return getStatusWithMsg(StatusCode::InternalError, + "failed to run function \"", std::string(name), + "\": ", err.what()); + } + + int returnCount = result.return_count(); + std::vector results; + // Lua index start from 1 + for (int i = 1; i <= returnCount; ++i) { + results.push_back(result[i]); + } + + return solObjectToRuntimeValues(&client, results); +} diff --git a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp index db196043b..5e6eb1827 100644 --- a/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp +++ b/mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp @@ -257,7 +257,8 @@ prepareBuffers(const AllocTracker &allocTracker, unsigned argumentBuffersIdx = 1; // The number of arguments should be equal to the number of results plus the // number of arguments of the TensorRT engine's functional signature. 
- const unsigned numOperands = sig.numResults + sig.numArguments; + const unsigned numOperands = sig.numArguments; + result.reserve(va.size() / 3); std::vector &hostBuffers = context.getHostIOBuffers(); unsigned hostBufferIdx = 0; @@ -279,6 +280,10 @@ prepareBuffers(const AllocTracker &allocTracker, for (int64_t dimIdx = 0; dimIdx < rank; dimIdx++) dims.d[dimIdx] = va.get(argumentBuffersIdx++); + // Increment rank times to account for strides: This is a hack + for (int64_t dimIdx = 0; dimIdx < rank; dimIdx++) + argumentBuffersIdx++; + uintptr_t pointer = buffer.ptr + offset; MTRT_DBGF("enqueue arg %u ptr=0x%lx offset=%ld", i, buffer.ptr, offset); @@ -339,10 +344,48 @@ prepareBuffers(const AllocTracker &allocTracker, return result; } +class OutputAllocator : public nvinfer1::IOutputAllocator { +public: + OutputAllocator() = default; + ~OutputAllocator() = default; + + void *reallocateOutput(char const *tensorName, void *currentMemory, + uint64_t size, uint64_t alignment) noexcept override { + // Some memory allocators return nullptr when allocating zero bytes, but + // TensorRT requires a non-null ptr even for empty tensors, so allocate a + // dummy byte. + // Fall-back to local memory management. + void* buffer{nullptr}; + cudaMalloc(&buffer, size); + return buffer; + } + + //! IMirroredBuffer does not implement Async allocation, hence this is just a + //! 
wrap around + void *reallocateOutputAsync(char const *tensorName, void *currentMemory, + uint64_t size, uint64_t alignment, + cudaStream_t /*stream*/) noexcept override { + return reallocateOutput(tensorName, currentMemory, size, alignment); + } + + void notifyShape(char const *tensorName, + nvinfer1::Dims const &dims) noexcept override { + mFinalDims = dims; + } + + nvinfer1::Dims getFinalDims() { return mFinalDims; } + + void* getMemory() { return memory.get(); } + +private: + std::unique_ptr memory; + nvinfer1::Dims mFinalDims; +}; + static Status enqueueV3Wrapper(AllocTracker &tracker, ResourceTracker &resourceTracker, NvInferExecContextWrapper &context, - CudaStreamPtr stream, sol::table &va) { + CudaStreamPtr stream, sol::table &va, std::optional outputDescriptors = 0) { StatusOr>> buffers = prepareBuffers(tracker, context, stream, va); if (!buffers.isOk()) @@ -360,6 +403,13 @@ static Status enqueueV3Wrapper(AllocTracker &tracker, return getStatusWithMsg(StatusCode::InternalError, "failed to set input-consumed event"); + std::unique_ptr allocator(new OutputAllocator()); + if (outputDescriptors) { + // Register an output allocator. `enqueueV3` callback should set output + // pointer, and notify shapes. + context->setOutputAllocator("result0", allocator.get()); + } + if (!context->enqueueV3(stream)) return getStatusWithMsg(StatusCode::InternalError, "failed to enqueue engine execution on stream"); @@ -369,6 +419,18 @@ static Status enqueueV3Wrapper(AllocTracker &tracker, MTRT_DBGF("%s", "enqueueV3 successful and inputs are consumed"); + int64_t* desc = reinterpret_cast(*outputDescriptors); + + if (outputDescriptors) { + // Store following: number of results, [rank, ptr, [shape, ...], [stride, ...]]... 
+ // For now assume only one result and just copy input pointer to output + desc[0] = 1; // Number of result is 1 + desc[1] = std::get<2>((*buffers)[0]).nbDims; // Copy input rank + desc[2] = std::get<1>((*buffers)[0]); // Copy input pointer + desc[3] = std::get<2>((*buffers)[0]).d[0]; // Copy input shape + desc[4] = 1; // Use stride 1 + } + return getOkStatus(); } @@ -429,4 +491,20 @@ void mlirtrt::runtime::registerExecutorTensorRTModuleLuaRuntimeMethods( *context, stream, va); SET_LUA_ERROR_IF_ERROR(result, state); }; + + lua["_trtrt_alloc_enqueue"] = + [allocTracker, + resourceTracker](sol::this_state state, + std::shared_ptr context, + CudaStreamPtr stream, uintptr_t outputDescriptors, sol::table va) { + ADD_TENSORRT_MODULE_RANGE("trtrt_alloc_enqueue"); + sol::state_view luaState(state); + assert(context != nullptr); + assert(outputDescriptors != 0); + assert(stream != nullptr && "expected valid stream"); + Status result = enqueueV3Wrapper(*allocTracker, *resourceTracker, + *context, stream, va, outputDescriptors); + SET_LUA_ERROR_IF_ERROR(result, state); + }; + } diff --git a/mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp b/mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp index 200a7ebda..82655c059 100644 --- a/mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp +++ b/mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp @@ -567,6 +567,15 @@ static MTRT_RuntimeValue convertArgType(py::object obj) { throw std::runtime_error("argument must be MemRef or scalar"); } +/// Convert Runtime value to PyMemRefValue or PyScalarValue object. +static py::object convertGenericArgToPyObject(MTRT_RuntimeValue value) { + if (mtrtRuntimeValueIsMemRef(value)) + return py::cast(mtrtRuntimeValueDynCastToMemRef(value)); + if (mtrtRuntimeValueIsScalar(value)) + return py::cast(mtrtRuntimeValueDynCastToScalar(value)); + return py::none(); +} + //===----------------------------------------------------------------------===// // Declare the bindings. 
//===----------------------------------------------------------------------===// @@ -927,5 +936,48 @@ PYBIND11_MODULE(_api, m) { THROW_IF_MTRT_ERROR(s); }, py::arg("name"), py::arg("in_args"), py::arg("out_args"), - py::arg("stream") = py::none()); + py::arg("stream") = py::none()) + .def( + "execute_function_with_result", + [](PyRuntimeSession &self, PyRuntimeClient &client, std::string name, + std::vector inArgs, + std::optional stream) -> py::object { + MTRT_StringView nameRef{name.data(), name.size()}; + + auto inArgsGeneric = llvm::map_to_vector(inArgs, convertArgType); + + // Query the function metadata to get the number of results + int64_t numResults; + MTRT_Status s = mtrtRuntimeSessionGetNbResults(self, nameRef, &numResults); + THROW_IF_MTRT_ERROR(s); + + // Prepare a vector to hold the results + std::vector resultArgs(numResults); + + // Execute the function, letting it populate resultArgs + s = mtrtRuntimeSessionExecuteFunctionWithResult( + self, client, nameRef, inArgsGeneric.data(), + inArgsGeneric.size(), resultArgs.data(), resultArgs.size(), + stream ? 
*stream : mtrtStreamGetNull()); + THROW_IF_MTRT_ERROR(s); + + // Convert the output arguments to Python objects + std::vector outArgs; + for (const auto& arg : resultArgs) { + outArgs.push_back(convertGenericArgToPyObject(arg)); + } + + // Process the results + if (outArgs.empty()) { + return py::none(); + } else if (outArgs.size() == 1) { + return outArgs[0]; + } else { + return outArgs[0]; + } + }, + py::arg("client"), py::arg("name"), py::arg("in_args"), py::arg("stream") = py::none(), + "Execute a function and return the result as a Python object"); + + } diff --git a/mlir-tensorrt/test/Target/Lua/IntegrationTests/ClusteringDynamicShape/alloc_enqueue.mlir b/mlir-tensorrt/test/Target/Lua/IntegrationTests/ClusteringDynamicShape/alloc_enqueue.mlir new file mode 100644 index 000000000..b8f19afa6 --- /dev/null +++ b/mlir-tensorrt/test/Target/Lua/IntegrationTests/ClusteringDynamicShape/alloc_enqueue.mlir @@ -0,0 +1,13 @@ +// RUN: mlir-tensorrt-opt %s \ +// RUN: -pass-pipeline="builtin.module(inline{default-pipeline=canonicalize inlining-threshold=4294967295 max-iterations=4 },stablehlo-ext-lower-special-custom-calls,stablehlo-ext-expand-tuples{entry-function-name=main},stablehlo-ext-canonicalize-shapes{max-iterations=4},stablehlo-raise-qdq,stablehlo-ext-constant-folding,stablehlo-gather-to-slice,stablehlo-ext-canonicalize-shapes{max-iterations=4},stablehlo-canonicalize-dot-general,stablehlo-ext-constant-folding,stablehlo-ext-canonicalize-shapes{max-iterations=4},func.func(stablehlo-ext-canonicalize-scatter),func.func(stablehlo-ext-canonicalize-gather),stablehlo-ext-constant-folding,stablehlo-ext-canonicalize-shapes{max-iterations=4},cse,func.func(tensorrt-stablehlo-input-preprocessing),cse,stablehlo-ext-constant-folding,stablehlo-ext-canonicalize-shapes{max-iterations=4},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false 
top-down=true},convert-stablehlo-to-scf,func.func(tensorrt-infer-plugin-shapes),func.func(plan-materialize-shape-calculations),func.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true}),plan-refine-types,plan-create-shape-funcs,func.func(plan-populate-func-bounds-attrs),stablehlo-clustering{disable-create-shape-func-pass=false disallow-host-tensors-in-tensorrt-clusters=false entrypoint=main},plan-create-closed-regions{test-pre-walk-order=false},plan-outline-clusters,func-ext-duplicate-function-elimination,plan-eliminate-shape-ops,func.func(stablehlo-to-arith-pipeline),func.func(stablehlo-to-std),tensorrt.module(convert-stablehlo-to-tensorrt{allow-i64-to-i32-conversion=false convert-conditionals=true convert-loops=false}),convert-tensorrt-to-runtime,func.func(post-clustering-validation),canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},inline{default-pipeline=canonicalize inlining-threshold=4294967295 max-iterations=4 },func.func(cse),func.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true}),func.func(scf-detensorize-loops),func.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true}),tensorrt.module(tensorrt-broadcast-elimination,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},cse,tensorrt-transpose-elimination,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},cse,tensorrt-reshape-elimination,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},cse,tensorrt-raise-normalizations,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false 
top-down=true},cse,tensorrt-apply-bug-wars{tensorrt-strongly-typed=false tensorrt-version=8.5},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},cse,tensorrt-expand-ops,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},cse,tensorrt-expand-ops,func.func(tensorrt-legalize-int8),translate-tensorrt-to-engine),memref-cast-elimination,plan-alloc-tensors,plan-bufferize,memref-cast-elimination,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},drop-equivalent-buffer-results,func.func(buffer-loop-hoisting),func.func(buffer-hoisting),expand-realloc{emit-deallocs=false},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},ownership-based-buffer-deallocation{private-function-dynamic-ownership=false},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},buffer-deallocation-simplification,bufferization-lower-deallocations,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},func.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true}),convert-memref-to-cuda,convert-plan-to-executor,executor-allocs-to-globals,func.func(executor-populate-func-metadata),convert-tensorrt-runtime-to-executor{index-bitwidth=64 use-packed-memref-cconv=true},convert-cuda-to-executor{index-bitwidth=64 use-packed-memref-cconv=true},drop-nested-modules,convert-scf-to-cf,fold-memref-alias-ops,memref-expand,expand-strided-metadata,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false 
top-down=true},convert-memref-to-executor{allow-unchecked-memref-cast-conversion=true index-bitwidth=64 use-packed-memref-cconv=true},cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},convert-std-to-executor{index-bitwidth=64 use-packed-memref-cconv=true},cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},executor-lower-globals,convert-executor-to-executor{index-bitwidth=64 use-packed-memref-cconv=true},executor-decompose-aggregate-loads-and-stores{target=lua},executor-expand-ops{lower-alloca=true lower-getoffset=true},cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},executor-lower-to-runtime-builtins{index-bitwidth=64 target=lua use-packed-memref-cconv=true},executor-pack-arguments{max-arguments=100})" \ +// RUN: | mlir-tensorrt-translate -mlir-to-runtime-executable -allow-unregistered-dialect | mlir-tensorrt-runner -input-type=rtexe + +func.func public @main(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> { + // Add operation + %result = stablehlo.add %arg0, %arg1 : tensor<1xf32> + return %result: tensor<1xf32> +} + +// CHECK: result[0] = 2.000 + +// CHECK-NOT: result \ No newline at end of file diff --git a/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py b/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py new file mode 100644 index 000000000..c8f680738 --- /dev/null +++ b/mlir-tensorrt/test/python/IntegrationTests/test_stablehlo_alloc_enqueue.py @@ -0,0 +1,124 @@ +# RUN: %PYTHON %s +import time + +import mlir_tensorrt.compiler.api as compiler +import mlir_tensorrt.compiler.ir as ir +import mlir_tensorrt.runtime.api as runtime +import numpy as np + +ASM = """ +func.func @main(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> { + %1 = stablehlo.add %arg0, %arg1 : (tensor<1xf32>, 
tensor<1xf32>) -> tensor<1xf32> + func.return %1 : tensor<1xf32> +} +""" + +EXECUTOR = """ +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, #dlti.dl_entry, 64 : i64>, #dlti.dl_entry, 64 : i64>>, executor.global_init_func = @executor_init_globals, executor.process_grid_shape = array} { + executor.func private @_dealloc(...) + executor.func private @_inttoptr_i64_i64(i64) -> !executor.ptr + executor.func private @_load_i64(...) -> i64 + executor.func private @_store_i64(...) + executor.func private @executor_alloc(i64, i64) -> !executor.ptr + executor.func private @__cuda_stream_create() -> !executor.ptr + executor.global @stream0 constant : !executor.ptr + executor.func private @_trtrt_alloc_enqueue(!executor.opaque<"trtrt_context">, !executor.ptr, !executor.ptr, ...) + executor.func private @_trtrt_create_runtime() -> !executor.opaque<"trtrt_runtime"> + executor.func private @_trtrt_create_context(!executor.opaque<"trtrt_engine">) -> !executor.opaque<"trtrt_context"> + executor.func private @_trtrt_load(!executor.opaque<"trtrt_runtime">, !executor.ptr, i64) -> !executor.opaque<"trtrt_engine"> + executor.global @tensorrt_runtime : !executor.opaque<"trtrt_runtime"> + executor.constant_resource @tensorrt_cluster_engine_data 
dense<"0x66747274000000000100000000000000D4410000000000000A040A02727472744E474E4500000000380000000000000092410000000000007074727400000000EF000000000000007A41000000000000028001000A0280010004028001000A0280010002D2800100078001000000000012800100108001000780010008000000078001000900000007800100F8C3250007800100883BA00007800100C00000000880010000000003000000000880010000001002000000000780010000C000000780010000900100078001000002000007800100380000000A80010000078001000004000007800100FFFFFF7F07800100FFFF000007800100FFFF000007800100000400000980010000000DE702000000EF7FED7F0A800100009E800100098001000000000000000000617FE98001000980010000000000000000000980010000000000000000000780010000000000098001000000000000000000098001000000000000000000167FE880010007800100000000000A800100000A800100010A8001000031800100078001000000000009800100000000000000000009800100010000000000000018800100110000000000000001800C8001001100000000000000556E6E616D6564204E6574776F726B203001800880010000000000000000000680010000000000018001800100010000000000000001800880010000000000000000000880010003000000000000009F8001002680010007800100000000009D80010018800100070000000000000001800C8001000700000000000000726573756C74303780010003800100000E800100078001000000000003800100000180098001000800000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000780010000000080C87F118001000780010001000000018007800100010000000000000001000000EE7F2480010007800100000000005380010007800100010000000680010002000000158001000780010000000000098001000100000000000000627F607F9F8001002680010007800100000000009D80010018800100040000000000000001800C8001000400000000000000617267313780010003800100000E800100078001000000000003800100000180098001000800000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000780010000000080C87F118001000780010001000000018007800100010000000000000001000000EE7F248001000780010000000
0005380010007800100000000000680010001000000158001000780010000000000098001000000000000000000627F607F9F8001002680010007800100000000009D80010018800100040000000000000001800C8001000400000000000000617267303780010003800100000E800100078001000000000003800100000180098001000800000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000780010000000080C87F118001000780010001000000018007800100010000000000000001000000EE7F2480010007800100000000005380010007800100000000000680010000000000158001000780010000000000098001000000000000000000627F607F098001000100000000000000CC800100CB800100078001000000000049800100088001000100000000000000078001005900000001800280010098320000000000007F454C460201013307000000000000000200BE007E0000000000000000000000B831000000000000782D00000000000059055900400038000400400011000100002E7368737472746162002E737472746162002E73796D746162002E73796D7461625F73686E6478002E6E762E696E666F002E746578742E67656E6572617465644E6174697665506F696E7477697365002E6E762E696E666F2E67656E6572617465644E6174697665506F696E7477697365002E6E762E7368617265642E67656E6572617465644E6174697665506F696E7477697365002E6E762E676C6F62616C2E696E6974002E6E762E636F6E7374616E7434002E6E762E676C6F62616C002E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E72656C2E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E64656275675F6672616D65002E72656C2E6E762E636F6E7374616E7434002E72656C2E64656275675F6672616D65002E72656C612E64656275675F6672616D65002E6E762E63616C6C6772617068002E6E762E70726F746F74797065002E6E762E72656C2E616374696F6E00002E7368737472746162002E737472746162002E73796D746162002E73796D7461625F73686E6478002E6E762E696E666F002E746578742E67656E6572617465644E6174697665506F696E7477697365002E6E762E696E666F2E67656E6572617465644E6174697665506F696E7477697365002E6E762E7368617265642E67656E6572617465644E6174697665506F696E7477697365002E6E762E676C6F62616C2E696E6974005F5A4E537439747275655F7479706
53576616C756545002E6E762E636F6E7374616E7434002E6E762E676C6F62616C005F5A4E5374313066616C73655F747970653576616C756545005F5A4E35707767656E364B417272617949664A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313053726353756243566563454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313044737453756243566563454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B417272617949694A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E535F3654656E736F72494C6931454E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373753726343566563454C4E535F36494F5479706545304545454A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E535F3654656E736F72494C6931454E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373744737443566563454C4E535F36494F5479706545314545454A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B417272617949694A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B41727261794950664A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B41727261794950664A4C6931454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313044737453756243566563454A4C6938454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E53305F494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313044737453756243566563454A4C6938454545454A4C6931454545386B4E425F44494D5345005F5A4E35707767656
E364B4172726179494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313053726353756243566563454A4C6938454545386B4E425F44494D5345005F5A4E35707767656E364B4172726179494E53305F494E36335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313053726353756243566563454A4C6938454545454A4C6932454545386B4E425F44494D5345005F5A4E35707767656E364B417272617949664A4C6932454545386B4E425F44494D5345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137386E62496E7075747345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137396E624F75747075747345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137366E6244696D7345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373769647844696D4345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137386356656353697A6545005F5A4E36315F494E5445524E414C5F30303030303030305
F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731317375624356656353697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731316E624356656353706C697445005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313735697350435145005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731316861735372635363616C6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731316861734473745363616C6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373763746153697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731326E62496E6E6572497465727345005F5A4E36315F494E5445524E4
14C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731326E624F75746572497465727345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137376E62497465727345005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373767727053697A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731306772707350657243746145005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731307365677350657247727045005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F32323331373131635665637350657247727045005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313767727053697A65477443566563536
97A6545005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F3232333137313669734C6173744356656350616464656445005F5A4E36315F494E5445524E414C5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313736335F474C4F42414C5F5F4E5F5F30303030303030305F32345F67656E6572617465644E6174697665506F696E74776973655F62326337633265345F323233313731346D61784E6174697665416C69676E45002E72656C2E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E6E762E636F6E7374616E74302E67656E6572617465644E6174697665506F696E7477697365002E64656275675F6672616D65002E72656C2E6E762E636F6E7374616E7434002E72656C2E64656275675F6672616D65002E72656C612E64656275675F6672616D65002E6E762E63616C6C6772617068002E6E762E70726F746F74797065002E6E762E72656C2E616374696F6E0067656E6572617465644E6174697665506F696E7477697365000000000000000000000000000000000000000000000000000000003200000003000D00000000000000000000000000000000007300000003000F00000000000000000000000000000000009700000003000E0000000000000000000000000000000000A700000001000E0074000000000000000100000000000000BE00000003000B0000000000000000000000000000000000CC0000000300100000000000000000000000000000000000D70000000100100004000000000000000100000000000000F000000001000E00440000000000000004000000000000001401000001000E00080000000000000004000000000000008601000001000E0060000000000000000400000000000000F801000001000E00340000000000000004000000000000001C02000001000E0050000000000000000400000000000000A902000001000E00180000000000000004000000000000003603000001000E00680000000000000004000000000000005A03000001000E00200000000000000004000000000000007F03000001000E003C000000000000000400000000000000A403000001000E00000000000000000004000000000000001604000001000E00580000000000000004000000000000009504000001000E002C00000000000000040000000
00000000705000001000E004C0000000000000004000000000000008605000001000E0010000000000000000400000000000000AA05000001000E00700000000000000004000000000000003806000001000E0028000000000000000400000000000000C706000001000E0048000000000000000400000000000000530700000100100000000000000000000400000000000000E007000001000E000C0000000000000004000000000000006E08000001000E00640000000000000004000000000000000009000001000E00380000000000000004000000000000009209000001001000060000000000000001000000000000001D0A00000100100008000000000000000100000000000000AF0A00000100100005000000000000000100000000000000410B000001000E0054000000000000000400000000000000CE0B000001000E001C000000000000000400000000000000610C000001000E006C000000000000000400000000000000F40C000001000E0024000000000000000400000000000000810D000001000E00400000000000000004000000000000000E0E000001000E00040000000000000004000000000000009F0E000001000E005C000000000000000400000000000000300F000001000E0030000000000000000400000000000000C20F000001000E00750000000000000001000000000000005A1000000100100007000000000000000100000000000000F110000001000E0014000000000000000400000000000000B111000003000C0000000000000000000000000000000000D811000003000400000000000000000000000000000000001A12000003000700000000000000000000000000000000003612000003000800000000000000000000000000000000004512000012100D000000000000000000800D000000000000FFFFFFFF2400000000000000FFFFFFFFFFFFFFFF0300047CFFFFFFFF0F0C818080280008FF8180280881808028000000FFFFFFFF340000000000000000000000000000000000000000000000800D000000000000040400000004400000000C818080280004F002000000000000000000042F08002F00000028000000041108002F00000000000000041208002F00000000000000043704007E000000040A08002B000000600158000319580004170C00000000000500400000F0610004170C00000000000400100000F0C10004170C000000000003000C0000F0110004170C00000000000200080000F0110004170C00000000000100040000F0110004170C00000000000000000000F01100031BFF00041C280000010000900B0000E00B0000000C0000200C0000400C0000600C0000800C0000B00C0000D00C0000041E0400000000000
0000000FFFFFFFF00000000FEFFFFFF00000000FDFFFFFF00000000FCFFFFFF00000000730000000000000000000011250005362001000000000000020000002A00000018010000000000000200000029000000100100000000000002000000280000000801000000000000020000002700000000010000000000000200000026000000F8000000000000000200000025000000F0000000000000000200000024000000E8000000000000000200000023000000E0000000000000000200000022000000D8000000000000000200000021000000D0000000000000000200000020000000C800000000000000020000001F000000C000000000000000020000001E000000B800000000000000020000001D000000B000000000000000020000001C000000A800000000000000020000001B000000A000000000000000020000001A000000980000000000000002000000190000009000000000000000020000001800000088000000000000000200000017000000800000000000000002000000160000007800000000000000020000001500000070000000000000000200000014000000680000000000000002000000130000006000000000000000020000001200000058000000000000000200000011000000500000000000000002000000100000004800000000000000020000000F0000004000000000000000020000000E0000003800000000000000020000000D0000003000000000000000020000000C0000002800000000000000020000000B0000002000000000000000020000000A000000180000000000000002000000090000001000000000000000020000000800000008000000000000000200000007000000000000000000000002000000040000004400000000000000020000002F000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000247601FF000A0000FF008E0700E40F0019790000000000000021000000280E0019790300000000000025000000620E000C720000FF0000007052F00300E21F00247800038000000000028E0700CA2F00197804FF050000000016010000CE0F000C8A00FF006000007052F20300E40F000C8A00FF006600007052F40300E40F00078802FF010000000000800000E40F00078803FF010000000000000100E40F000C7A0004005800007062F20300E20F00888300FF020000000000000000E80100888300FF020200000000000000E80100888300FF030100000000000000E80100888300FF030300000000000000E801001D7B0000000000000000010000EC0F004D190000000000000000800300EA0F00137A1200005900000000000000E21F00B97A040000460000000A000000E20F0013721300040000000000000000C40F0006731000120000000094200000220E000C720004FF0000007062F40300E40F00127800001F000000FFC08E0700CA0F0008731000100000000010000000241E0010780210FEFFFF0FFFE0FF0700CC1F00057303000200000000F0210000640000247202FFFF000000FF008E0700E41F00247211FFFF000000030A8E0700C82F002472111112000000FF028E0700C80F00277203031100000002008E0700C80F00247211FFFF00000013008E0700C80F002772030311000000FF008E0700C80F00247203FFFF000000030A8E0700C80F00247203120300000011028E0700CA0F000C720012030000007040F00300DA0F00248803030
1000000120A8E0700E20F000C7A00FF005900007052F00300C80F000C720012030000007040F20300DA0F002498030301000000120A8E0700C80F0024A203FFFF000000030A8E0700E20F00128A03FF00590000FF338E0700C80F0016780003042100000000000000C80F001078020020000000FFE0FF0700E40F041078030040000000FFE0FF0700E40F000C7A0002005A00007062F00300E40F000C7A0003005A00007062F20300E40F001078100060000000FFE0FF0700C80F000C7A0010005A00007062F40300E40F001078100080000000FFE0FF0700C60F00848902FF000200000000000000220E000C7A0010005A00007062F60300E40F0010781000A0000000FFE0FF0700E20F00848903FF000300000000000000660E000C7A0010005A00007062F80300E20F00849912FF000200000000000000A80E00849913FF000300000000000000E80E0084A914FF000200000000000000280F0084A915FF000300000000000000280F0084B91AFF000200000000000000280F0084B91BFF000300000000000000220F000C720002FF00000070527C0400E21F00247A020000600000FF028E0700E20F040C720003FF00000070527A0400E22F00247A030000660000FF028E0700C60F001088110220000000FFE0FF0700E40F041098210240000000FFE0FF0700E40F001088100320000000FFE0FF0700E40F041098170340000000FFE0FF0700E20F00246211FFFF00000002028E0700E20F000C720012FF0000007052FC0400E44F0010A8250260000000FFE0FF0700E20F00245210FFFF00000003028E0700E20F000C720013FF0000007052FA0400C48F0084C913FF000200000000000000220E0010781200C0000000FFE0FF0700E40F0010A8160360000000FFE0FF0700E40F0010B8240280000000FFE0FF0700E40F00246221FFFF00000002028E0700E20F000C720014FF00000070527C0500E40F0184C914FF000300000000000000620E00245217FFFF00000003028E0700E20F000C720015FF00000070527A0500C40F0010B8200380000000FFE0FF0700E40F0410C81D02A0000000FFE0FF0700E40F0010C81C03A0000000FFE0FF0700C60F00246225FFFF00000002028E0700E20F000C72001AFF0000007052FC0500C60F00245216FFFF00000003028E0700E20F000C7A0012005A00007062FA0300D20F00246224FFFF00000002028E0700E20F000C72001BFF0000007052FC0500C60F0084D912FF000200000000000000A20E0010D81F02C0000000FFE0FF0700E40F0010D81E03C0000000FFE0FF0700CE0F00246220FFFF00000003028E0700E20F000C720013FF00000070527C0600E41F0084D913FF000300000000000000360E0024621DFFFF00000002028E070
0E20F000C720014FF00000070527C0600DA2F0024621CFFFF00000003028E0700E20F000C720012FF0000007052FC0600DA4F0010621F02FF000000FFE0FF0700E40F000C720013FF0000007052FC0600E21F00248415FF04000000FF008E0700D80F0024621EFFFF00000003028E0700E20F000C7A0000005A00007062FC0300E20F00248214FFFF00000010008E0700E40F0025861015005C000011028E0700E20F0003781BFF400000000000000000C60F00258614150062000014028E0700E20F00818906100400000000191E0C00680100818907140400000000191E0C0062030024E412FF04000000FF008E0700C80F0025E62202005C000012028E0700C80F0025E612030062000012028E0700E20F0081E904220400000000191E0C0066050024A414FF04000000FF008E0700E22F0081E905120400000000191E0C00620300249410FF04000000FF008E0700E21F0003781AFF400000000000000000E20F00249223FFFF00000021008E0700E44F0024B421FF04000000FF008E0700C40F0025A61214005C000025028E0700C82F0025962210005C000023028E0700E20F0081A90A120400000000191E0C0066010025A614140062000016028E0700E20F00819908220400000000191E0C00660300259610100062000017028E0700E20F0081A90B140400000000191E0C0066050025B61621005C000024028E0700E20F0002D8240004000000000F000000E20F00819909100400000000191E0C0064070024D223FFFF0000001F008E0700C42F0024D225FFFF0000001E008E0700E20F0081B90C160400000000191E0C0062030024C414FF04000000FF008E0700E44F0024C222FFFF0000001C008E0700E40F0025B61E210062000020028E0700C80F0025C61C14005C00001D028E0700E20F0081B90D1E0400000000191E0C0066050025D61224005C000023028E0700E21F0481C90E1C0400000000191E0C0066050025D610240062000025028E0700E28F0081D918120400000000191E0C0068050081D919100400000000191E0C0062050010781600E0000000FFE0FF0700E22F0025C614140062000022028E0700C60F000C7A0016005A00007062FC0300E40F0081C90F140400000000191E0C00640100037814FF400000000000000000E41F000C72001AFF0000007052FC0300E20F0045790000000200000000800300D80F0047690000E00100000000800300EA0F00037816FF020000000000000000E44F000C720014FF0000007052F20300DA0F00849911FF000200000000000000220E0010981002E0000000FFE0FF0700E20F00249415FF04000000FF008E0700E20F000C720011FF0000007052FC0400E41F00849911FF000300000000000000360E00246210FFF
F00000002028E0700E20F0010980203E0000000FFE0FF0700C40F000C720011FF0000007052FC0400DA1F00246202FFFF00000003028E0700C80F00249211FFFF00000002008E0700E40F0025960215005C000010028E0700C80F00259610150062000011028E0700E20F00819912020400000000191E0C0064010021721504050000000000010000E40F02819913100400000000191E0C006201000C720016FF0000007052F20300E20F0047090000B00000000000800300F60F0021721106070000000000010000E21F0047190000900000000000800300EA0F0021720908090000000000010000E20F0047290000700000000000800300EA0F0021720B0A0B0000000000010000E20F0047390000500000000000800300EA0F0021720D0C0D0000000000010000E20F0047490000300000000000800300EA0F0021D20518190000000000010000E20F0021D20712130000000000010000E20F0221720F0E0F0000000000010000C40F0041790000000000000000800300EA4F000C72001BFF0000007052FC0300DA0F004D690000000000000000800300EA0F00247403FF04000000FF008E0700E41F00247A0200006C0000FF028E0700C80F00257602020068000003028E0700CA0F0086790002150000000419100C00E201004D090000000000000000800300EA0F0086790002118000000419100C00E203004D190000000000000000800300EA0F0086790002090001000419100C00E205024D290000000000000000800300EA0F00867900020B8001000419100C00E207004D390000000000000000800300EA0F00867900020D0002000419100C00E209004D490000000000000000800300EA0F00867900020F8002000419100C00E201004D590000000000000000800300EA0F000C720014FF0000007052FC0300E20F0086790002050003000419100C00D801004D690000000000000000800300EA0F0086790002078003000419100C00E21F004D790000000000000000800300EA0F0047790000F0FFFFFFFFFF830300C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F0018790000000000000000000000C00F000800000004000000010000000100000002000000100000000100000008000000020000000800000001000000080000000001000001000000010000000100000020000000010000000100000002000000020000008000000001000000200000000100000001000000020000000
100000002000000010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000030000000000000000000000000000000000000040000000000000007F010000000000000000000000000000010000000000000000000000000000000B0000000300000000000000000000000000000000000000BF010000000000005E1200000000000000000000000000000100000000000000000000000000000013000000020000000000000000000000000000000000000020140000000000008004000000000000020000002F00000008000000000000001800000000000000120100000100000000000000000000000000000000000000A0180000000000007000000000000000000000000000000001000000000000000000000000000000290000000000007000000000000000000000000000000000101900000000000024000000000000000300000000000000040000000000000000000000000000005100000000000070400000000000000000000000000000003419000000000000B000000000000000030000000D00000004000000000000000000000000000000540100000100007000000000000000000000000000000000E4190000000000002000000000000000030000000000000004000000000000000800000000000000700100000B00007000000000000000000000000000000000081A00000000000010000000000000000000000000000000080000000000000008000000000000001F0100000900000040000000000000000000000000000000181A0000000000005002000000000000030000000B00000008000000000000001000000000000000310100000900000040000000000000000000000000000000681C0000000000001000000000000000030000000400000008000000000000001000000000000000A70000000100000002000000000000000000000000000000781C0000000000002801000000000000000000000000000008000000000000000000000000000000C00000000100000042000000000000000000000000000000A01D000000000000B801000000000000000000000D00000004000000000000000000000000000000320000000100000006001000000000000000000000000000801F000000000000800D000000000000030000002F00002880000000000000000000000000000000970000000100000003000000000000000000000000000000002D0000000000007600000000000000000000000000000004000000000000000000000000000000730000000800000043000000000000000000000000000000762D00000
00000000400000000000000000000000D00000001000000000000000000000000000000B50000000800000003000000000000000000000000000000782D00000000000009000000000000000000000000000000040000000000000000000000000000000600000005000000B83100000000000000000000000000000000000000000000E000000000000000E00000000000000008000000000000000100000005000000781C000000000000000000000000000000000000000000008810000000000000881000000000000008000000000000000100000006000000002D000000000000000000000000000000000000000000007800000000000000850000000000000008000000000000000100000005000000B83100000000000000000000000000000000000000000000E000000000000000E000000000000000080000000000000008800100000000000000000018800100180000000000000001800C800100180000000000000067656E6572617465644E6174697665506F696E7477697365B67F337F0880010001000000000000000F800100078001002800008095800100088001000200000000000000018069800100020000000000000018800100040000000000000001800C800100040000000000000061726730118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F18800100040000000000000001800C800100040000000000000061726731118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F088001000100000000000000018069800100010000000000000018800100070000000000000001800C8001000700000000000000726573756C7430118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F188001009E0000000000000001800C8001009E0000000000000050574E285B74656E736F7272742E656C656D656E745F776973655D206C6F63282F776F726B7370616365732F54656E736F7252542D496E63756261746F722F6D6C69722D74656E736F7272742F746573742F5461726765742F4C75612F496E746567726174696F6E54657374732F436C7573746572696E6744796E616D696353686170652F616C6C6F635F656E71756575652E6D6C69723A373A3133292918800100000000000000000001800C800100000000000
00000000F800100078001002800008008800100090000000000000007800100FFFFFFFF628001000A8001000155800100038001000201800B80010000000000000000000180548001000100000000000000228001000380010001038001000203800100000380010000AB7FAA7F468001000A80010000B97F9D7F0A8001000007800100020000004A80010008800100FF00000000000000B57F1380010007800100010000000780010020000000078001000800000007800100010000000780010080000000EC7F07800100000000006A7F57800100098001005401000000000000098001000000000000000000A87F0880010060010000000000000880010000000000000000000180088001000100000000000000000000000000000008800100000000000000000009800100010000000000000009800100010000000000000018800100070000000000000001800C8001000700000000000000726573756C74306980010018800100070000000000000001800C8001000700000000000000726573756C7430118001000780010001000000018007800100010000000000000000000000EE7F118001000780010001000000018007800100010000000000000001000000EE7F4C8001000A80010000B37F967F018089800100020000000000000088800100078001000000000001808680010000000000000000000180078001000000000000000000777F07800100000000000180A58001000000000000000000078001000000000001801880010000000000000000000A80010000018087800100000000000000000001808780010000000000000000000180078001000000000000000000018007800100000000000000000001800780010000000000000000000180188001000000000000000000767F88800100078001000000000001808680010000000000000000000180078001000000000000000000777F07800100000000000180A58001000000000000000000078001000000000001801880010000000000000000000A80010001018087800100000000000000000001808780010000000000000000000180078001000000000000000000018007800100000000000000000001800780010000000000000000000180188001000000000000000000767F098001000000000000000000098001000000000000000000098001000000000000000000098001000000000000000000A080010001800C8001000101000000000000506172616D657465725479706500506F696E745769736500506172616D657465725375625479706500506F696E745769736545787072657373696F6E004E62496E70757441726773003200496E70757441726773005B2261726730222C202261726
731225D004E624F7574707574566172730031004F757470757456617273005B2276617230225D004E62506172616D73003000506172616D73005B5D004E624C69746572616C730030004C69746572616C73005B5D004E624F7065726174696F6E730031004F7065726174696F6E73005B226175746F20636F6E73742076617230203D20707767656E3A3A69506C757328617267302C2061726731293B225D000180098001000200000000000000000000000000000001010000000000005F7F01808C800100010000000000000001808A80010003000000000000000180EA8001000300000000000000078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F757F0180EA8001000300000000000000078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F078001000100000001800980010001000000000000000100000000000000157F757F0180EA800100030000000000000007800100FFFFFFFF157F07800100FFFFFFFF157F07800100FFFFFFFF157F757F01808B80010003000000000000000A800100000180018001000300000000000000018007800100000000000000000001800780010000000000000000000180078001000000000000000000747F0A800100000180018001000300000000000000018007800100000000000000000001800780010000000000000000000180078001000000000000000000747F0A800100000180018001000300000000000000018007800100000000000000000001800780010000000000000000000180078001000000000000000000747F737F0180078001000200000000000000000000000100000001800780010001000000000000000100000094800100078001000000000001800180010001000000000000000180C180010000000000000000000180D580010000000000000000000180D68001000000000000000000078001000000000001800180010000000000000000000180018001000000000000000000068001008F8B8D8B000000000000998B8D8B"> : tensor<16852xi8> + executor.global @tensorrt_cluster_exec_ctx constant : !executor.opaque<"trtrt_context"> + func.func @main(%arg0: !executor.table, !executor.ptr, i64, i64, i64>, %arg1: !executor.table, !executor.ptr, i64, i64, i64>) -> !executor.table, !executor.ptr, i64, i64, 
i64> attributes {executor.function_metadata = #executor.func_meta<[memref<1xf32, #executor.memory_type>, memref<1xf32, #executor.memory_type>], [memref<1xf32, #executor.memory_type>], num_output_args = 0>} { + %c0_i64 = executor.constant 0 : i64 + %c1_i64 = executor.constant 1 : i64 + %c2_i64 = executor.constant 2 : i64 + %c3_i64 = executor.constant 3 : i64 + %c4_i64 = executor.constant 4 : i64 + %c40_i64 = executor.constant 40 : i64 + %0 = executor.get_global @tensorrt_cluster_exec_ctx : !executor.opaque<"trtrt_context"> + %1 = executor.get_global @stream0 : !executor.ptr + %2 = executor.table.get %arg0[1] : , !executor.ptr, i64, i64, i64> + %3 = executor.table.get %arg1[1] : , !executor.ptr, i64, i64, i64> + %4 = executor.call @executor_alloc(%c40_i64, %c4_i64) : (i64, i64) -> !executor.ptr + executor.call @_store_i64(%4, %c1_i64, %c1_i64) : (!executor.ptr, i64, i64) -> () + %5 = executor.table.create(%2, %c0_i64, %c1_i64, %c1_i64, %c1_i64, %3, %c0_i64, %c1_i64, %c1_i64, %c1_i64 : !executor.ptr, i64, i64, i64, i64, !executor.ptr, i64, i64, i64, i64) : !executor.table, i64, i64, i64, i64, !executor.ptr, i64, i64, i64, i64> + executor.call @_trtrt_alloc_enqueue(%0, %1, %4, %5) : (!executor.opaque<"trtrt_context">, !executor.ptr, !executor.ptr, !executor.table, i64, i64, i64, i64, !executor.ptr, i64, i64, i64, i64>) -> () + %7 = executor.call @_load_i64(%4, %c2_i64) : (!executor.ptr, i64) -> i64 + %8 = executor.call @_inttoptr_i64_i64(%7) : (i64) -> !executor.ptr + %9 = executor.call @_load_i64(%4, %c3_i64) : (!executor.ptr, i64) -> i64 + %10 = executor.call @_load_i64(%4, %c4_i64) : (!executor.ptr, i64) -> i64 + %11 = executor.table.create(%8, %8, %c0_i64, %9, %10 : !executor.ptr, !executor.ptr, i64, i64, i64) : , !executor.ptr, i64, i64, i64> + executor.call @_dealloc(%4) : (!executor.ptr) -> () + return %11 : !executor.table, !executor.ptr, i64, i64, i64> + } + func.func private @executor_init_globals() { + %c16852_i64 = executor.constant 16852 : i64 + %0 = 
executor.call @__cuda_stream_create() : () -> !executor.ptr
+    executor.set_global %0, @stream0 : !executor.ptr
+    %1 = executor.call @_trtrt_create_runtime() : () -> !executor.opaque<"trtrt_runtime">
+    executor.set_global %1, @tensorrt_runtime : !executor.opaque<"trtrt_runtime">
+    %2 = executor.load_constant_resource @tensorrt_cluster_engine_data : !executor.ptr
+    %3 = executor.get_global @tensorrt_runtime : !executor.opaque<"trtrt_runtime">
+    %4 = executor.call @_trtrt_load(%3, %2, %c16852_i64) : (!executor.opaque<"trtrt_runtime">, !executor.ptr, i64) -> !executor.opaque<"trtrt_engine">
+    %5 = executor.call @_trtrt_create_context(%4) : (!executor.opaque<"trtrt_engine">) -> !executor.opaque<"trtrt_context">
+    executor.set_global %5, @tensorrt_cluster_exec_ctx : !executor.opaque<"trtrt_context">
+    return
+  }
+}
+"""
+
+
+def stablehlo_add():
+    """Compile the embedded EXECUTOR module and execute its `main` function once.
+
+    Parses EXECUTOR, compiles it to an executable through the compiler API, and,
+    when the runtime client reports at least one device, runs `main` on two
+    single-element float32 memrefs (1.0 and 2.0), copies the result to the host
+    and prints it. Returns None; returns early without executing anything when
+    no device is reported.
+    """
+    # Build/parse the main function.
+    with ir.Context() as context:
+        m = ir.Module.parse(EXECUTOR)
+
+        # Use the compiler API to compile to executable.
+        client = compiler.CompilerClient(context)
+        opts = compiler.StableHLOToExecutableOptions(
+            client,
+            ["--tensorrt-builder-opt-level=3", "--tensorrt-strongly-typed=false"],
+        )
+        opts.set_debug_options(False, [], "alloc_enqueue")
+        exe = compiler.compiler_stablehlo_to_executable(client, m.operation, opts)
+
+    # NOTE(review): block nesting was reconstructed; the runtime section is assumed
+    # to sit outside the `with ir.Context()` block -- confirm against the test file.
+
+    # The RuntimeClient can and should persist across multiple Executables, RuntimeSessions, etc.
+    # It is primarily an interface for creating and manipulating buffers.
+    # From here on `client` is the runtime client, no longer the compiler client.
+    client = runtime.RuntimeClient()
+    stream = client.create_stream()
+    devices = client.get_devices()
+
+    # Nothing to execute on without a device.
+    if len(devices) == 0:
+        return
+
+    session_options = runtime.RuntimeSessionOptions(num_devices=1, device_id=0)
+    session = runtime.RuntimeSession(session_options, exe)
+
+    arg0 = client.create_memref(
+        np.array([1.0], dtype=np.float32).data,
+        device=devices[0],
+        stream=stream,
+    )
+    arg1 = client.create_memref(
+        np.array([2.0], dtype=np.float32).data,
+        device=devices[0],
+        stream=stream,
+    )
+
+    # `main` returns its result instead of writing into a caller-provided
+    # output argument, so only the two inputs are passed.
+    result = session.execute_function_with_result(
+        client, "main", in_args=[arg0, arg1], stream=stream
+    )
+
+    # Sync the stream before reading the host copy.
+    data = np.asarray(client.copy_to_host(result, stream=stream))
+    stream.sync()
+
+    print(data)
+
+
+if __name__ == "__main__":
+    stablehlo_add()