Add IOutputAllocator
jhalakpatel committed Aug 23, 2024
1 parent 4d377a1 commit fc7ca69
Showing 15 changed files with 768 additions and 68 deletions.
70 changes: 53 additions & 17 deletions mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h
@@ -95,6 +95,50 @@ static inline bool mtrtDeviceIsNull(MTRT_Device device) { return !device.ptr; }
/// arguments are optional in functions below.
static inline MTRT_Device mtrtDeviceGetNull() { return MTRT_Device{nullptr}; }

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator
//===----------------------------------------------------------------------===//

// Function pointer types for the allocate and deallocate callbacks.
typedef void *(*AllocateFunc)(void *self, uint64_t size, uint64_t alignment, uint32_t flags, cudaStream_t* stream);
typedef bool (*DeallocateFunc)(void *self, void *memory, cudaStream_t* stream);

typedef struct MTRT_GpuAllocator {
void *ptr; // Pointer to the implementation (PyGpuAllocatorTrampoline in our
// case.)
// Function pointers to methods.
AllocateFunc allocate;
DeallocateFunc deallocate;
} MTRT_GpuAllocator;
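For reference, a minimal sketch of how these callbacks could be populated from C++. The `MyAllocator` type and the trampoline functions below are hypothetical (not part of this commit); they simply forward to `cudaMallocAsync`/`cudaFreeAsync` when a stream is supplied:

```cpp
#include <cuda_runtime_api.h>

// Hypothetical C++ implementation backing the MTRT_GpuAllocator callbacks.
struct MyAllocator {
  void *allocate(uint64_t size, cudaStream_t *stream) {
    void *ptr = nullptr;
    // Stream-ordered allocation when a stream is supplied, else synchronous.
    if (stream)
      cudaMallocAsync(&ptr, size, *stream);
    else
      cudaMalloc(&ptr, size);
    return ptr;
  }
  bool deallocate(void *memory, cudaStream_t *stream) {
    cudaError_t err = stream ? cudaFreeAsync(memory, *stream)
                             : cudaFree(memory);
    return err == cudaSuccess;
  }
};

// Trampolines that recover the C++ object from the opaque `self` pointer.
// Alignment and flags are ignored here; cudaMalloc* returns suitably
// aligned memory for most uses.
static void *allocateCb(void *self, uint64_t size, uint64_t /*alignment*/,
                        uint32_t /*flags*/, cudaStream_t *stream) {
  return static_cast<MyAllocator *>(self)->allocate(size, stream);
}
static bool deallocateCb(void *self, void *memory, cudaStream_t *stream) {
  return static_cast<MyAllocator *>(self)->deallocate(memory, stream);
}

static MTRT_GpuAllocator wrapGpuAllocator(MyAllocator *impl) {
  return MTRT_GpuAllocator{impl, allocateCb, deallocateCb};
}
```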

//===----------------------------------------------------------------------===//
// MTRT_OutputAllocator
//===----------------------------------------------------------------------===//

// Function pointer types for the output-allocator callbacks.
typedef void (*SetGpuAllocator)(void *self, MTRT_GpuAllocator gpuAllocator);
typedef void (*SetTensorName)(void *self, const char *tensorName);
typedef void (*SetCurrentMemory)(void *self, void *currentMemory);
typedef void (*SetOutputSize)(void *self, const int64_t outputSize);
typedef void *(*ReallocateOutputAsync)(void *self, char const *tensorName,
void *currentMemory, uint64_t size,
uint64_t alignment,
cudaStream_t *stream);
typedef void (*NotifyShape)(void *self, char const *tensorName, const int64_t *dims,
int64_t nbDims);

typedef struct MTRT_OutputAllocator {
void *ptr; // Pointer to the implementation (PyOutputAllocatorTrampoline in
// our case.)
// Function pointers to methods.
SetGpuAllocator setGpuAllocator;
SetTensorName setTensorName;
SetCurrentMemory setCurrentMemory;
SetOutputSize setOutputSize;
ReallocateOutputAsync reallocateOutputAsync;
NotifyShape notifyShape;
} MTRT_OutputAllocator;
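To make the calling convention concrete, here is a hedged sketch of the sequence a runtime could follow once a data-dependent output's size becomes known; the tensor name, size, alignment, and dims are illustrative only:

```cpp
// Assumes `alloc` was fully populated by the owner of the implementation.
void resizeOutput(MTRT_OutputAllocator &alloc, cudaStream_t *stream) {
  alloc.setTensorName(alloc.ptr, "result0");
  // Ask the implementation for a buffer large enough for the real shape.
  void *mem = alloc.reallocateOutputAsync(alloc.ptr, "result0",
                                          /*currentMemory=*/nullptr,
                                          /*size=*/4096, /*alignment=*/256,
                                          stream);
  alloc.setCurrentMemory(alloc.ptr, mem);
  alloc.setOutputSize(alloc.ptr, 4096);
  // Report the now-known shape back to the implementation.
  const int64_t dims[2] = {32, 32};
  alloc.notifyShape(alloc.ptr, "result0", dims, /*nbDims=*/2);
}
```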

//===----------------------------------------------------------------------===//
// MTRT_MemRefValue
//===----------------------------------------------------------------------===//
@@ -172,6 +216,9 @@ typedef struct MTRT_MemRefValueInfo {
MLIR_CAPI_EXPORTED MTRT_Status
mtrtMemRefValueGetInfo(MTRT_MemRefValue memref, MTRT_MemRefValueInfo *info);

MLIR_CAPI_EXPORTED MTRT_Status mtrtMemRefValueSetOutputAllocator(
MTRT_MemRefValue memrefValue, MTRT_OutputAllocator pyOutputAllocator);
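A hedged one-line usage sketch (`memref` and `outputAllocator` are assumed to have been created elsewhere):

```cpp
// Attach a caller-provided output allocator to a memref value.
MTRT_Status s = mtrtMemRefValueSetOutputAllocator(memref, outputAllocator);
```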

/// Create DL Managed tensor from MemRefValue.
MLIR_CAPI_EXPORTED MTRT_Status mtrtMemRefValueGetDLPackManagedTensor(
MTRT_MemRefValue memrefValue, MTRT_DLPackManagedTensor *outTensor);
@@ -321,21 +368,6 @@ mtrtScalarValueCastToRuntimeValue(MTRT_ScalarValue v);
MLIR_CAPI_EXPORTED MTRT_Status
mtrtScalarValueGetType(MTRT_ScalarValue scalar, MTRT_ScalarTypeCode *code);

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator
//===----------------------------------------------------------------------===//

// Function pointer types for the allocate and deallocate callbacks.
typedef void *(*AllocateFunc)(void *self, uint64_t size, uint64_t alignment, uint32_t flags, cudaStream_t* stream);
typedef bool (*DeallocateFunc)(void *self, void *memory, cudaStream_t* stream);

typedef struct MTRT_GpuAllocator {
void *ptr; // Pointer to the implementation (PyGpuAllocatorTrampoline in our
// case.)
AllocateFunc allocate; // Function pointer for allocation
DeallocateFunc deallocate; // Function pointer for deallocation
} MTRT_GpuAllocator;

//===----------------------------------------------------------------------===//
// MTRT_RuntimeSessionOptions
//===----------------------------------------------------------------------===//
@@ -376,8 +408,8 @@ typedef struct MTRT_RuntimeSession {
/// that the session only has a read-only view in to the Executable for code and
/// constant data. Therefore the Executable must outlive the RuntimeSession.
MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeSessionCreate(
    MTRT_RuntimeSessionOptions options, MTRT_Executable executable,
    MTRT_GpuAllocator allocator, MTRT_RuntimeSession *result);
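An illustrative call, not taken from this commit; `options`, `executable`, and `gpuAllocator` are assumed to have been created via the corresponding C-API constructors:

```cpp
MTRT_RuntimeSession session;
MTRT_Status status =
    mtrtRuntimeSessionCreate(options, executable, gpuAllocator, &session);
// Check `status` with the runtime's status helpers before using `session`.
```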

/// Destroy the session. This does not destroy the associated Executable, which
/// may be shared among many sessions.
@@ -389,6 +421,10 @@ static inline bool mtrtRuntimeSessionIsNull(MTRT_RuntimeSession session) {
return !session.ptr;
}

MLIR_CAPI_EXPORTED MTRT_Status mtrtAddMemRefOutputAllocatorSessionRegistry(
MTRT_MemRefValue memrefValue,
MTRT_OutputAllocator pyOutputAllocator);

/// Using `session`, execute the public function with the specified name.
/// The `inArgs` and `outArgs` are arrays for input arguments and destination
/// arguments, respectively. Input arguments may be MemRefs or scalars, but
22 changes: 22 additions & 0 deletions mlir-tensorrt/executor/include/mlir-executor/Runtime/API/API.h
@@ -667,6 +667,12 @@ class MemRefValue : public RuntimeValue {
return v->getKind() == Kind::MemRef;
}

void setOutputAllocator(OutputAllocator* _outputAllocator) {
outputAllocator = _outputAllocator;
}

OutputAllocator *getOutputAllocator() { return outputAllocator; }

const std::optional<ScalarType> &getScalarType() const { return scalarType; }

RuntimeClient *getClient() { return client; }
@@ -691,6 +697,7 @@ class MemRefValue : public RuntimeValue {
/// address.
std::optional<const Device *> device;
std::optional<ScalarType> scalarType{};
OutputAllocator *outputAllocator{nullptr};
};

//===----------------------------------------------------------------------===//
@@ -868,6 +875,7 @@ class RuntimeSession {
std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator,
std::unique_ptr<AllocTracker> allocTracker,
std::unique_ptr<ResourceTracker> resourceTracker,
std::unique_ptr<OutputAllocatorTracker> outputAllocatorTracker,
std::unique_ptr<GpuAllocator> gpuAllocator);

ExecutableView getExecutable() const { return executable; }
@@ -882,6 +890,10 @@

ResourceTracker &getResourceTracker() { return *resourceTracker; }

OutputAllocatorTracker &getOutputAllocatorTracker() {
return *outputAllocatorTracker;
}

GpuAllocator &getGpuAllocator() { return *gpuAllocator; }

private:
@@ -891,6 +903,7 @@
std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator;
std::unique_ptr<AllocTracker> allocTracker;
std::unique_ptr<ResourceTracker> resourceTracker;
std::unique_ptr<OutputAllocatorTracker> outputAllocatorTracker;
std::unique_ptr<GpuAllocator> gpuAllocator;
sol::state state;
};
@@ -973,6 +986,14 @@ class RuntimeClient {
return pinnedMemoryAllocator;
}

void addOutputAllocator(std::unique_ptr<OutputAllocator> outputAllocator) {
outputAllocators.emplace_back(std::move(outputAllocator));
}

OutputAllocator* getLastOutputAllocator() {
return outputAllocators.back().get();
}

private:
RuntimeClient(llvm::SmallVector<std::unique_ptr<Device>> devices)
: devices(std::move(devices)) {}
@@ -981,6 +1002,7 @@
PinnedMemoryAllocator pinnedMemoryAllocator;
AllocTracker allocTracker;
ResourceTracker resourceTracker;
std::vector<std::unique_ptr<OutputAllocator>> outputAllocators;
};
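A sketch of the ownership split these new members imply: the client owns the allocators, while other components hold only raw pointers. Note that `getLastOutputAllocator` assumes at least one allocator has been added; calling it on an empty registry is undefined behavior.

```cpp
// The client keeps the allocator alive; raw pointers go elsewhere.
client->addOutputAllocator(std::make_unique<CustomTensorRTOuputAllocator>());
OutputAllocator *latest = client->getLastOutputAllocator();
```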

//===----------------------------------------------------------------------===//
@@ -37,6 +37,8 @@ void registerLuaRuntimeMethods(lua_State *state,
const RuntimeSessionOptions &options,
PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker,
ResourceTracker *resourceTracker,
OutputAllocatorTracker *outputAllocatorTracker,
GpuAllocator *allocator);

} // namespace mlirtrt::runtime
@@ -36,7 +36,8 @@ namespace mlirtrt::runtime {
/// `main` function. It is assumed that `main` takes no arguments and returns an
/// integer result (which is returned if the execution is successful).
/// TODO: this should take a handle to a function for streaming output/errors.
StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript,
                                       GpuAllocator *allocator);

/// Synchronously run a serialized executor Executable one time. An `Executable`
/// is essentially a Lua script packaged with metadata and serialized constants
@@ -48,12 +49,15 @@ StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript, GpuAllocator*
/// execution is successful).
/// TODO: this should take a handle to a function for
/// streaming output/errors.
StatusOr<int64_t>
runExecutorExecutable(std::unique_ptr<Executable> executable,
                      std::unique_ptr<GpuAllocator> allocator);

/// Create an execution state. This will setup a Lua environment and invoke
/// global initialization.
StatusOr<std::unique_ptr<RuntimeSession>>
createRuntimeSessionWithLuaBackend(ExecutableView executable,
                                   std::unique_ptr<GpuAllocator> allocator,
const RuntimeSessionOptions &options);

/// Set the primary stream for the loaded executable to use.
@@ -37,7 +37,8 @@ class ResourceTracker;
/// Lua state.
void registerExecutorTensorRTModuleLuaRuntimeMethods(
lua_State *luaState, PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker, ResourceTracker *resourceTracker,
OutputAllocatorTracker *outputAllocatorTracker, GpuAllocator *allocator);

} // namespace mlirtrt::runtime

109 changes: 109 additions & 0 deletions mlir-tensorrt/executor/include/mlir-executor/Support/Allocators.h
@@ -32,6 +32,10 @@ namespace mlirtrt {

struct EventPool;

//===----------------------------------------------------------------------===//
// GpuAllocator and CustomTensorRTAllocator
//===----------------------------------------------------------------------===//

class GpuAllocator {
public:
GpuAllocator() = default;
@@ -56,6 +60,111 @@ class CustomTensorRTAllocator : public GpuAllocator {
cudaStream_t* stream) override;
};

//===----------------------------------------------------------------------===//
// OutputAllocator and CustomTensorRTOuputAllocator
//===----------------------------------------------------------------------===//

//!
//! Class to allocate memory for outputs with data-dependent shapes. Their
//! sizes are unknown ahead of time, so pre-allocation is not possible.
//!
class OutputAllocator {
public:
virtual ~OutputAllocator() = default;
virtual void setGpuAllocator(GpuAllocator* gpuAllocator) = 0;
virtual void setTensorName(const char *tensorName) = 0;
virtual void setCurrentMemory(void *currentMemory) = 0;
virtual void setOutputSize(const int64_t outputSize) = 0;
virtual void *reallocateOutputAsync(char const *tensorName,
void *currentMemory, uint64_t size,
uint64_t alignment,
cudaStream_t * /*stream*/) = 0;
virtual void notifyShape(char const *tensorName, const int64_t *dims,
int64_t nbDims) = 0;
};

class CustomTensorRTOuputAllocator : public OutputAllocator {
public:
CustomTensorRTOuputAllocator() = default;
~CustomTensorRTOuputAllocator() {
if (mOutputPtr != nullptr) {
cudaFree(mOutputPtr);
}
}

void setGpuAllocator(GpuAllocator* gpuAllocator) override {
mGpuAllocator = gpuAllocator;
}

//! Methods are called just after construction. TODO: can they be called
//! during construction?
void setTensorName(const char *tensorName) override {
mTensorName = tensorName;
}

void setCurrentMemory(void *currentMemory) override {
mCurrentMemory = currentMemory;
}

void setOutputSize(int64_t outputSize) override { mOutputSize = outputSize; }

void *reallocateOutputAsync(char const *tensorName, void *currentMemory,
uint64_t size, uint64_t alignment,
cudaStream_t * /*stream*/) override;

void notifyShape(char const *tensorName, const int64_t *dims,
int64_t nbDims) override;

//! nullptr if memory could not be allocated
void *mOutputPtr{nullptr};

//! Size of allocation pointed to by output.
uint64_t mOutputSize{0};

bool mReallocateOutputCalled{false};

bool mNotifyShapeCalled{false};

//! Dimensions of tensor.
std::vector<int64_t> mOutputDims;

private:
GpuAllocator* mGpuAllocator;
const char *mTensorName;
void *mCurrentMemory;
};
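The out-of-line definitions are not part of this hunk. A plausible grow-only implementation of `reallocateOutputAsync`, consistent with the `cudaFree` in the destructor, might look like this (an assumption, not the committed .cpp):

```cpp
void *CustomTensorRTOuputAllocator::reallocateOutputAsync(
    char const *tensorName, void *currentMemory, uint64_t size,
    uint64_t alignment, cudaStream_t * /*stream*/) {
  mReallocateOutputCalled = true;
  // Grow-only policy: keep the existing buffer when it already fits.
  if (size <= mOutputSize && mOutputPtr != nullptr)
    return mOutputPtr;
  if (mOutputPtr != nullptr)
    cudaFree(mOutputPtr);
  mOutputPtr = nullptr;
  if (cudaMalloc(&mOutputPtr, size) == cudaSuccess)
    mOutputSize = size;
  return mOutputPtr; // nullptr signals that allocation failed.
}
```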

class OutputAllocatorTracker {
public:
OutputAllocatorTracker() = default;
~OutputAllocatorTracker() = default;

OutputAllocatorTracker(const OutputAllocatorTracker &) = delete;
OutputAllocatorTracker &operator=(const OutputAllocatorTracker &) = delete;
OutputAllocatorTracker(OutputAllocatorTracker &&) = default;
OutputAllocatorTracker &operator=(OutputAllocatorTracker &&) = default;

// Add a new OutputAllocator
void addAllocator(void *ptr, OutputAllocator *allocator) {
mOutputAllocatorRegistry.emplace_back(std::make_pair(ptr, allocator));
}

// Get a reference to an OutputAllocator
OutputAllocator *getAllocator(void *ptr) {
auto it = std::find_if(
mOutputAllocatorRegistry.begin(), mOutputAllocatorRegistry.end(),
[ptr](const auto &pair) { return pair.first == ptr; });

if (it != mOutputAllocatorRegistry.end()) {
return it->second;
}
return nullptr;
}

private:
std::vector<std::pair<void *, OutputAllocator *>> mOutputAllocatorRegistry;
};
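Usage is keyed on an opaque pointer (in practice presumably the memref's data pointer, judging by how the registry is used elsewhere in this commit). A minimal sketch:

```cpp
// `key` stands in for a real device data pointer.
OutputAllocatorTracker tracker;
auto owned = std::make_unique<CustomTensorRTOuputAllocator>();
int dummy = 0;
void *key = &dummy;
tracker.addAllocator(key, owned.get());
if (OutputAllocator *found = tracker.getAllocator(key))
  found->setTensorName("result0");
```

The linear scan in `getAllocator` keeps the registry simple; for the handful of outputs a session typically produces, this is unlikely to matter.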

//===----------------------------------------------------------------------===//
// PoolTrackedCudaEvent
//===----------------------------------------------------------------------===//