Skip to content

Commit

Permalink
Allow ULTs to work with enabled TimestampPacketWrite
Browse files Browse the repository at this point in the history
Change-Id: Idd4622469220b859e8724d9179837c685377ce52
  • Loading branch information
BartoszDunajski authored and Compute-Runtime-Automation committed Nov 7, 2018
1 parent b0acc5e commit 1e0064f
Show file tree
Hide file tree
Showing 15 changed files with 74 additions and 36 deletions.
4 changes: 3 additions & 1 deletion runtime/command_queue/enqueue_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
if (!waitlistEvent->isUserEvent()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
}
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions runtime/command_queue/hardware_interface.inl
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,15 @@ void HardwareInterface<GfxFamily>::dispatchWalker(

dispatchWorkarounds(commandStream, commandQueue, kernel, true);

if (currentTimestampPacketNodes) {
if (currentTimestampPacketNodes && commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
}

// Program the walker. Invokes execution so all state should already be programmed
auto walkerCmd = allocateWalkerSpace(*commandStream, kernel);

if (currentTimestampPacketNodes) {
if (currentTimestampPacketNodes && commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
}
Expand Down
11 changes: 1 addition & 10 deletions unit_tests/command_queue/command_queue_hw_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,11 +935,6 @@ HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKern
cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, status);
EXPECT_EQ(1u, mockKernel->makeResidentCalls);

std::map<GraphicsAllocation *, uint32_t>::iterator it = csr.makeResidentAllocations.begin();
for (; it != csr.makeResidentAllocations.end(); it++) {
EXPECT_EQ(1u, it->second);
}
}

HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) {
Expand All @@ -963,17 +958,13 @@ HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernel
EXPECT_EQ(1u, mockKernel->getResidencyCalls);

userEvent.setStatus(CL_COMPLETE);

std::map<GraphicsAllocation *, uint32_t>::iterator it = csr.makeResidentAllocations.begin();
for (; it != csr.makeResidentAllocations.end(); it++) {
EXPECT_EQ(1u, it->second);
}
}

HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
UserEvent userEvent(context);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
csr.timestampPacketWriteEnabled = false;

BufferDefaults::context = context;
std::unique_ptr<Buffer> buffer(BufferHelper<>::create());
Expand Down
4 changes: 2 additions & 2 deletions unit_tests/command_queue/enqueue_copy_buffer_event_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ TEST_F(EnqueueCopyBuffer, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {

uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

cl_event eventWaitList[] = {
&event1,
Expand Down
4 changes: 2 additions & 2 deletions unit_tests/command_queue/enqueue_fill_buffer_event_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ HWTEST_F(FillBufferEventTests, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1

uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);

float pattern[] = {1.0f};
size_t patternSize = sizeof(pattern);
Expand Down
10 changes: 6 additions & 4 deletions unit_tests/command_queue/enqueue_handler_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,19 +261,21 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDu

HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) {
struct ExternallySynchEvent : Event {
ExternallySynchEvent() : Event(nullptr, CL_COMMAND_MARKER, 0, 0) {
ExternallySynchEvent(CommandQueue *cmdQueue) : Event(cmdQueue, CL_COMMAND_MARKER, 0, 0) {
transitionExecutionStatus(CL_COMPLETE);
this->updateTaskCount(7);
}
bool isExternallySynchronized() const override {
return true;
}
};
ExternallySynchEvent synchEvent;

auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);

ExternallySynchEvent synchEvent(mockCmdQ);
cl_event inEv = &synchEvent;
cl_event outEv = nullptr;

auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
bool blocking = false;
MultiDispatchInfo emptyDispatchInfo;
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr,
Expand Down Expand Up @@ -356,4 +358,4 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
nullptr);
EXPECT_EQ(mockCsr->waitForTaskCountRequiredTaskCount, mockCmdQ->completionStampTaskCount);
mockCmdQ->release();
}
}
4 changes: 2 additions & 2 deletions unit_tests/command_queue/enqueue_kernel_event_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ TEST_F(EventTests, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {

uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);

cl_event eventWaitList[] =
{
Expand Down
4 changes: 3 additions & 1 deletion unit_tests/command_queue/enqueue_kernel_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1004,9 +1004,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK

//Two more surfaces from preemptionAllocation and SipKernel
size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 1 : 0;

EXPECT_EQ(0, mockCsr->flushCalledCount);
EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size());
EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount, cmdBuffer->surfaces.size());
}

HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) {
Expand Down Expand Up @@ -1455,6 +1456,7 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEv

auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo(), *pDevice->executionEnvironment);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->timestampPacketWriteEnabled = false;
pDevice->resetCommandStreamReceiver(mockCsr);

auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
Expand Down
2 changes: 2 additions & 0 deletions unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TwoOOQsTwoDependentWalkers, shouldHaveOneVFEState) {
HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveAPipecontrolBetweenWalkers) {
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;

parseWalkers<FamilyType>();
auto itorCmd = find<PIPE_CONTROL *>(itorWalker1, itorWalker2);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, IOQWithTwoWalkers, shouldHaveOneVFEState) {
}

HWTEST_F(IOQWithTwoWalkers, shouldHaveAPipecontrolBetweenWalkers2) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.timestampPacketWriteEnabled = false;

enqueueTwoKernels<FamilyType>();
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();

typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

Expand Down
6 changes: 3 additions & 3 deletions unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ TEST_F(EnqueueReadBuffer, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {

uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

cl_bool blockingRead = CL_TRUE;
size_t offset = 0;
Expand Down Expand Up @@ -255,4 +255,4 @@ TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstP
EXPECT_EQ(19u, pCmdOOQ->taskLevel);

pEvent->release();
}
}
6 changes: 3 additions & 3 deletions unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ TEST_F(EnqueueWriteBufferTypeTest, eventReturnedShouldBeMaxOfInputEventsAndCmdQP

uint32_t taskLevelEvent1 = 8;
uint32_t taskLevelEvent2 = 19;
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

cl_bool blockingWrite = CL_TRUE;
size_t offset = 0;
Expand Down Expand Up @@ -256,4 +256,4 @@ TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopi
EXPECT_EQ(19u, pCmdOOQ->taskLevel);

pEvent->release();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenconfigureCSRtoNonDirtyStateWh
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) {
auto &mockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr.timestampPacketWriteEnabled = false;

configureCSRtoNonDirtyState<FamilyType>();

Expand Down Expand Up @@ -269,6 +270,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithThreadGroupPreempti
HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
commandStreamReceiver.timestampPacketWriteEnabled = false;

configureCSRtoNonDirtyState<FamilyType>();
commandStreamReceiver.taskLevel = taskLevel / 2;

Expand Down Expand Up @@ -667,6 +670,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr
pDevice->setForceWhitelistedRegs(true, &forceRegs);

auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.timestampPacketWriteEnabled = false;
// Force a PIPE_CONTROL through a taskLevel transition
taskLevel = commandStreamReceiver.peekTaskLevel() + 1;

Expand Down Expand Up @@ -698,6 +702,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr
pDevice->setForceWhitelistedRegs(true, &forceRegs);

auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.timestampPacketWriteEnabled = false;
// Force a PIPE_CONTROL through a taskLevel transition
taskLevel = commandStreamReceiver.peekTaskLevel() + 1;
commandStreamReceiver.lastSentCoherencyRequest = 0;
Expand Down Expand Up @@ -963,7 +968,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlush
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
MockContext ctx(pDevice);
CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.timestampPacketWriteEnabled = false;
cl_event blockingEvent;
MockEvent<UserEvent> mockEvent(&ctx);
blockingEvent = &mockEvent;
Expand Down Expand Up @@ -1002,7 +1008,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFl
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
MockContext ctx(pDevice);
CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.timestampPacketWriteEnabled = false;
cl_event blockingEvent;
MockEvent<UserEvent> mockEvent(&ctx);
blockingEvent = &mockEvent;
Expand Down Expand Up @@ -2855,6 +2862,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests,

auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0], *pDevice->executionEnvironment);
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->timestampPacketWriteEnabled = false;

mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

Expand Down
33 changes: 31 additions & 2 deletions unit_tests/helpers/timestamp_packet_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat

auto &cmdStream = mockCmdQ->getCS(0);

device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
HardwareInterface<FamilyType>::dispatchWalker(
*mockCmdQ,
multiDispatchInfo,
Expand Down Expand Up @@ -305,6 +306,33 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat
EXPECT_EQ(2u, walkersFound);
}

// Verifies that dispatchWalker emits no PIPE_CONTROL into the queue's command
// stream when the CSR has timestampPacketWriteEnabled set to false, even though
// a timestamp packet container is passed in for the current dispatch.
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketDisabledWhenDispatchingGpuWalkerThenDontAddPipeControls) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

    MockTimestampPacketContainer packetContainer(device->getMemoryManager(), 1);
    MockMultiDispatchInfo dispatchInfo(kernel->mockKernel);
    auto &walkerStream = mockCmdQ->getCS(0);

    // Turn the feature off on the CSR before dispatching.
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = false;

    // Only the timestamp packet container is supplied; the other optional
    // pointer arguments stay null.
    HardwareInterface<FamilyType>::dispatchWalker(
        *mockCmdQ, dispatchInfo, 0,
        nullptr, nullptr, nullptr, nullptr, nullptr,
        &packetContainer, device->getPreemptionMode(), false);

    // Parse everything written to the stream from offset 0 and search it.
    HardwareParse parser;
    parser.parseCommands<FamilyType>(walkerStream, 0);

    auto pipeControlItor = find<PIPE_CONTROL *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_EQ(parser.cmdList.end(), pipeControlItor);
}

HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenObtainNewStampAndPassToEvent) {
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto mockMemoryManager = new MockMemoryManager(*device->getExecutionEnvironment());
Expand Down Expand Up @@ -812,11 +840,12 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutK
event0.addTimestampPacketNodes(node1);
Event event1(&cmdQ, 0, 0, 0);
event1.addTimestampPacketNodes(node2);
UserEvent userEvent;

cl_event waitlist[] = {&event0, &event1};
cl_event waitlist[] = {&event0, &event1, &userEvent};

cl_event clOutEvent;
cmdQ.enqueueMarkerWithWaitList(2, waitlist, &clOutEvent);
cmdQ.enqueueMarkerWithWaitList(3, waitlist, &clOutEvent);

auto outEvent = castToObject<Event>(clOutEvent);

Expand Down
2 changes: 1 addition & 1 deletion unit_tests/kernel/kernel_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1642,7 +1642,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs
EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size());
pKernel->makeResident(pDevice->getCommandStreamReceiver());
EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size());
EXPECT_EQ(commandStreamReceiver.makeResidentAllocations.begin()->first, pKernel->getKernelInfo().getGraphicsAllocation());
EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation()));

memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
Expand Down

0 comments on commit 1e0064f

Please sign in to comment.