diff --git a/src/mem/DcacheCtrl.py b/src/mem/DcacheCtrl.py new file mode 100644 index 0000000000..23c14f0485 --- /dev/null +++ b/src/mem/DcacheCtrl.py @@ -0,0 +1,109 @@ +### The copyright needs be modified for UCD/DArchR/the names of the writers + + +# Copyright (c) 2012-2020 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2013 Amin Farmahini-Farahani +# Copyright (c) 2015 University of Kaiserslautern +# Copyright (c) 2015 The University of Bologna +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
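A standalone sketch, not part of the patch: the SimObject defined below exposes the DRAM cache geometry (dram_cache_size, block_size, addr_size), and dcache_ctrl.cc later decomposes each request address into index and tag fields with ceilLog2()/bits() (see returnIndexDC()/returnTagDC()). The C++ below reproduces that arithmetic with local stand-in helpers and a hypothetical address, assuming the default 512MiB cache with 64-byte blocks.

#include <cassert>
#include <cstdint>
#include <iostream>

// Local stand-in for gem5's ceilLog2(); assumes n > 0.
static unsigned ceilLog2(uint64_t n)
{
    unsigned b = 0;
    while ((1ULL << b) < n)
        ++b;
    return b;
}

// Local stand-in for gem5's bits(addr, hi, lo): extract bit field [hi:lo].
static uint64_t bitField(uint64_t addr, unsigned hi, unsigned lo)
{
    return (addr >> lo) & ((1ULL << (hi - lo + 1)) - 1);
}

int main()
{
    const uint64_t dramCacheSize = 512ULL << 20; // 512MiB (DcacheCtrl.py default)
    const uint64_t blockSize = 64;               // bytes per cache block
    const unsigned addrSize = 64;                // request address width

    const unsigned blockBits = ceilLog2(blockSize);                 // 6
    const unsigned indexBits = ceilLog2(dramCacheSize / blockSize); // 23 (8M sets)

    const uint64_t addr = 0x123456789abcULL; // hypothetical request address
    // Same fields returnIndexDC()/returnTagDC() produce for this geometry:
    // [ tag | index | block offset ]
    const uint64_t index = bitField(addr, blockBits + indexBits - 1, blockBits);
    const uint64_t tag   = bitField(addr, addrSize - 1, blockBits + indexBits);

    assert(index < (dramCacheSize / blockSize));
    std::cout << "index=" << index << " tag=" << tag << std::endl;
    return 0;
}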
+ +from m5.params import * +from m5.proxy import * +from m5.objects.QoSMemCtrl import * + +# Enum for memory scheduling algorithms, currently First-Come +# First-Served and a First-Row Hit then First-Come First-Served +class MemSched(Enum): vals = ['fcfs', 'frfcfs'] + +# DcacheCtrl is a single-channel, single-ported DRAM cache controller +# model that aims to model the most important system-level performance +# effects of a DRAM cache in front of NVM main memory, interfacing with +# media-specific DRAM and NVM interfaces +class DcacheCtrl(QoSMemCtrl): + type = 'DcacheCtrl' + cxx_header = "mem/dcache_ctrl.hh" + + # single-ported on the system interface side, instantiate with a + # bus in front of the controller for multiple ports + port = ResponsePort("This port responds to memory requests") + + # Interface to volatile, DRAM media + dram = Param.DRAMInterface("DRAM interface") + + # Interface to non-volatile media + nvm = Param.NVMInterface("NVM interface") + + dram_cache_size = Param.MemorySize('512MiB', + "DRAM cache size") + block_size = Param.Unsigned('64', + "DRAM cache block size in bytes") + addr_size = Param.Unsigned('64', + "Address width of requests from the outside world") + orb_max_size = Param.Unsigned(256, "Outstanding Requests Buffer size") + crb_max_size = Param.Unsigned(64, "Conflicting Requests Buffer size") + + # JASON: We need to think about this a bit + # The dram interface is an abstract memory, but we don't need the backing + # store. So, null should be true, in_addr_map should be false, + # kvm_map false, and conf_table_reported false + + # read and write buffer depths are set in the interface + # the controller will read these values when instantiated + + # threshold in percent for when to forcefully trigger writes and + # start emptying the write buffer + write_high_thresh_perc = Param.Percent(85, "Threshold to force writes") + + # threshold in percent for when to start writes if the read + # queue is empty + write_low_thresh_perc = Param.Percent(50, "Threshold to start writes") + + # minimum write bursts to schedule before switching back to reads + min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before " + "switching to reads") + + # memory scheduling policy + mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy") + + # pipeline latency of the controller and PHY, split into a + # frontend part and a backend part, with reads and writes serviced + # by the queues only seeing the frontend contribution, and reads + # serviced by the memory seeing the sum of the two + static_frontend_latency = Param.Latency("10ns", "Static frontend latency") + static_backend_latency = Param.Latency("10ns", "Static backend latency") + + command_window = Param.Latency("10ns", "Window in which commands are checked for command-bus bandwidth") diff --git a/src/mem/SConscript b/src/mem/SConscript index cf7adc8668..cbe01368f8 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -47,9 +47,13 @@ SimObject('AbstractMemory.py') SimObject('AddrMapper.py') SimObject('Bridge.py') SimObject('MemCtrl.py') +SimObject('DcacheCtrl.py') SimObject('MemInterface.py') SimObject('DRAMInterface.py') SimObject('NVMInterface.py') +SimObject('DCMemInterface.py') +SimObject('DRAMDCInterface.py') +SimObject('NVMDCInterface.py') SimObject('ExternalMaster.py') SimObject('ExternalSlave.py') SimObject('MemObject.py') @@ -64,10 +68,13 @@ Source('addr_mapper.cc') Source('bridge.cc') Source('coherent_xbar.cc') Source('drampower.cc') +Source('dramcachepower.cc') Source('external_master.cc') Source('external_slave.cc')
Source('mem_ctrl.cc') +Source('dcache_ctrl.cc') Source('mem_interface.cc') +Source('dcmem_interface.cc') Source('noncoherent_xbar.cc') Source('packet.cc') Source('port.cc') @@ -116,12 +123,14 @@ DebugFlag('Bridge') DebugFlag('CommMonitor') DebugFlag('DRAM') DebugFlag('DRAMPower') +DebugFlag('DRAMDCPower') DebugFlag('DRAMState') DebugFlag('NVM') DebugFlag('ExternalPort') DebugFlag('HtmMem', 'Hardware Transactional Memory (Mem side)') DebugFlag('LLSC') DebugFlag('MemCtrl') +DebugFlag('DcacheCtrl') DebugFlag('MMU') DebugFlag('MemoryAccess') DebugFlag('PacketQueue') diff --git a/src/mem/dcache_ctrl.cc b/src/mem/dcache_ctrl.cc new file mode 100644 index 0000000000..c8a53b4ebd --- /dev/null +++ b/src/mem/dcache_ctrl.cc @@ -0,0 +1,2932 @@ +/// The copyright needs be modified for UCD/DArchR/the names of the writers +/* + * Copyright (c) 2010-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mem/dcache_ctrl.hh" + +#include "base/trace.hh" +#include "debug/DRAM.hh" +#include "debug/DcacheCtrl.hh" +#include "debug/Drain.hh" +#include "debug/NVM.hh" +#include "debug/QOS.hh" +#include "mem/mem_interface.hh" +#include "sim/system.hh" + +DcacheCtrl::DcacheCtrl(const DcacheCtrlParams &p) : + QoS::MemCtrl(p), + port(name() + ".port", *this), isTimingMode(false), + retry(false), + nextReqEvent([this]{ processNextReqEvent(); }, name()), + respondEvent([this]{ processRespondEvent(); }, name()), + dramReadEvent([this]{ processDramReadEvent(); }, name()), + respDramReadEvent([this]{ processRespDramReadEvent(); }, name()), + waitingToIssueNvmReadEvent([this] + { processWaitingToIssueNvmReadEvent(); }, name()), + nvmReadEvent([this]{ processNvmReadEvent(); }, name()), + respNvmReadEvent([this]{ processRespNvmReadEvent(); }, name()), + overallWriteEvent([this]{ processOverallWriteEvent(); }, name()), + dram(p.dram), nvm(p.nvm), + dramCacheSize(p.dram_cache_size), + blockSize(p.block_size), + addrSize(p.addr_size), + orbMaxSize(p.orb_max_size), orbSize(0), + crbMaxSize(p.crb_max_size), crbSize(0), + writeHighThreshold(p.write_high_thresh_perc * p.orb_max_size / 100.0), + writeLowThreshold(p.write_low_thresh_perc* p.orb_max_size / 100.0), + minWritesPerSwitch(p.min_writes_per_switch), + memSchedPolicy(p.mem_sched_policy), + frontendLatency(p.static_frontend_latency), + backendLatency(p.static_backend_latency), + commandWindow(p.command_window), + nextBurstAt(0), prevArrival(0), + nextReqTime(0), + stats(*this) +{ + DPRINTF(DcacheCtrl, "Setting up controller\n"); + + pktDramRead.resize(1); + pktNvmReadWaitIssue.resize(1); + pktNvmRead.resize(1); + pktDramWrite.resize(1); + pktNvmWrite.resize(1); + + stallRds = false; + drainDramWrite = false; + drainNvmWrite = false; + + if (orbMaxSize>512) { + dramWrDrainPerc = 0.25; + } + else { + dramWrDrainPerc = 0.5; + } + // NVM Write Drain is defined by the write queue size + // defined by the NVM interface + + if (orbMaxSize == 1) { + writeHighThreshold = 1; + } + + // if (orbMaxSize == 1) { + // minWritesPerSwitch = 2; + // minDrWrPerSwitch = 1; + // minNvWrPerSwitch = 1; + // } + // else { + // minWritesPerSwitch = orbMaxSize * 0.2; + // minDrWrPerSwitch = 0.7 * minWritesPerSwitch; + // minNvWrPerSwitch = minWritesPerSwitch - minDrWrPerSwitch; + // } + + minDrWrPerSwitch = 0.7 * minWritesPerSwitch; + minNvWrPerSwitch = minWritesPerSwitch - minDrWrPerSwitch; + + drWrCounter = 0; + nvWrCounter = 0; + + // dramCacheSize = dram->dramDeviceCapacity; + // dramCacheSize = dramCacheSize*1024*1024; + + tagMetadataStore.resize(dramCacheSize/blockSize); + + // Hook up interfaces to the controller + if (dram) + dram->setCtrl(this, commandWindow); + if (nvm) + nvm->setCtrl(this, commandWindow); + + fatal_if(!dram && !nvm, "Memory controller must have an interface"); + + // perform a basic check of the write thresholds + if (p.write_low_thresh_perc >= p.write_high_thresh_perc) + fatal("Write buffer low threshold %d must be smaller than the " + "high threshold %d\n", p.write_low_thresh_perc, + p.write_high_thresh_perc); +} + +void +DcacheCtrl::init() +{ + if (!port.isConnected()) { + fatal("DcacheCtrl %s is unconnected!\n", name()); + } else { + port.sendRangeChange(); + } +} + +void +DcacheCtrl::startup() +{ + // remember the memory system mode of operation + isTimingMode = system()->isTimingMode(); + + if (isTimingMode) { + // shift the bus busy time sufficiently far ahead that we never + // have to worry about negative values when computing the 
time for + // the next request, this will add an insignificant bubble at the + // start of simulation + nextBurstAt = curTick() + (dram ? dram->commandOffset() : + nvm->commandOffset()); + } +} + +Tick +DcacheCtrl::recvAtomic(PacketPtr pkt) +{ + DPRINTF(DcacheCtrl, "recvAtomic: %s 0x%x\n", + pkt->cmdString(), pkt->getAddr()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + Tick latency = 0; + // do the actual memory access and turn the packet into a response + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + dram->access(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + latency = dram->accessLatency(); + } + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + nvm->access(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + latency = nvm->accessLatency(); + } + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + return latency; +} + +Tick +DcacheCtrl::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) +{ + Tick latency = recvAtomic(pkt); + if (dram) { + dram->getBackdoor(backdoor); + } else if (nvm) { + nvm->getBackdoor(backdoor); + } + return latency; +} + +Addr +DcacheCtrl::returnTagDC(Addr request_addr, unsigned size) +{ + int index_bits = ceilLog2(dramCacheSize/blockSize); + int block_bits = ceilLog2(size); + return bits(request_addr, addrSize-1, (index_bits+block_bits)); +} + +Addr +DcacheCtrl::returnIndexDC(Addr request_addr, unsigned size) +{ + return bits(request_addr, ceilLog2(size) + + ceilLog2(dramCacheSize/blockSize)-1, ceilLog2(size)); +} + +void +DcacheCtrl::checkHitOrMiss(reqBufferEntry* orbEntry) +{ + // access the tagMetadataStore data structure to + // check if it's hit or miss + orbEntry->isHit = + tagMetadataStore.at(orbEntry->indexDC).validLine && + (orbEntry->tagDC == tagMetadataStore.at(orbEntry->indexDC).tagDC); + + if (!tagMetadataStore.at(orbEntry->indexDC).validLine && + !orbEntry->isHit) { + stats.numColdMisses++; + } + else if (tagMetadataStore.at(orbEntry->indexDC).validLine && + !orbEntry->isHit) { + stats.numHotMisses++; + } + + // always hit + // orbEntry->isHit = true; + + // always miss + // orbEntry->isHit = false; +} + +bool +DcacheCtrl::checkDirty(Addr addr) +{ + Addr index = returnIndexDC(addr, blockSize); + return (tagMetadataStore.at(index).validLine && + tagMetadataStore.at(index).dirtyLine); + + + // always dirty + //return true; + + // always clean + //return false; +} + +void +DcacheCtrl::handleDirtyCacheLine(reqBufferEntry* orbEntry) +{ + assert(orbEntry->dirtyLineAddr != -1); + + MemPacket* wbDccPkt = nvm->decodePacket(nullptr, + orbEntry->dirtyLineAddr, + orbEntry->owPkt->getSize(), + false, false); + + nvm->setupRank(wbDccPkt->rank, false); + + pktNvmWrite[0].push_back(wbDccPkt); + + if (pktNvmWrite[0].size() >= nvm->getMaxPendingWrites()) { + stallRds = true; + drainNvmWrite = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + stallRds = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, 
std::max(nextReqTime, curTick())); + } + } + + if (pktNvmWrite[0].size() > maxNvWrEv) { + maxNvWrEv = pktNvmWrite[0].size(); + stats.maxNvWrEvQ = pktNvmWrite[0].size(); + } + + stats.numWrBacks++; + + // no need to call nvm->access for the dirty line. + // Because, we already have written it in nvm, while + // we were processing it into dram cache. +} + +void +DcacheCtrl::handleRequestorPkt(PacketPtr pkt) +{ + // Set is_read and is_dram to + // "true", to do initial dram Read + MemPacket* dcc_pkt = dram->decodePacket(pkt, + pkt->getAddr(), + pkt->getSize(), + true, + true); + + // pass the second argument "true", for + // initial DRAM Read for all the received packets + dram->setupRank(dcc_pkt->rank, true); + + reqBufferEntry* entry = new reqBufferEntry( + true, curTick(), + returnTagDC(pkt->getAddr(), pkt->getSize()), + returnIndexDC(pkt->getAddr(), pkt->getSize()), + pkt, dcc_pkt, + dramRead, false, false, + -1, false, + curTick(), MaxTick, + MaxTick, MaxTick, MaxTick, + MaxTick, + MaxTick, MaxTick, MaxTick + ); + + reqBuffer.emplace(pkt->getAddr(), entry); + + if (pkt->isRead()) { + logRequest(DcacheCtrl::READ, pkt->requestorId(), pkt->qosValue(), + pkt->getAddr(), 1); + } + else { + //copying the packet + PacketPtr copyOwPkt = new Packet(pkt, false, pkt->isRead()); + + accessAndRespond(pkt, frontendLatency, false); + + reqBuffer.at(copyOwPkt->getAddr()) = new reqBufferEntry( + entry->validEntry, entry->arrivalTick, + entry->tagDC, entry->indexDC, + copyOwPkt, entry->dccPkt, + entry->state, entry->isHit, entry->conflict, + entry->dirtyLineAddr, + entry->handleDirtyLine, + entry->drRd, + entry->drWr, + entry->nvWait, + entry->nvRd, + entry->nvWr, + entry->nvmIssueReadyTime, + entry->dramRdDevTime, + entry->dramWrDevTime, + entry->nvmRdDevTime + ); + delete entry; + + entry = reqBuffer.at(copyOwPkt->getAddr()); + + logRequest(DcacheCtrl::WRITE, copyOwPkt->requestorId(), + copyOwPkt->qosValue(), + copyOwPkt->getAddr(), 1); + } + + checkHitOrMiss(entry); + + if (checkDirty(entry->owPkt->getAddr()) && !entry->isHit) { + entry->dirtyLineAddr = tagMetadataStore.at(entry->indexDC).nvmAddr; + entry->handleDirtyLine = true; + } + + // Updating Tag & Metadata + tagMetadataStore.at(entry->indexDC).tagDC = entry->tagDC; + tagMetadataStore.at(entry->indexDC).indexDC = entry->indexDC; + tagMetadataStore.at(entry->indexDC).validLine = true; + + if (entry->owPkt->isRead()) { + if (entry->isHit) { + tagMetadataStore.at(entry->indexDC).dirtyLine = + tagMetadataStore.at(entry->indexDC).dirtyLine; + } + else { + tagMetadataStore.at(entry->indexDC).dirtyLine = false; + } + } + else { + tagMetadataStore.at(entry->indexDC).dirtyLine = true; + } + + tagMetadataStore.at(entry->indexDC).nvmAddr = + entry->owPkt->getAddr(); + + if (entry->owPkt->isRead()) { + stats.readReqs++; + } + else { + stats.writeReqs++; + } +} + +bool +DcacheCtrl::checkConflictInDramCache(PacketPtr pkt) +{ + unsigned indexDC = returnIndexDC(pkt->getAddr(), pkt->getSize()); + + for (auto e = reqBuffer.begin(); e != reqBuffer.end(); ++e) { + if (indexDC == e->second->indexDC + && e->second->validEntry + //&& confReqBuffer.size() < crbMaxSize + ) { + + e->second->conflict = true; + + return true; + } + } + + return false; +} + +void +DcacheCtrl::checkConflictInCRB(reqBufferEntry* orbEntry) +{ + for (auto e = confReqBuffer.begin(); e != confReqBuffer.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(),entry.second->getSize()) + == orbEntry->indexDC) { + orbEntry->conflict = true; + break; + } + } +} + +void 
+DcacheCtrl::logStatsDcache(reqBufferEntry* orbEntry) +{ + if (orbEntry->owPkt->isRead()) { + if (orbEntry->isHit) { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + + long long int stateTick1 = (curTick() - orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + stats.numHits++; + stats.numRdHits++; + } + else { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->nvWait != MaxTick); + assert(orbEntry->nvRd != MaxTick); + assert(orbEntry->drWr != MaxTick); + assert(orbEntry->nvmIssueReadyTime != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + assert(orbEntry->nvmRdDevTime != MaxTick); + assert(orbEntry->dramWrDevTime != MaxTick); + + long long int stateTick1 = + (orbEntry->nvWait - orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + long long int stateTick2 = + (orbEntry->nvRd - orbEntry->nvWait); + assert(stateTick2 >= 0); + stats.timeInWaitingToIssueNvmRead += stateTick2; + + if (stateTick2 > 0) { + stats.totNumPktsNvmRdWait++; + } + + long long int stateTick3 = + (orbEntry->drWr - orbEntry->nvRd); + assert(stateTick3 > 0); + stats.timeInNvmRead += stateTick3; + + stats.totNumPktsNvmRd++; + stats.nvRdDevTime += orbEntry->nvmRdDevTime; + + long long int stateQT2 = stateTick2 + stateTick3 - + (orbEntry->nvmRdDevTime); + assert(stateQT2 >= 0); + stats.nvmRdQingTime += stateQT2; + + long long int stateTick4 = + (orbEntry->dccPkt->readyTime - curTick()); + assert(stateTick4 > 0); + stats.timeInDramWrite += stateTick4; + + stats.totNumPktsDrWr++; + stats.drWrDevTime += orbEntry->dramWrDevTime; + + long long int stateQT3 = stateTick4 - + (orbEntry->dramWrDevTime); + assert(stateQT3 >= 0); + stats.drWrQingTime += stateQT3; + + stats.numMisses++; + stats.numRdMisses++; + } + } + else { + if (orbEntry->isHit) { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->drWr != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + assert(orbEntry->dramWrDevTime != MaxTick); + + long long int stateTick1 = + (orbEntry->drWr - orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + long long int stateTick2 = + (orbEntry->dccPkt->readyTime - curTick()); + assert(stateTick2 > 0); + stats.timeInDramWrite += stateTick2; + + stats.totNumPktsDrWr++; + stats.drWrDevTime += orbEntry->dramWrDevTime; + + long long int stateQT2 = + stateTick2 - (orbEntry->dramWrDevTime); + assert(stateQT2 >= 0); + stats.drWrQingTime += stateQT2; + + stats.numHits++; + stats.numWrHits++; + } + else { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->nvWait != MaxTick); + assert(orbEntry->nvRd != MaxTick); + assert(orbEntry->drWr != MaxTick); + assert(orbEntry->nvmIssueReadyTime != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + assert(orbEntry->nvmRdDevTime != MaxTick); + assert(orbEntry->dramWrDevTime != MaxTick); + + long long int stateTick1 = + (orbEntry->nvWait - 
orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + long long int stateTick2 = + (orbEntry->nvRd - orbEntry->nvWait); + + assert(stateTick2 >= 0); + stats.timeInWaitingToIssueNvmRead += stateTick2; + + if (stateTick2 > 0) { + stats.totNumPktsNvmRdWait++; + } + + long long int stateTick3 = + (orbEntry->drWr - orbEntry->nvRd); + assert(stateTick3 > 0); + stats.timeInNvmRead += stateTick3; + + stats.totNumPktsNvmRd++; + stats.nvRdDevTime += orbEntry->nvmRdDevTime; + + long long int stateQT2 = stateTick2 + stateTick3 - + (orbEntry->nvmRdDevTime); + assert(stateQT2 >= 0); + stats.nvmRdQingTime += stateQT2; + + long long int stateTick4 = + (orbEntry->dccPkt->readyTime - curTick()); + assert(stateTick4 > 0); + stats.timeInDramWrite += stateTick4; + + stats.totNumPktsDrWr++; + stats.drWrDevTime += orbEntry->dramWrDevTime; + + long long int stateQT3 = stateTick4 - + (orbEntry->dramWrDevTime); + assert(stateQT3 >= 0); + stats.drWrQingTime += stateQT3; + + stats.numMisses++; + stats.numWrMisses++; + } + } +} + +bool +DcacheCtrl::resumeConflictingReq(reqBufferEntry* orbEntry) +{ + bool conflictFound = false; + + if (orbEntry->owPkt->isWrite()) { + isInWriteQueue.erase(orbEntry->owPkt->getAddr()); + } + + logStatsDcache(orbEntry); + + for (auto e = confReqBuffer.begin(); e != confReqBuffer.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(), entry.second->getSize()) + == orbEntry->indexDC) { + + conflictFound = true; + + Addr confAddr = entry.second->getAddr(); + + reqBuffer.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry->dccPkt; + + delete orbEntry; + + handleRequestorPkt(entry.second); + + reqBuffer.at(confAddr)->arrivalTick = entry.first; + + confReqBuffer.erase(e); + + checkConflictInCRB(reqBuffer.at(confAddr)); + + if (pktDramRead[0].empty() && !stallRds) { + assert(!dramReadEvent.scheduled()); + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } else { + assert(dramReadEvent.scheduled() || stallRds); + } + + pktDramRead[0].push_back(reqBuffer.at(confAddr)->dccPkt); + + if (pktDramRead[0].size() > maxDrRdEv) { + maxDrRdEv = pktDramRead[0].size(); + stats.maxDrRdEvQ = pktDramRead[0].size(); + } + + break; + } + + } + + if (!conflictFound) { + + reqBuffer.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry->dccPkt; + + delete orbEntry; + } + + return conflictFound; +} + +Tick +DcacheCtrl::earliestDirtyLineInDrRdResp() +{ + for (int i=0; ihandleDirtyLine) { + return reqBuffer.at(addrDramRespReady.at(i))->dccPkt->readyTime; + } + } + return MaxTick; +} + +bool +DcacheCtrl::recvTimingReq(PacketPtr pkt) +{ + // This is where we enter from the outside world + + DPRINTF(DcacheCtrl, "recvTimingReq: request %s addr %lld size %d\n", + pkt->cmdString(), pkt->getAddr(), pkt->getSize()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller\n"); + + // Calc avg gap between requests + if (prevArrival != 0) { + stats.totGap += curTick() - prevArrival; + } + prevArrival = curTick(); + + // What type of media does this packet access? + // We set a flag to make sure every single packet + // checks DRAM first. 
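A standalone sketch, not part of the patch: just below, recvTimingReq() clamps every request to at most one burst with std::min((addr | (burst_size - 1)) + 1, addr + pkt->getSize()) - addr, so a packet never spans a burst boundary. The same arithmetic is shown here with hypothetical addresses, assuming 64-byte bursts.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Size of the access after clamping it to the current burst window,
// i.e. the same expression used in recvTimingReq() below.
static uint64_t burstClampedSize(uint64_t addr, uint64_t pkt_size,
                                 uint64_t burst_size)
{
    return std::min((addr | (burst_size - 1)) + 1, addr + pkt_size) - addr;
}

int main()
{
    const uint64_t burst = 64; // hypothetical bytesPerBurst()
    assert(burstClampedSize(0x1000, 32, burst) == 32);  // fits in one burst
    assert(burstClampedSize(0x1030, 64, burst) == 16);  // clipped at 0x1040
    assert(burstClampedSize(0x1040, 128, burst) == 64); // at most one burst
    return 0;
}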
+ // bool is_dram = true; + + // Validate that pkt's address maps to the dram and nvm + assert(nvm && nvm->getAddrRange().contains(pkt->getAddr())); + //assert(dram && dram->getAddrRange().contains(pkt->getAddr())); + + + // Find out how many memory packets a pkt translates to + // If the burst size is equal or larger than the pkt size, then a pkt + // translates to only one memory packet. Otherwise, a pkt translates to + // multiple memory packets + + Addr addr = pkt->getAddr(); + + unsigned burst_size = dram->bytesPerBurst(); + + unsigned size = std::min((addr | (burst_size - 1)) + 1, + addr + pkt->getSize()) - addr; + + // process merging for writes + if (!pkt->isRead()) { + stats.writePktSize[ceilLog2(size)]++; + stats.writeBursts++; + stats.requestorWriteAccesses[pkt->requestorId()]++; + + assert(pkt->getSize() != 0); + + bool merged = isInWriteQueue.find(pkt->getAddr()) != + isInWriteQueue.end(); + + if (merged) { + + stats.mergedWrBursts++; + + accessAndRespond(pkt, frontendLatency, false); + + return true; + } + } + + // process forwarding for reads + bool foundInORB = false; + bool foundInCRB = false; + bool foundInNWB = false; + + if (pkt->isRead()) { + stats.readPktSize[ceilLog2(size)]++; + stats.readBursts++; + stats.requestorReadAccesses[pkt->requestorId()]++; + + assert(pkt->getSize() != 0); + + if (isInWriteQueue.find(pkt->getAddr()) != isInWriteQueue.end()) { + + if (!reqBuffer.empty()) { + for (const auto& e : reqBuffer) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->validEntry && + e.second->owPkt->isWrite() && + e.second->owPkt->getAddr() <= addr && + ((addr + size) <= + (e.second->owPkt->getAddr() + + e.second->owPkt->getSize()))) { + + foundInORB = true; + + stats.servicedByWrQ++; + + stats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !confReqBuffer.empty()) { + for (const auto& e : confReqBuffer) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->isWrite() && + e.second->getAddr() <= addr && + ((addr + size) <= + (e.second->getAddr() + e.second->getSize()))) { + + foundInCRB = true; + + stats.servicedByWrQ++; + + stats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !foundInCRB && !pktNvmWrite[0].empty()) { + for (int i=0; igetAddr() <= addr && + ((addr + size) <= + (pktNvmWrite[0].at(i)->getAddr() + + pktNvmWrite[0].at(i)->getSize()))) { + + foundInNWB = true; + + stats.servicedByWrQ++; + + stats.bytesReadWrQ += burst_size; + + break; + } + } + } + } + + if (foundInORB || foundInCRB || foundInNWB) { + + accessAndRespond(pkt, frontendLatency, false); + + return true; + } + } + + // process conflicting requests + // calculate dram address: ignored for now (because Dsize=Nsize) + if (checkConflictInDramCache(pkt)) { + + stats.totNumConf++; + + if (confReqBuffer.size()>=crbMaxSize) { + + stats.totNumConfBufFull++; + + retry = true; + + if (pkt->isRead()) { + stats.numRdRetry++; + } + else { + stats.numWrRetry++; + } + + return false; + } + + confReqBuffer.push_back(std::make_pair(curTick(), pkt)); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + if (confReqBuffer.size() > maxConf) { + maxConf = confReqBuffer.size(); + stats.maxNumConf = confReqBuffer.size(); + } + + return true; + } + + // process cases where ORB is full + if (reqBuffer.size() >= orbMaxSize) { + + retry = true; + + if (pkt->isRead()) { + stats.numRdRetry++; + } + else { + stats.numWrRetry++; + } + + return false; + } 
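A standalone sketch, not part of the patch: in the conflict path above, checkConflictInDramCache() flags a new request as conflicting when it maps to the same direct-mapped set (the same tagMetadataStore entry) as a request already in the ORB, after which the request is parked in the CRB or refused with a retry. That aliasing rule reduces to comparing set indices; the helper below is hypothetical and assumes a power-of-two number of sets, matching the 512MiB/64B default geometry.

#include <cassert>
#include <cstdint>

static unsigned ceilLog2(uint64_t n)
{
    unsigned b = 0;
    while ((1ULL << b) < n)
        ++b;
    return b;
}

// Two addresses conflict iff they select the same direct-mapped set,
// regardless of their tags (hypothetical helper, not in the patch).
static bool sameDramCacheSet(uint64_t a, uint64_t b,
                             uint64_t dram_cache_size, uint64_t block_size)
{
    const unsigned blockBits = ceilLog2(block_size);
    const uint64_t numSets = dram_cache_size / block_size;
    return ((a >> blockBits) % numSets) == ((b >> blockBits) % numSets);
}

int main()
{
    const uint64_t cacheSize = 512ULL << 20, blockSize = 64;
    // Same 64-byte line -> same set -> conflict.
    assert(sameDramCacheSet(0x1000, 0x1010, cacheSize, blockSize));
    // Addresses exactly one cache capacity apart alias to the same set.
    assert(sameDramCacheSet(0x1000, 0x1000 + cacheSize, cacheSize, blockSize));
    // Neighbouring lines map to different sets -> no conflict.
    assert(!sameDramCacheSet(0x1000, 0x1040, cacheSize, blockSize));
    return 0;
}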
+ + // if none of the above cases happens, + // then add the pkt to the outstanding requests buffer + + handleRequestorPkt(pkt); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + if (pktDramRead[0].empty() && !stallRds) { + + assert(!dramReadEvent.scheduled()); + + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + + } else { + assert(dramReadEvent.scheduled() || stallRds); + } + + pktDramRead[0].push_back(reqBuffer.at(pkt->getAddr())->dccPkt); + + if (pktDramRead[0].size() > maxDrRdEv) { + maxDrRdEv = pktDramRead[0].size(); + stats.maxDrRdEvQ = pktDramRead[0].size(); + } + + return true; +} + +void +DcacheCtrl::processDramReadEvent() +{ + if (stallRds) { + return; + } + + assert(!pktDramRead[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::WRITE); + + if (switched_cmd_type) { + stats.wrToRdTurnAround++; + } + + for (auto queue = pktDramRead.rbegin(); + queue != pktDramRead.rend(); ++queue) { + to_read = chooseNext((*queue), switched_cmd_type ? + minWriteToReadDataGap() : 0, true); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + if (!read_found) { + + schedule(dramReadEvent, + std::max(nextReqTime, curTick()+dram->getTBurst())); + + return; + } + + reqBufferEntry* orbEntry = reqBuffer.at((*to_read)->getAddr()); + + // sanity check for the chosen packet + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isRead()); + assert(orbEntry->state == dramRead); + + if (orbEntry->handleDirtyLine) { + if (pktNvmWrite[0].size() >= nvm->getMaxPendingWrites()) { + stallRds = true; + drainNvmWrite = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, + std::max(nextReqTime, curTick())); + } + return; + } + + if (numDirtyLinesInDrRdRespQ >= nvm->getMaxPendingWrites()) { + Tick schedTick = earliestDirtyLineInDrRdResp(); + assert(schedTick != MaxTick); + schedule(dramReadEvent, std::max(nextReqTime, schedTick+1)); + return; + } + + if (nvm->writeRespQueueFull()) { + assert(!dramReadEvent.scheduled()); + schedule(dramReadEvent, std::max(nextReqTime, + nvm->writeRespQueueFront()+2)); + return; + } + } + + busState = DcacheCtrl::READ; + + assert(packetReady(orbEntry->dccPkt)); + + Tick cmd_at = doBurstAccess(orbEntry->dccPkt); + + orbEntry->dramRdDevTime = orbEntry->dccPkt->readyTime - cmd_at; + + // sanity check + assert(orbEntry->dccPkt->size <= (orbEntry->dccPkt->isDram() ? 
+ dram->bytesPerBurst() : + nvm->bytesPerBurst())); + assert(orbEntry->dccPkt->readyTime >= curTick()); + + if (orbEntry->owPkt->isRead() && orbEntry->isHit) { + logResponse(DcacheCtrl::READ, + orbEntry->dccPkt->requestorId(), + orbEntry->dccPkt->qosValue(), + orbEntry->owPkt->getAddr(), 1, + orbEntry->dccPkt->readyTime - orbEntry->dccPkt->entryTime); + } + + if (addrDramRespReady.empty()) { + assert(!respDramReadEvent.scheduled()); + schedule(respDramReadEvent, orbEntry->dccPkt->readyTime); + } + else { + assert(reqBuffer.at(addrDramRespReady.back())->dccPkt->readyTime + <= orbEntry->dccPkt->readyTime); + + assert(respDramReadEvent.scheduled()); + } + + addrDramRespReady.push_back(orbEntry->owPkt->getAddr()); + + if (addrDramRespReady.size() > maxDrRdRespEv) { + maxDrRdRespEv = addrDramRespReady.size(); + stats.maxDrRdRespEvQ = addrDramRespReady.size(); + } + + if (orbEntry->handleDirtyLine) { + numDirtyLinesInDrRdRespQ++; + } + + //** keep the state as it is, no transition + orbEntry->state = dramRead; + + pktDramRead[0].erase(to_read); + + if (!pktDramRead[0].empty()) { + + assert(!dramReadEvent.scheduled()); + + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } +} + +void +DcacheCtrl::processRespDramReadEvent() +{ + assert(!addrDramRespReady.empty()); + + reqBufferEntry* orbEntry = reqBuffer.at(addrDramRespReady.front()); + + // A series of sanity check + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isRead()); + assert(orbEntry->state == dramRead); + assert(orbEntry->dccPkt->readyTime == curTick()); + + if (orbEntry->handleDirtyLine) { + handleDirtyCacheLine(orbEntry); + } + + // A flag which is used for retrying read requests + // in case one slot in ORB becomes available here + // (happens only for read hits) + bool canRetry = false; + + dram->respondEvent(orbEntry->dccPkt->rank); + + // Read Hit + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency, + false); + reqBuffer.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->drRd, + orbEntry->drWr, + orbEntry->nvWait, + orbEntry->nvRd, + orbEntry->nvWr, + orbEntry->nvmIssueReadyTime, + orbEntry->dramRdDevTime, + orbEntry->dramWrDevTime, + orbEntry->nvmRdDevTime); + delete orbEntry; + + orbEntry = reqBuffer.at(addrDramRespReady.front()); + } + + // Write Hit + if (orbEntry->owPkt->isWrite() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + // This is a write request in initial read state. + // Delete its dcc packet which is read and create + // a new one which is write. 
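A standalone sketch, not part of the patch: the write-hit path below (and the miss-fill path in processRespNvmReadEvent()) queues a DRAM write and then decides whether to stall reads and schedule overallWriteEvent. The constructor derives writeHighThreshold and writeLowThreshold from the configured percentages of orb_max_size and picks dramWrDrainPerc (0.5 for ORBs of 512 entries or fewer, 0.25 otherwise). The numbers below use the DcacheCtrl.py defaults; the queue occupancies are hypothetical.

#include <cassert>
#include <iostream>

int main()
{
    // Defaults from DcacheCtrl.py.
    const unsigned orbMaxSize = 256;
    const unsigned writeHighPerc = 85;
    const unsigned writeLowPerc = 50;

    // Constructor arithmetic from dcache_ctrl.cc.
    const double writeHighThreshold = writeHighPerc * orbMaxSize / 100.0; // 217.6
    const double writeLowThreshold  = writeLowPerc * orbMaxSize / 100.0;  // 128
    const double dramWrDrainPerc    = (orbMaxSize > 512) ? 0.25 : 0.5;    // 0.5

    // Hypothetical queue occupancies:
    //   reads  = pktDramRead + pktNvmReadWaitIssue + pktNvmRead
    //   writes = pktDramWrite + pktNvmWrite
    unsigned pendingReads = 0;
    unsigned pendingWrites = 130;

    // Trigger checked after queuing work: stall reads and start draining
    // when there is nothing left to read, or the write backlog crosses
    // the high threshold.
    const bool stallReadsAndDrain =
        (pendingReads == 0 && pendingWrites != 0) ||
        (pendingWrites >= writeHighThreshold);
    assert(stallReadsAndDrain);

    std::cout << "forced DRAM drain at " << orbMaxSize * dramWrDrainPerc
              << " queued DRAM writes; high/low write thresholds "
              << writeHighThreshold << "/" << writeLowThreshold << std::endl;
    return 0;
}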
+ delete orbEntry->dccPkt; + + orbEntry->dccPkt = dram->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + false, true); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "false" to + // indicate a write access to dram + dram->setupRank(orbEntry->dccPkt->rank, false); + + //** transition to dramWrite + orbEntry->state = dramWrite; + orbEntry->drWr = curTick(); + + pktDramWrite[0].push_back(orbEntry->dccPkt); + + if (pktDramWrite[0].size() >= (orbMaxSize*dramWrDrainPerc)) { + stallRds = true; + drainDramWrite = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, + std::max(nextReqTime, curTick())); + } + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, + std::max(nextReqTime, curTick())); + } + + } + + if (pktDramWrite[0].size() > maxDrWrEv) { + maxDrWrEv = pktDramWrite[0].size(); + stats.maxDrWrEvQ = pktDramWrite[0].size(); + } + } + + // Miss + if ((orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + !orbEntry->isHit) || + (orbEntry->owPkt->isWrite() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + !orbEntry->isHit)) { + // initiate a NVM read + // delete the current dcc pkt which is dram read. + delete orbEntry->dccPkt; + + // creating an nvm read dcc-pkt + orbEntry->dccPkt = nvm->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + true, false); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "true" to + // indicate a read access to nvm + nvm->setupRank(orbEntry->dccPkt->rank, true); + + // ready time will be calculated later in doBurstAccess + // in processNvmReadEvent + orbEntry->dccPkt->readyTime = MaxTick; + + //** transition to waitingToIssueNvmRead + // setting the state to waitingToIssueNvmRead + orbEntry->state = waitingToIssueNvmRead; + orbEntry->nvWait = curTick(); + + if (pktNvmReadWaitIssue[0].empty() && !stallRds) { + assert(!waitingToIssueNvmReadEvent.scheduled()); + schedule(waitingToIssueNvmReadEvent, curTick()); + } + else { + assert(waitingToIssueNvmReadEvent.scheduled() || stallRds); + } + + pktNvmReadWaitIssue[0].push_back(orbEntry->dccPkt); + + if (pktNvmReadWaitIssue[0].size() > maxNvRdIssEv) { + maxNvRdIssEv = pktNvmReadWaitIssue[0].size(); + stats.maxNvRdIssEvQ = pktNvmReadWaitIssue[0].size(); + } + } + + if (orbEntry->handleDirtyLine) { + numDirtyLinesInDrRdRespQ--; + } + + addrDramRespReady.pop_front(); + + if (!addrDramRespReady.empty()) { + assert(reqBuffer.at(addrDramRespReady.front())->dccPkt->readyTime + >= curTick()); + assert(!respDramReadEvent.scheduled()); + schedule(respDramReadEvent, + reqBuffer.at(addrDramRespReady.front())->dccPkt->readyTime); + } else { + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + // if there is nothing left in any queue, signal a drain + if (drainState() == DrainState::Draining && + !wrsNum && !rdsNum && + allIntfDrained()) { + DPRINTF(Drain, "Controller done draining\n"); + signalDrainDone(); + } else if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + 
orbEntry->isHit) { + // check the refresh state and kick the refresh event loop + // into action again if banks already closed and just waiting + // for read to complete + dram->checkRefreshState(orbEntry->dccPkt->rank); + } + } + + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(orbEntry); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } +} + +void +DcacheCtrl::processWaitingToIssueNvmReadEvent() +{ + if (stallRds) { + return; + } + + if (nvm->readsWaitingToIssue()) { + assert(!pktNvmReadWaitIssue[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::WRITE); + + if (switched_cmd_type) { + stats.wrToRdTurnAround++; + } + + for (auto queue = pktNvmReadWaitIssue.rbegin(); + queue != pktNvmReadWaitIssue.rend(); ++queue) { + to_read = chooseNext((*queue), switched_cmd_type ? + minWriteToReadDataGap() : 0, false); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + auto e = reqBuffer.at(pktNvmReadWaitIssue[0].front()->getAddr()); + + if (read_found) { + e = reqBuffer.at((*to_read)->getAddr()); + } + + assert(e->validEntry); + assert(e->state == waitingToIssueNvmRead); + assert(!e->dccPkt->isDram()); + assert(!e->isHit); + + nvm->processReadPkt(e->dccPkt); + + e->nvmIssueReadyTime = e->dccPkt->readyTime; + + //** transition to nvmread + e->state = nvmRead; + e->nvRd = e->dccPkt->readyTime; + + if (pktNvmRead[0].empty()) { + assert(!nvmReadEvent.scheduled()); + schedule(nvmReadEvent, std::max(nextReqTime, + e->dccPkt->readyTime+1)); + } else if (nvmReadEvent.when() > e->dccPkt->readyTime) { + // move it sooner in time, to the first read with data + reschedule(nvmReadEvent, std::max(nextReqTime, + e->dccPkt->readyTime+1)); + } else { + assert(nvmReadEvent.scheduled()); + } + + pktNvmRead[0].push_back(e->dccPkt); + + if (pktNvmRead[0].size() > maxNvRdEv) { + maxNvRdEv = pktNvmRead[0].size(); + stats.maxNvRdEvQ = pktNvmRead[0].size(); + } + + if (read_found) { + pktNvmReadWaitIssue[0].erase(to_read); + } + else { + pktNvmReadWaitIssue[0].erase(pktNvmReadWaitIssue[0].begin()); + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + } + + else { + assert(!pktNvmRead[0].empty()); + schedule(waitingToIssueNvmReadEvent, nvmReadEvent.when()+2); + return; + } + + if (!waitingToIssueNvmReadEvent.scheduled() && + !pktNvmReadWaitIssue[0].empty()) { + schedule(waitingToIssueNvmReadEvent, curTick()); + } + +} + +void +DcacheCtrl::processNvmReadEvent() +{ + if (stallRds) { + return; + } + + assert(!pktNvmRead[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::WRITE); + + if (switched_cmd_type) { + stats.wrToRdTurnAround++; + } + + busState = DcacheCtrl::READ; + + for (auto queue = pktNvmRead.rbegin(); + queue != pktNvmRead.rend(); ++queue) { + to_read = chooseNext((*queue), switched_cmd_type ? 
+ minWriteToReadDataGap() : 0, false); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + reqBufferEntry* e; + + int index = 0; + + if (read_found) { + e = reqBuffer.at((*to_read)->getAddr()); + } + else { + schedule(nvmReadEvent, + std::max(nextReqTime, curTick()+nvm->getTBurst())); + + return; + } + + assert(e->validEntry); + assert(!e->isHit); + assert(!e->dccPkt->isDram()); + assert(e->state == nvmRead); + + assert(packetReady(e->dccPkt)); + + Tick cmd_at = doBurstAccess(e->dccPkt); + + e->nvmRdDevTime = e->dccPkt->readyTime - cmd_at; + + // sanity check + assert(e->dccPkt->size <= (e->dccPkt->isDram() ? + dram->bytesPerBurst() : + nvm->bytesPerBurst())); + assert(e->dccPkt->readyTime >= curTick()); + + if (e->owPkt->isRead() && !e->isHit) { + logResponse(DcacheCtrl::READ, + e->dccPkt->requestorId(), + e->dccPkt->qosValue(), + e->owPkt->getAddr(), 1, + e->dccPkt->readyTime - e->dccPkt->entryTime); + } + + if (addrNvmRespReady.empty()) { + assert(!respNvmReadEvent.scheduled()); + schedule(respNvmReadEvent, e->dccPkt->readyTime); + } + else { + assert(reqBuffer.at(addrNvmRespReady.back())->dccPkt->readyTime + <= e->dccPkt->readyTime); + + assert(respNvmReadEvent.scheduled()); + } + + addrNvmRespReady.push_back(e->owPkt->getAddr()); + + if (addrNvmRespReady.size() > maxNvRdRespEv) { + maxNvRdRespEv = addrNvmRespReady.size(); + stats.maxNvRdRespEvQ = addrNvmRespReady.size(); + } + + //** keeping the state as it is, no transition + e->state = nvmRead; + + if (read_found) { + pktNvmRead[0].erase(to_read); + } + else { + pktNvmRead[0].erase(pktNvmRead[0].begin()+index); + } + + if (!pktNvmRead[0].empty()) { + assert(!nvmReadEvent.scheduled()); + auto min = pktNvmRead[0].front(); + //index = 0; + for (int i=0; ireadyTime > pktNvmRead[0].at(i)->readyTime && + min->readyTime != pktNvmRead[0].at(i)->readyTime) { + min = pktNvmRead[0].at(i); + //index = i; + } + } + Tick maxTick = std::max(nextReqTime, curTick()); + schedule(nvmReadEvent, std::max(maxTick, min->readyTime+1)); + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if (//(rdsNum == 0 && wrsNum != 0 && wrsNum >= writeLowThreshold) || + (rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } +} + +void +DcacheCtrl::processRespNvmReadEvent() +{ + assert(!addrNvmRespReady.empty()); + + reqBufferEntry* orbEntry = reqBuffer.at(addrNvmRespReady.front()); + + // A series of sanity check + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isRead()); + assert(!orbEntry->dccPkt->isDram()); + assert(orbEntry->state == nvmRead); + assert(!orbEntry->isHit); + assert(orbEntry->dccPkt->readyTime == curTick()); + + // Read miss from dram cache, now is available + // to send the response back to requestor + if (orbEntry->owPkt->isRead() && !orbEntry->isHit) { + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency, + false); + reqBuffer.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + 
orbEntry->drRd, + orbEntry->drWr, + orbEntry->nvWait, + orbEntry->nvRd, + orbEntry->nvWr, + orbEntry->nvmIssueReadyTime, + orbEntry->dramRdDevTime, + orbEntry->dramWrDevTime, + orbEntry->nvmRdDevTime); + delete orbEntry; + orbEntry = reqBuffer.at(addrNvmRespReady.front()); + + } + + // There has been a DRAM cache miss, + // initiate a DRAM write to bring it to DRAM cache + delete orbEntry->dccPkt; + + // creating a new dram write dcc-pkt + orbEntry->dccPkt = dram->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + false, + true); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "false" to + // indicate a write access to dram + dram->setupRank(orbEntry->dccPkt->rank, false); + + //** transition to dramWrite + // update the state of the orb entry + orbEntry->state = dramWrite; + orbEntry->drWr = curTick(); + + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isWrite()); + assert(orbEntry->state==dramWrite); + pktDramWrite[0].push_back(reqBuffer.at(addrNvmRespReady.front())->dccPkt); + + if (pktDramWrite[0].size() >= (orbMaxSize*dramWrDrainPerc)) { + + stallRds = true; + + drainDramWrite = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + + if (pktDramWrite[0].size() > maxDrWrEv) { + maxDrWrEv = pktDramWrite[0].size(); + stats.maxDrWrEvQ = pktDramWrite[0].size(); + } + + addrNvmRespReady.pop_front(); + + if (!addrNvmRespReady.empty()) { + assert(reqBuffer.at(addrNvmRespReady.front())->dccPkt->readyTime + >= curTick()); + assert(!respNvmReadEvent.scheduled()); + schedule(respNvmReadEvent, + reqBuffer.at(addrNvmRespReady.front())->dccPkt->readyTime); + } +} + +void +DcacheCtrl::processOverallWriteEvent() +{ + assert(stallRds); + + assert(!pktDramWrite[0].empty() || !pktNvmWrite[0].empty()); + + if (drainDramWrite) { + + drWrCounter++; + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::READ); + + if (switched_cmd_type) { + stats.rdToWrTurnAround++; + } + + for (auto queue = pktDramWrite.rbegin(); + queue != pktDramWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? + minReadToWriteDataGap() : 0, true); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + if (!write_found) { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+dram->getTBurst())); + + return; + } + + auto e = reqBuffer.at(pktDramWrite[0].front()->getAddr()); + + if (write_found) { + e = reqBuffer.at((*to_write)->getAddr()); + } + + bool canRetry = false; + + assert(e->validEntry); + if (e->owPkt->isRead()) { + assert(!e->isHit); + } + assert(e->dccPkt->isDram()); + assert(e->state == dramWrite); + assert(e->dccPkt->size <= + (e->dccPkt->isDram() ? 
+ dram->bytesPerBurst() : + nvm->bytesPerBurst()) ); + + busState = DcacheCtrl::WRITE; + + assert(packetReady(e->dccPkt)); + + Tick cmd_at = doBurstAccess(e->dccPkt); + + e->dramWrDevTime = e->dccPkt->readyTime - cmd_at; + + if (e->owPkt->isWrite()) { + // log the response + logResponse(DcacheCtrl::WRITE, + e->dccPkt->requestorId(), + e->dccPkt->qosValue(), + e->owPkt->getAddr(), 1, + e->dccPkt->readyTime - + e->dccPkt->entryTime); + } + + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(e); + + if (write_found) { + pktDramWrite[0].erase(to_write); + } + else { + pktDramWrite[0].erase(pktDramWrite[0].begin()); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } + + if (drWrCounter < minWritesPerSwitch && !pktDramWrite[0].empty()) { + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = true; + + stallRds = true; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + } + else if (drainNvmWrite) { + + assert(!pktNvmWrite[0].empty()); + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = false; + + stallRds = true; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + } + else if ((pktDramRead[0].empty() && pktNvmReadWaitIssue[0].empty() + && pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty())) { + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = false; + + drainNvmWrite = false; + + stallRds = true; + + drWrCounter = 0; + + nvWrCounter = 0; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + + } + else { + drainDramWrite = false; + stallRds = false; + drWrCounter = 0; + nvWrCounter = 0; + + if (!pktDramRead[0].empty() && !dramReadEvent.scheduled()) { + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + if (!pktNvmReadWaitIssue[0].empty() && + !waitingToIssueNvmReadEvent.scheduled()) { + schedule(waitingToIssueNvmReadEvent, + std::max(nextReqTime, curTick())); + } + if (!pktNvmRead[0].empty() && !nvmReadEvent.scheduled()) { + schedule(nvmReadEvent, std::max(nextReqTime, curTick())); + } + return; + } + } + + if (drainNvmWrite) { + + if (!nvm->writeRespQueueFull()) { + + nvWrCounter++; + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::READ); + + if (switched_cmd_type) { + stats.rdToWrTurnAround++; + } + + busState = DcacheCtrl::WRITE; + + for (auto queue = pktNvmWrite.rbegin(); + queue != pktNvmWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? 
+ minReadToWriteDataGap() : 0, false); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + auto e = pktNvmWrite[0].front(); + + if (write_found) { + e = (*to_write); + } + else { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+nvm->getTBurst())); + + return; + } + + // a series of sanity checks + assert(!e->isDram()); + assert(e->isWrite()); + assert(e->size <= nvm->bytesPerBurst()); + + assert(packetReady(e)); + + Tick cmd_at = doBurstAccess(e); + + long long int stateTick = + (e->readyTime - e->entryTime); + + assert(stateTick > 0); + + stats.timeInNvmWrite += stateTick; + + stats.totNumPktsNvmWr++; + + stats.nvWrDevTime += (e->readyTime - cmd_at); + + long long int stateQT = + (cmd_at - e->entryTime); + + assert(stateQT >= 0); + + stats.nvmWrQingTime += stateQT; + + delete e; + + if (write_found) { + pktNvmWrite[0].erase(to_write); + } + else { + pktNvmWrite[0].erase(pktNvmWrite[0].begin()); + } + + if (nvWrCounter < minWritesPerSwitch && !pktNvmWrite[0].empty()) { + assert(!overallWriteEvent.scheduled()); + + drainNvmWrite = true; + + stallRds = true; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + } + else if ((pktDramRead[0].empty() && pktNvmReadWaitIssue[0].empty() + && pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty())) { + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = false; + + drainNvmWrite = false; + + stallRds = true; + + drWrCounter = 0; + + nvWrCounter = 0; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + + } + else { + drainNvmWrite = false; + stallRds = false; + drWrCounter = 0; + nvWrCounter = 0; + + if (!pktDramRead[0].empty() && !dramReadEvent.scheduled()) { + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + if (!pktNvmReadWaitIssue[0].empty() && + !waitingToIssueNvmReadEvent.scheduled()) { + schedule(waitingToIssueNvmReadEvent, + std::max(nextReqTime, curTick())); + } + if (!pktNvmRead[0].empty() && !nvmReadEvent.scheduled()) { + schedule(nvmReadEvent, std::max(nextReqTime, curTick())); + } + return; + } + } + + if (!overallWriteEvent.scheduled() && + !pktNvmWrite[0].empty() && + nvm->writeRespQueueFull()) { + schedule(overallWriteEvent, std::max(nextReqTime, + nvm->writeRespQueueFront()+1)); + return; + } + } + else if ((!pktDramWrite[0].empty() && drWrCounter < minDrWrPerSwitch) || + (!pktDramWrite[0].empty() && pktNvmWrite[0].empty() && + (drWrCounter+nvWrCounter)end()) { + // candidate write found + write_found = true; + break; + } + } + + if (!write_found) { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+dram->getTBurst())); + + return; + } + + auto e = reqBuffer.at(pktDramWrite[0].front()->getAddr()); + + if (write_found) { + e = reqBuffer.at((*to_write)->getAddr()); + } + + bool canRetry = false; + + assert(e->validEntry); + if (e->owPkt->isRead()) { + assert(!e->isHit); + } + assert(e->dccPkt->isDram()); + assert(e->state == dramWrite); + assert(e->dccPkt->size <= + (e->dccPkt->isDram() ? 
+ dram->bytesPerBurst() : + nvm->bytesPerBurst()) ); + + busState = DcacheCtrl::WRITE; + + assert(packetReady(e->dccPkt)); + + Tick cmd_at = doBurstAccess(e->dccPkt); + + e->dramWrDevTime = e->dccPkt->readyTime - cmd_at; + + if (e->owPkt->isWrite()) { + // log the response + logResponse(DcacheCtrl::WRITE, + e->dccPkt->requestorId(), + e->dccPkt->qosValue(), + e->owPkt->getAddr(), 1, + e->dccPkt->readyTime - + e->dccPkt->entryTime); + } + + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(e); + + if (write_found) { + pktDramWrite[0].erase(to_write); + } + else { + pktDramWrite[0].erase(pktDramWrite[0].begin()); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } + } + + else if ((!pktNvmWrite[0].empty() && nvWrCounter < minNvWrPerSwitch) || + (!pktNvmWrite[0].empty() && pktDramWrite[0].empty() && + (drWrCounter+nvWrCounter)writeRespQueueFull()) { + + nvWrCounter++; + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::READ); + + if (switched_cmd_type) { + stats.rdToWrTurnAround++; + } + + busState = DcacheCtrl::WRITE; + + for (auto queue = pktNvmWrite.rbegin(); + queue != pktNvmWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? + minReadToWriteDataGap() : 0, false); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + auto e = pktNvmWrite[0].front(); + + if (write_found) { + e = (*to_write); + } + else { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+nvm->getTBurst())); + + return; + } + + // a series of sanity checks + assert(!e->isDram()); + assert(e->isWrite()); + assert(e->size <= nvm->bytesPerBurst()); + + assert(packetReady(e)); + + Tick cmd_at = doBurstAccess(e); + + long long int stateTick = + (e->readyTime - e->entryTime); + + assert(stateTick > 0); + + stats.timeInNvmWrite += stateTick; + + stats.totNumPktsNvmWr++; + + stats.nvWrDevTime += (e->readyTime - cmd_at); + + long long int stateQT = + (cmd_at - e->entryTime); + + assert(stateQT >= 0); + + stats.nvmWrQingTime += stateQT; + + delete e; + + if (write_found) { + pktNvmWrite[0].erase(to_write); + } + else { + pktNvmWrite[0].erase(pktNvmWrite[0].begin()); + } + } + + if (!overallWriteEvent.scheduled() && + !pktNvmWrite[0].empty() && + nvm->writeRespQueueFull()) { + schedule(overallWriteEvent, std::max(nextReqTime, + nvm->writeRespQueueFront()+1)); + return; + } + } + + if ( + !overallWriteEvent.scheduled() && + ( + ( + (!pktDramRead[0].empty() || !pktNvmReadWaitIssue[0].empty() + || !pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty()) && + (drWrCounter + nvWrCounter < minWritesPerSwitch) + ) || + ( + (pktDramRead[0].empty() && pktNvmReadWaitIssue[0].empty() + && pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty()) + ) + ) + ) { + stallRds = true; + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + else { + stallRds = false; + + drWrCounter = 0; + + nvWrCounter = 0; + + if (!pktDramRead[0].empty() && !dramReadEvent.scheduled()) { + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + if (!pktNvmReadWaitIssue[0].empty() && + !waitingToIssueNvmReadEvent.scheduled()) { + schedule(waitingToIssueNvmReadEvent, + std::max(nextReqTime, curTick())); + } + if (!pktNvmRead[0].empty() && !nvmReadEvent.scheduled()) { + schedule(nvmReadEvent, std::max(nextReqTime, curTick())); 
+ } + } +} + + +void +DcacheCtrl::processRespondEvent() +{ + +} + +void +DcacheCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency, bool in_dram) +{ + DPRINTF(DcacheCtrl, "Responding to Address %lld.. \n",pkt->getAddr()); + + bool needsResponse = pkt->needsResponse(); + // do the actual memory access which also turns the packet into a + // response + if (in_dram && dram && dram->getAddrRange().contains(pkt->getAddr())) { + dram->access(pkt); + } else if (!in_dram && nvm && + nvm->getAddrRange().contains(pkt->getAddr())) { + nvm->access(pkt); + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + // turn packet around to go back to requestor if response expected + if (needsResponse) { + // access already turned the packet into a response + assert(pkt->isResponse()); + // response_time consumes the static latency and is charged also + // with headerDelay that takes into account the delay provided by + // the xbar and also the payloadDelay that takes into account the + // number of data beats. + Tick response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. + pkt->headerDelay = pkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(pkt, response_time); + } else { + // @todo the packet is going to be deleted, and the MemPacket + // is still having a pointer to it + pendingDelete.reset(pkt); + } + + DPRINTF(DcacheCtrl, "Done\n"); + + return; +} + +void +DcacheCtrl::pruneBurstTick() +{ + auto it = burstTicks.begin(); + while (it != burstTicks.end()) { + auto current_it = it++; + if (curTick() > *current_it) { + DPRINTF(DcacheCtrl, "Removing burstTick for %d\n", *current_it); + burstTicks.erase(current_it); + } + } +} + +Tick +DcacheCtrl::getBurstWindow(Tick cmd_tick) +{ + // get tick aligned to burst window + Tick burst_offset = cmd_tick % commandWindow; + return (cmd_tick - burst_offset); +} + +Tick +DcacheCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) +{ + // start with assumption that there is no contention on command bus + Tick cmd_at = cmd_tick; + + // get tick aligned to burst window + Tick burst_tick = getBurstWindow(cmd_tick); + + // verify that we have command bandwidth to issue the command + // if not, iterate over next window(s) until slot found + while (burstTicks.count(burst_tick) >= max_cmds_per_burst) { + DPRINTF(DcacheCtrl, "Contention found on command bus at %d\n", + burst_tick); + burst_tick += commandWindow; + cmd_at = burst_tick; + } + + // add command into burst window and return corresponding Tick + burstTicks.insert(burst_tick); + return cmd_at; +} + +Tick +DcacheCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split) +{ + // start with assumption that there is no contention on command bus + Tick cmd_at = cmd_tick; + + // get tick aligned to burst window + Tick burst_tick = getBurstWindow(cmd_tick); + + // Command timing requirements are from 2nd command + // Start with assumption that 2nd command will issue at cmd_at and + // find prior slot for 1st command to issue + // Given a maximum latency of max_multi_cmd_split between the commands, + // find the burst at the maximum latency prior to cmd_at + Tick burst_offset = 0; + Tick first_cmd_offset = cmd_tick % commandWindow; + while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) { + burst_offset += commandWindow; + } + // get the earliest 
burst aligned address for first command + // ensure that the time does not go negative + Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick); + + // Can required commands issue? + bool first_can_issue = false; + bool second_can_issue = false; + // verify that we have command bandwidth to issue the command(s) + while (!first_can_issue || !second_can_issue) { + bool same_burst = (burst_tick == first_cmd_tick); + auto first_cmd_count = burstTicks.count(first_cmd_tick); + auto second_cmd_count = same_burst ? first_cmd_count + 1 : + burstTicks.count(burst_tick); + + first_can_issue = first_cmd_count < max_cmds_per_burst; + second_can_issue = second_cmd_count < max_cmds_per_burst; + + if (!second_can_issue) { + DPRINTF(DcacheCtrl, "Contention (cmd2) found on " + "command bus at %d\n", + burst_tick); + burst_tick += commandWindow; + cmd_at = burst_tick; + } + + // Verify max_multi_cmd_split isn't violated when command 2 is shifted + // If commands initially were issued in same burst, they are + // now in consecutive bursts and can still issue B2B + bool gap_violated = !same_burst && + ((burst_tick - first_cmd_tick) > max_multi_cmd_split); + + if (!first_can_issue || (!second_can_issue && gap_violated)) { + DPRINTF(DcacheCtrl, "Contention (cmd1) found on " + "command bus at %d\n", + first_cmd_tick); + first_cmd_tick += commandWindow; + } + } + + // Add command to burstTicks + burstTicks.insert(burst_tick); + burstTicks.insert(first_cmd_tick); + + return cmd_at; +} + +bool +DcacheCtrl::inReadBusState(bool next_state) const +{ + // check the bus state + if (next_state) { + // use busStateNext to get the state that will be used + // for the next burst + return (busStateNext == DcacheCtrl::READ); + } else { + return (busState == DcacheCtrl::READ); + } +} + +bool +DcacheCtrl::inWriteBusState(bool next_state) const +{ + // check the bus state + if (next_state) { + // use busStateNext to get the state that will be used + // for the next burst + return (busStateNext == DcacheCtrl::WRITE); + } else { + return (busState == DcacheCtrl::WRITE); + } +} + +Tick +DcacheCtrl::doBurstAccess(MemPacket* dcc_pkt) +{ + // first clean up the burstTick set, removing old entries + // before adding new entries for next burst + pruneBurstTick(); + + // When was command issued? + Tick cmd_at; + + // Issue the next burst and update bus state to reflect + // when previous command was issued + if (dcc_pkt->isDram()) { + std::tie(cmd_at, nextBurstAt) = + dram->doBurstAccess(dcc_pkt, nextBurstAt);//, queue); + + // Update timing for NVM ranks if NVM is configured on this channel + if (nvm) + nvm->addRankToRankDelay(cmd_at); + + } else { + std::tie(cmd_at, nextBurstAt) = + nvm->doBurstAccess(dcc_pkt, nextBurstAt); + + // Update timing for NVM ranks if NVM is configured on this channel + if (dram) + dram->addRankToRankDelay(cmd_at); + + } + + DPRINTF(DcacheCtrl, "Access to %lld, ready at %lld next burst at %lld.\n", + dcc_pkt->addr, dcc_pkt->readyTime, nextBurstAt); + + // Update the minimum timing between the requests, this is a + // conservative estimate of when we have to schedule the next + // request to not introduce any unecessary bubbles. In most cases + // we will wake up sooner than we have to. + nextReqTime = nextBurstAt - (dram ? 
dram->commandOffset() : + nvm->commandOffset()); + + + // Update the common bus stats + if (dcc_pkt->pkt != nullptr) { + if (dcc_pkt->isRead()) { + // Update latency stats + stats.requestorReadTotalLat[dcc_pkt->requestorId()] += + dcc_pkt->readyTime - dcc_pkt->entryTime; + stats.requestorReadBytes[dcc_pkt->requestorId()] += dcc_pkt->size; + } else { + stats.requestorWriteBytes[dcc_pkt->requestorId()] += dcc_pkt->size; + stats.requestorWriteTotalLat[dcc_pkt->requestorId()] += + dcc_pkt->readyTime - dcc_pkt->entryTime; + } + } + return cmd_at; +} + +void +DcacheCtrl::processNextReqEvent() +{ + +} + +bool +DcacheCtrl::packetReady(MemPacket* pkt) +{ + return (pkt->isDram() ? + dram->burstReady(pkt) : nvm->burstReadyDCache(pkt)); +} + +Tick +DcacheCtrl::minReadToWriteDataGap() +{ + Tick dram_min = dram ? dram->minReadToWriteDataGap() : MaxTick; + Tick nvm_min = nvm ? nvm->minReadToWriteDataGap() : MaxTick; + return std::min(dram_min, nvm_min); +} + +Tick +DcacheCtrl::minWriteToReadDataGap() +{ + Tick dram_min = dram ? dram->minWriteToReadDataGap() : MaxTick; + Tick nvm_min = nvm ? nvm->minWriteToReadDataGap() : MaxTick; + return std::min(dram_min, nvm_min); +} + +MemPacketQueue::iterator +DcacheCtrl::chooseNext(MemPacketQueue& queue, Tick extra_col_delay, + bool is_dram) +{ + // This method does the arbitration between requests. + + MemPacketQueue::iterator ret = queue.end(); + + if (!queue.empty()) { + if (queue.size() == 1) { + // available rank corresponds to state refresh idle + MemPacket* mem_pkt = *(queue.begin()); + if (packetReady(mem_pkt)) { + ret = queue.begin(); + DPRINTF(DcacheCtrl, "Single request, going to a free rank\n"); + } else { + DPRINTF(DcacheCtrl, "Single request, going to a busy rank\n"); + } + } else if (memSchedPolicy == Enums::fcfs) { + // check if there is a packet going to a free rank + for (auto i = queue.begin(); i != queue.end(); ++i) { + MemPacket* mem_pkt = *i; + if (packetReady(mem_pkt)) { + ret = i; + break; + } + } + } else if (memSchedPolicy == Enums::frfcfs) { + ret = chooseNextFRFCFS(queue, extra_col_delay, is_dram); + } else { + panic("No scheduling policy chosen\n"); + } + } + return ret; +} + +MemPacketQueue::iterator +DcacheCtrl::chooseNextFRFCFS(MemPacketQueue& queue, Tick extra_col_delay, + bool is_dram) +{ + auto selected_pkt_it = queue.end(); + Tick col_allowed_at = MaxTick; + + // time we need to issue a column command to be seamless + const Tick min_col_at = std::max(nextBurstAt + extra_col_delay, curTick()); + + if (is_dram) { + std::tie(selected_pkt_it, col_allowed_at) = + dram->chooseNextFRFCFS(queue, min_col_at); + } else { + std::tie(selected_pkt_it, col_allowed_at) = + nvm->chooseNextFRFCFSDCache(queue, min_col_at); + } + + if (selected_pkt_it == queue.end()) { + DPRINTF(DcacheCtrl, "%s no available packets found\n", __func__); + } + + return selected_pkt_it; +} + +Addr +DcacheCtrl::burstAlign(Addr addr, bool is_dram) const +{ + if (is_dram) + return (addr & ~(Addr(dram->bytesPerBurst() - 1))); + else + return (addr & ~(Addr(nvm->bytesPerBurst() - 1))); +} + +DcacheCtrl::CtrlStats::CtrlStats(DcacheCtrl &_ctrl) + : Stats::Group(&_ctrl), + ctrl(_ctrl), + + ADD_STAT(readReqs, "Number of read requests accepted"), + ADD_STAT(writeReqs, "Number of write requests accepted"), + + ADD_STAT(readBursts, + "Number of controller read bursts, " + "including those serviced by the write queue"), + ADD_STAT(writeBursts, + "Number of controller write bursts, " + "including those merged in the write queue"), + ADD_STAT(servicedByWrQ, + "Number of controller 
read bursts serviced by the write queue"), + ADD_STAT(mergedWrBursts, + "Number of controller write bursts merged with an existing one"), + + //ADD_STAT(neitherReadNorWriteReqs, + // "Number of requests that are neither read nor write"), + + ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"), + ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"), + + ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"), + ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"), + + ADD_STAT(readPktSize, "Read request sizes (log2)"), + ADD_STAT(writePktSize, "Write request sizes (log2)"), + + ADD_STAT(rdToWrTurnAround, "Read to write switch"), + ADD_STAT(wrToRdTurnAround, "Write to read switch)"), + + //ADD_STAT(rdQLenPdf, "What read queue length does an incoming req see"), + //ADD_STAT(wrQLenPdf, "What write queue length does an incoming req see"), + + //ADD_STAT(rdPerTurnAround, + // "Reads before turning the bus around for writes"), + //ADD_STAT(wrPerTurnAround, + // "Writes before turning the bus around for reads"), + + ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"), + ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"), + ADD_STAT(bytesWrittenSys, + "Total written bytes from the system interface side"), + + ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"), + ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"), + + ADD_STAT(totGap, "Total gap between requests"), + ADD_STAT(avgGap, "Average gap between requests"), + + ADD_STAT(requestorReadBytes, + "Per-requestor bytes read from memory"), + ADD_STAT(requestorWriteBytes, + "Per-requestor bytes write to memory"), + ADD_STAT(requestorReadRate, + "Per-requestor bytes read from memory rate (Bytes/sec)"), + ADD_STAT(requestorWriteRate, + "Per-requestor bytes write to memory rate (Bytes/sec)"), + ADD_STAT(requestorReadAccesses, + "Per-requestor read serviced memory accesses"), + ADD_STAT(requestorWriteAccesses, + "Per-requestor write serviced memory accesses"), + ADD_STAT(requestorReadTotalLat, + "Per-requestor read total memory access latency"), + ADD_STAT(requestorWriteTotalLat, + "Per-requestor write total memory access latency"), + ADD_STAT(requestorReadAvgLat, + "Per-requestor read average memory access latency"), + ADD_STAT(requestorWriteAvgLat, + "Per-requestor write average memory access latency"), + + ADD_STAT(numHits, + "Total number of hits on DRAM cache"), + ADD_STAT(numMisses, + "Total number of misses on DRAM cache"), + ADD_STAT(numRdHits, + "Total number of read hits on DRAM cache"), + ADD_STAT(numWrHits, + "Total number of write hits on DRAM cache"), + ADD_STAT(numRdMisses, + "Total number of read misses on DRAM cache"), + ADD_STAT(numWrMisses, + "Total number of write misses on DRAM cache"), + ADD_STAT(numColdMisses, + "Total number of misses on DRAM cache due to" + " first reference to a cache block"), + ADD_STAT(numHotMisses, + "Total number of misses on DRAM cache that are not cold miss"), + ADD_STAT(numWrBacks, + "Total number of write backs from DRAM cache to main memory"), + ADD_STAT(totNumConf, + "Total number of packets conflicted on DRAM cache"), + ADD_STAT(totNumConfBufFull, + "Total number of packets conflicted and couldn't " + "enter confBuffer"), + + ADD_STAT(timeInDramRead, + "Total time spent in dram read state in ns"), + ADD_STAT(timeInDramWrite, + "Total time spent in dram write state in ns"), + ADD_STAT(timeInWaitingToIssueNvmRead, + "Total time spent in waitingToIssueNvmRead state in 
ns"), + ADD_STAT(timeInNvmRead, + "Total time spent in nvmRead state in ns"), + ADD_STAT(timeInNvmWrite, + "Total time spent in nvmWrite state in ns"), + + ADD_STAT(drRdQingTime, + "Total time spent as DRAM read queuing time in ns"), + ADD_STAT(drWrQingTime, + "Total time spent as DRAM write queuing time in ns"), + ADD_STAT(nvmRdQingTime, + "Total time spent as NVM read queuing time in ns"), + ADD_STAT(nvmWrQingTime, + "Total time spent as NVM write queuing time in ns"), + + ADD_STAT(drRdDevTime, + "Total time spent as DRAM read device time in Ticks"), + ADD_STAT(drWrDevTime, + "Total time spent as DRAM write device time in Ticks"), + ADD_STAT(nvRdDevTime, + "Total time spent as NVM read device time in Ticks"), + ADD_STAT(nvWrDevTime, + "Total time spent as NVM write device time in Ticks"), + + ADD_STAT(totNumPktsDrRd, + "Total number of packets enterted to Dram read state"), + ADD_STAT(totNumPktsDrWr, + "Total number of packets enterted to Dram write state"), + ADD_STAT(totNumPktsNvmRdWait, + "Total number of packets enterted to NVM waitToIssue state"), + ADD_STAT(totNumPktsNvmRd, + "Total number of packets enterted to NVM read state"), + ADD_STAT(totNumPktsNvmWr, + "Total number of packets enterted to NVM write state"), + + ADD_STAT(maxNumConf, + "Maximum number of packets conflicted on DRAM cache"), + ADD_STAT(maxDrRdEvQ, + "Maximum number of packets in DrRdEvent concurrently"), + ADD_STAT(maxDrRdRespEvQ, + "Maximum number of packets in DrRdRespEvent concurrently"), + ADD_STAT(maxDrWrEvQ, + "Maximum number of packets in DrWrEvent concurrently"), + ADD_STAT(maxNvRdIssEvQ, + "Maximum number of packets in NvRdIssEvent concurrently"), + ADD_STAT(maxNvRdEvQ, + "Maximum number of packets in NvRdEvent concurrently"), + ADD_STAT(maxNvRdRespEvQ, + "Maximum number of packets in NvRdRespEvent concurrently"), + ADD_STAT(maxNvWrEvQ, + "Maximum number of packets in NvWrEvent concurrently") + +{ +} + +void +DcacheCtrl::CtrlStats::regStats() +{ + using namespace Stats; + + assert(ctrl.system()); + const auto max_requestors = ctrl.system()->maxRequestors(); + + avgRdQLen.precision(2); + avgWrQLen.precision(2); + + readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + + avgRdBWSys.precision(2); + avgWrBWSys.precision(2); + avgGap.precision(2); + + // per-requestor bytes read and written to memory + requestorReadBytes + .init(max_requestors) + .flags(nozero | nonan); + + requestorWriteBytes + .init(max_requestors) + .flags(nozero | nonan); + + // per-requestor bytes read and written to memory rate + requestorReadRate + .flags(nozero | nonan) + .precision(12); + + requestorReadAccesses + .init(max_requestors) + .flags(nozero); + + requestorWriteAccesses + .init(max_requestors) + .flags(nozero); + + requestorReadTotalLat + .init(max_requestors) + .flags(nozero | nonan); + + requestorReadAvgLat + .flags(nonan) + .precision(2); + + requestorWriteRate + .flags(nozero | nonan) + .precision(12); + + requestorWriteTotalLat + .init(max_requestors) + .flags(nozero | nonan); + + requestorWriteAvgLat + .flags(nonan) + .precision(2); + + for (int i = 0; i < max_requestors; i++) { + const std::string requestor = ctrl.system()->getRequestorName(i); + requestorReadBytes.subname(i, requestor); + requestorReadRate.subname(i, requestor); + requestorWriteBytes.subname(i, requestor); + requestorWriteRate.subname(i, requestor); + requestorReadAccesses.subname(i, requestor); + requestorWriteAccesses.subname(i, requestor); + 
requestorReadTotalLat.subname(i, requestor); + requestorReadAvgLat.subname(i, requestor); + requestorWriteTotalLat.subname(i, requestor); + requestorWriteAvgLat.subname(i, requestor); + } + + // Formula stats + avgRdBWSys = (bytesReadSys / 1000000) / simSeconds; + avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds; + + avgGap = totGap / (readReqs + writeReqs); + + requestorReadRate = requestorReadBytes / simSeconds; + requestorWriteRate = requestorWriteBytes / simSeconds; + requestorReadAvgLat = requestorReadTotalLat / requestorReadAccesses; + requestorWriteAvgLat = requestorWriteTotalLat / requestorWriteAccesses; +} + +void +DcacheCtrl::recvFunctional(PacketPtr pkt) +{ + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + dram->functionalAccess(pkt); + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + nvm->functionalAccess(pkt); + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } +} + +Port & +DcacheCtrl::getPort(const std::string &if_name, PortID idx) +{ + if (if_name != "port") { + return QoS::MemCtrl::getPort(if_name, idx); + } else { + return port; + } +} + +bool +DcacheCtrl::allIntfDrained() const +{ + // ensure dram is in power down and refresh IDLE states + bool dram_drained = !dram || dram->allRanksDrained(); + // No outstanding NVM writes + // All other queues verified as needed with calling logic + bool nvm_drained = !nvm || nvm->allRanksDrained(); + return (dram_drained && nvm_drained); +} + +DrainState +DcacheCtrl::drain() +{ + // if there is anything in any of our internal queues, keep track + // of that as well + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + if (!(!wrsNum && !rdsNum && + addrDramRespReady.empty() && + allIntfDrained())) { + + DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d," + " resp: %d\n", wrsNum, rdsNum, + addrDramRespReady.size()); + + // the only queue that is not drained automatically over time + // is the write queue, thus kick things into action if needed + + if (dram) + dram->drainRanks(); + + return DrainState::Draining; + } else { + return DrainState::Drained; + } +} + +void +DcacheCtrl::drainResume() +{ + if (!isTimingMode && system()->isTimingMode()) { + // if we switched to timing mode, kick things into action, + // and behave as if we restored from a checkpoint + startup(); + dram->startup(); + } else if (isTimingMode && !system()->isTimingMode()) { + // if we switch from timing mode, stop the refresh events to + // not cause issues with KVM + if (dram) + dram->suspend(); + } + + // update the mode + isTimingMode = system()->isTimingMode(); +} + +DcacheCtrl::MemoryPort::MemoryPort(const std::string& name, DcacheCtrl& _ctrl) + : QueuedResponsePort(name, &_ctrl, queue), queue(_ctrl, *this, true), + ctrl(_ctrl) +{ } + +AddrRangeList +DcacheCtrl::MemoryPort::getAddrRanges() const +{ + AddrRangeList ranges; + if (ctrl.dram) { + DPRINTF(DRAM, "Pushing DRAM ranges to port\n"); + ranges.push_back(ctrl.dram->getAddrRange()); + } + if (ctrl.nvm) { + DPRINTF(NVM, "Pushing NVM ranges to port\n"); + ranges.push_back(ctrl.nvm->getAddrRange()); + } + return ranges; +} + +void +DcacheCtrl::MemoryPort::recvFunctional(PacketPtr pkt) +{ + pkt->pushLabel(ctrl.name()); + + if (!queue.trySatisfyFunctional(pkt)) { + // Default implementation of SimpleTimingPort::recvFunctional() + // calls 
recvAtomic() and throws away the latency; we can save a + // little here by just not calculating the latency. + ctrl.recvFunctional(pkt); + } + + pkt->popLabel(); +} + +Tick +DcacheCtrl::MemoryPort::recvAtomic(PacketPtr pkt) +{ + return ctrl.recvAtomic(pkt); +} + +Tick +DcacheCtrl::MemoryPort::recvAtomicBackdoor( + PacketPtr pkt, MemBackdoorPtr &backdoor) +{ + return ctrl.recvAtomicBackdoor(pkt, backdoor); +} + +bool +DcacheCtrl::MemoryPort::recvTimingReq(PacketPtr pkt) +{ + // pass it to the memory controller + return ctrl.recvTimingReq(pkt); +} diff --git a/src/mem/dcache_ctrl.hh b/src/mem/dcache_ctrl.hh new file mode 100644 index 0000000000..c71c28412e --- /dev/null +++ b/src/mem/dcache_ctrl.hh @@ -0,0 +1,769 @@ +/// The copyright needs be modified for UCD/DArchR/the names of the writers + +/* + * Copyright (c) 2012-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * @file + * DcacheCtrl declaration + */ + +#ifndef __DCACHE_CTRL_HH__ +#define __DCACHE_CTRL_HH__ + +#include + +#include "mem/mem_ctrl.hh" +#include "params/DcacheCtrl.hh" + +class DRAMInterface; +class NVMInterface; + +class DcacheCtrl : public QoS::MemCtrl +{ + private: + + bool stallRds = false; + bool drainDramWrite = false; + bool drainNvmWrite = false; + + + unsigned maxConf = 0, + maxDrRdEv = 0, maxDrRdRespEv = 0, + maxDrWrEv = 0, + maxNvRdIssEv = 0, maxNvRdEv = 0, maxNvRdRespEv = 0, + maxNvWrEv = 0; + + unsigned numDirtyLinesInDrRdRespQ = 0; + + // For now, make use of a queued response port to avoid dealing with + // flow control for the responses being sent back + class MemoryPort : public QueuedResponsePort + { + RespPacketQueue queue; + DcacheCtrl& ctrl; + public: + MemoryPort(const std::string& name, DcacheCtrl& _ctrl); + protected: + Tick recvAtomic(PacketPtr pkt) override; + Tick recvAtomicBackdoor( + PacketPtr pkt, MemBackdoorPtr &backdoor) override; + void recvFunctional(PacketPtr pkt) override; + bool recvTimingReq(PacketPtr) override; + AddrRangeList getAddrRanges() const override; + }; + + + /** + * Our incoming port, for a multi-ported controller add a crossbar + * in front of it + */ + MemoryPort port; + + /** + * Remember if the memory system is in timing mode + */ + bool isTimingMode; + + /** + * Remember if we have to retry a request when available. + */ + bool retry; + + void printORB(); + void printCRB(); + void printAddrInitRead(); + void printAddrDramRespReady(); + void printNvmWritebackQueue(); + Addr returnTagDC(Addr pkt_addr, unsigned size); + Addr returnIndexDC(Addr pkt_addr, unsigned size); + + template + void clearQueue(Q & q) { + q = Q(); + } + + /** + * Bunch of things requires to setup "events" in gem5 + * When event "respondEvent" occurs for example, the method + * processRespondEvent is called; no parameters are allowed + * in these methods + */ + void processNextReqEvent(); + EventFunctionWrapper nextReqEvent; + + void processRespondEvent(); + EventFunctionWrapper respondEvent; + + /** + * processDramReadEvent() is an event handler which + * schedules the initial DRAM read accesses for every + * received packet by the DRAM Cache Controller. + */ + void processDramReadEvent(); + EventFunctionWrapper dramReadEvent; + + /** + * processRespDramReadEvent() is an event handler which + * handles the responses of the initial DRAM read accesses + * for the received packets by the DRAM Cache Controller. + */ + void processRespDramReadEvent(); + EventFunctionWrapper respDramReadEvent; + + /** + * processWaitingToIssueNvmReadEvent() is an event handler which + * handles the satte in which the packets that missed in DRAM cache + * will wait before being issued, if the NVM read has reached to the + * maximum number allowed for pending reads. + */ + void processWaitingToIssueNvmReadEvent(); + EventFunctionWrapper waitingToIssueNvmReadEvent; + + /** + * processNvmReadEvent() is an event handler which + * schedules the NVM read accesses in the DRAM Cache Controller. + */ + void processNvmReadEvent(); + EventFunctionWrapper nvmReadEvent; + + /** + * processRespNvmReadEvent() is an event handler which + * handles the responses of the NVM read accesses in + * the DRAM Cache Controller. + */ + void processRespNvmReadEvent(); + EventFunctionWrapper respNvmReadEvent; + + /** + * processOverallWriteEvent() is an event handler which + * handles all write accesses to DRAM and NVM. 
+ */ + + void processOverallWriteEvent(); + EventFunctionWrapper overallWriteEvent; + + /** + * Actually do the burst based on media specific access function. + * Update bus statistics when complete. + * + * @param mem_pkt The memory packet created from the outside world pkt + * returns cmd_at tick + */ + Tick doBurstAccess(MemPacket* mem_pkt); + + /** + * When a packet reaches its "readyTime" in the response Q, + * use the "access()" method in AbstractMemory to actually + * create the response packet, and send it back to the outside + * world requestor. + * + * @param pkt The packet from the outside world + * @param static_latency Static latency to add before sending the packet + */ + void accessAndRespond(PacketPtr pkt, Tick static_latency, bool in_dram); + + /** + * Determine if there is a packet that can issue. + * + * @param pkt The packet to evaluate + */ + bool packetReady(MemPacket* pkt); + + /** + * Calculate the minimum delay used when scheduling a read-to-write + * transision. + * @param return minimum delay + */ + Tick minReadToWriteDataGap(); + + /** + * Calculate the minimum delay used when scheduling a write-to-read + * transision. + * @param return minimum delay + */ + Tick minWriteToReadDataGap(); + + + /** + * The memory schduler/arbiter - picks which request needs to + * go next, based on the specified policy such as FCFS or FR-FCFS + * and moves it to the head of the queue. + * Prioritizes accesses to the same rank as previous burst unless + * controller is switching command type. + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNext(MemPacketQueue& queue, + Tick extra_col_delay, bool is_dram); + + /** + * For FR-FCFS policy reorder the read/write queue depending on row buffer + * hits and earliest bursts available in memory + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue& queue, + Tick extra_col_delay, bool is_dram); + + /** + * Calculate burst window aligned tick + * + * @param cmd_tick Initial tick of command + * @return burst window aligned tick + */ + Tick getBurstWindow(Tick cmd_tick); + + /** + * Burst-align an address. + * + * @param addr The potentially unaligned address + * @param is_dram Does this packet access DRAM? + * + * @return An address aligned to a memory burst + */ + Addr burstAlign(Addr addr, bool is_dram) const; + + /** + * To avoid iterating over the outstanding requests buffer + * to check for overlapping transactions, maintain a set + * of burst addresses that are currently queued. + * Since we merge writes to the same location we never + * have more than one address to the same burst address. + */ + std::unordered_set isInWriteQueue; + + struct tagMetaStoreEntry { + // DRAM cache related metadata + Addr tagDC; + Addr indexDC; + // constant to indicate that the cache line is valid + bool validLine = false; + // constant to indicate that the cache line is dirty + bool dirtyLine = false; + Addr nvmAddr; + }; + + /** A storage to keep the tag and metadata for the + * DRAM Cache entries. + */ + std::vector tagMetadataStore; + + /** Different states a packet can transition from one + * to the other while it's process in the DRAM Cache + * Controller. 
+     */
+    enum reqState { dramRead, dramWrite,
+                    waitingToIssueNvmRead, nvmRead, nvmWrite};
+
+    /**
+     * A class for the entries of the
+     * outstanding request buffer.
+     */
+    class reqBufferEntry {
+      public:
+        bool validEntry;
+        Tick arrivalTick;
+
+        // DRAM cache related metadata
+        Addr tagDC;
+        Addr indexDC;
+
+        // pointer to the outside world (ow) packet received from llc
+        const PacketPtr owPkt;
+        // pointer to the dram cache controller (dcc) packet
+        MemPacket* dccPkt;
+
+        reqState state;
+        bool isHit;
+        bool conflict;
+
+        Addr dirtyLineAddr;
+        bool handleDirtyLine;
+
+        Tick drRd;
+        Tick drWr;
+        Tick nvWait;
+        Tick nvRd;
+        Tick nvWr;
+
+        Tick nvmIssueReadyTime;
+
+        // Tick dramRdCmdAt;
+        // Tick dramWrCmdAt;
+        // Tick nvmRdCmdAt;
+        // Tick nvmWrCmdAt;
+
+        Tick dramRdDevTime;
+        Tick dramWrDevTime;
+        Tick nvmRdDevTime;
+        //Tick nvmWrDevTime;
+
+        reqBufferEntry(
+            bool _validEntry, Tick _arrivalTick,
+            Addr _tagDC, Addr _indexDC,
+            PacketPtr _owPkt, MemPacket* _dccPkt,
+            reqState _state, bool _isHit, bool _conflict,
+            Addr _dirtyLineAddr, bool _handleDirtyLine,
+            Tick _drRd, Tick _drWr, Tick _nvWait, Tick _nvRd, Tick _nvWr,
+            Tick _nvmIssueReadyTime,
+            Tick _dramRdDevTime, Tick _dramWrDevTime, Tick _nvmRdDevTime)
+        :
+        validEntry(_validEntry), arrivalTick(_arrivalTick),
+        tagDC(_tagDC), indexDC(_indexDC),
+        owPkt(_owPkt), dccPkt(_dccPkt),
+        state(_state), isHit(_isHit), conflict(_conflict),
+        dirtyLineAddr(_dirtyLineAddr), handleDirtyLine(_handleDirtyLine),
+        drRd(_drRd), drWr(_drWr),
+        nvWait(_nvWait), nvRd(_nvRd), nvWr(_nvWr),
+        nvmIssueReadyTime(_nvmIssueReadyTime),
+        dramRdDevTime(_dramRdDevTime), dramWrDevTime(_dramWrDevTime),
+        nvmRdDevTime(_nvmRdDevTime)
+        { }
+    };
+
+    /**
+     * This is the outstanding request buffer data
+     * structure, the main DS within the DRAM Cache
+     * Controller. The key is the address; for each key
+     * the map returns a reqBufferEntry which maintains
+     * all the info related to that address while it is
+     * being processed in the DRAM Cache controller.
+     */
+    std::map<Addr, reqBufferEntry*> reqBuffer;
+
+
+    typedef std::pair<Tick, PacketPtr> confReqBufferPair;
+    /**
+     * This is the second important data structure
+     * within the DRAM Cache controller, which holds
+     * received packets that had a conflict with some
+     * other address(es) still under process in the
+     * controller. Once those addresses finish
+     * processing, this buffer is consulted to see if
+     * any packet can be moved into the outstanding
+     * request buffer and start processing in the DRAM
+     * Cache controller.
+     */
+    std::vector<confReqBufferPair> confReqBuffer;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the dramReadEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrInitRead;
+    // std::vector pktInitRead;
+    // MemPacketQueue pktInitRead;
+    std::vector<MemPacketQueue> pktDramRead;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the respDramReadEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrDramRespReady;
+
+    // priority queue ordered by earliest tick
+    typedef std::pair<Tick, Addr> addrNvmReadPair;
+
+    /**
+     * To maintain the packets that missed in the DRAM cache and
+     * now require an NVM read, this queue holds them in order,
+     * in case they cannot be issued because the maximum
+     * number of pending NVM reads has been reached.
+     */
+    std::priority_queue<addrNvmReadPair, std::vector<addrNvmReadPair>,
+                        std::greater<addrNvmReadPair>>
+                        addrWaitingToIssueNvmRead;
+    std::vector<MemPacketQueue> pktNvmReadWaitIssue;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the nvmReadEvent handler, we maintain the
+     * required addresses in a priority queue.
+     */
+    std::priority_queue<addrNvmReadPair, std::vector<addrNvmReadPair>,
+                        std::greater<addrNvmReadPair>> addrNvmRead;
+
+    std::vector<MemPacketQueue> pktNvmRead;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the respNvmReadEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrNvmRespReady;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the dramWriteEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrDramFill;
+    std::vector<MemPacketQueue> pktDramWrite;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the nvmWriteEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    typedef std::pair<Tick, PacketPtr> nvmWritePair;
+    std::priority_queue<nvmWritePair, std::vector<nvmWritePair>,
+                        std::greater<nvmWritePair>> nvmWritebackQueue;
+    std::vector<MemPacketQueue> pktNvmWrite;
+
+
+    void handleRequestorPkt(PacketPtr pkt);
+    void checkHitOrMiss(reqBufferEntry* orbEntry);
+    bool checkDirty(Addr addr);
+    void handleDirtyCacheLine(reqBufferEntry* orbEntry);
+    bool checkConflictInDramCache(PacketPtr pkt);
+    void checkConflictInCRB(reqBufferEntry* orbEntry);
+    bool resumeConflictingReq(reqBufferEntry* orbEntry);
+    void logStatsDcache(reqBufferEntry* orbEntry);
+    Tick earliestDirtyLineInDrRdResp();
+
+    /**
+     * Holds count of commands issued in burst window starting at
+     * defined Tick. This is used to ensure that the command bandwidth
+     * does not exceed the allowable media constraints.
+     */
+    std::unordered_multiset<Tick> burstTicks;
+
+    /**
+     * Create pointer to interface of the actual dram media when connected
+     */
+    DRAMInterface* const dram;
+
+    /**
+     * Create pointer to interface of the actual nvm media when connected
+     */
+
+    NVMInterface* const nvm;
+
+    /**
+     * The following are basic design parameters of the memory
+     * controller, and are initialized based on parameter values.
+     * The rowsPerBank is determined based on the capacity, number of
+     * ranks and banks, the burst size, and the row buffer size.
+     */
+    unsigned long long dramCacheSize;
+    unsigned blockSize;
+    unsigned addrSize;
+    unsigned orbMaxSize;
+    unsigned orbSize;
+    unsigned crbMaxSize;
+    unsigned crbSize;
+
+    unsigned writeHighThreshold;
+    unsigned writeLowThreshold;
+    unsigned minWritesPerSwitch;
+    float dramWrDrainPerc;
+    unsigned minDrWrPerSwitch;
+    unsigned minNvWrPerSwitch;
+    unsigned drWrCounter;
+    unsigned nvWrCounter;
+
+    /**
+     * Memory controller configuration initialized based on parameter
+     * values.
+     */
+    Enums::MemSched memSchedPolicy;
+
+    /**
+     * Pipeline latency of the controller frontend. The frontend
+     * contribution is added to writes (that complete when they are in
+     * the write buffer) and reads that are serviced by the write buffer.
+     */
+    const Tick frontendLatency;
+
+    /**
+     * Pipeline latency of the backend and PHY. Along with the
+     * frontend contribution, this latency is added to reads serviced
+     * by the memory.
+     */
+    const Tick backendLatency;
+
+    /**
+     * Length of a command window, used to check
+     * command bandwidth
+     */
+    const Tick commandWindow;
+
+    /**
+     * Till when must we wait before issuing next RD/WR burst?
+     */
+    Tick nextBurstAt;
+
+    Tick prevArrival;
+
+    /**
+     * The soonest you have to start thinking about the next request
+     * is the longest access time that can occur before
+     * nextBurstAt.
Assuming you need to precharge, open a new row, + * and access, it is tRP + tRCD + tCL. + */ + Tick nextReqTime; + + struct CtrlStats : public Stats::Group + { + CtrlStats(DcacheCtrl &ctrl); + + void regStats() override; + + DcacheCtrl &ctrl; + + // All statistics that the model needs to capture + Stats::Scalar readReqs; + Stats::Scalar writeReqs; + Stats::Scalar readBursts; + Stats::Scalar writeBursts; + Stats::Scalar servicedByWrQ; + Stats::Scalar mergedWrBursts; + //Stats::Scalar neitherReadNorWriteReqs; + // Average queue lengths + Stats::Average avgRdQLen; + Stats::Average avgWrQLen; + + Stats::Scalar numRdRetry; + Stats::Scalar numWrRetry; + Stats::Vector readPktSize; + Stats::Vector writePktSize; + //Stats::Vector rdQLenPdf; + //Stats::Vector wrQLenPdf; + //Stats::Histogram rdPerTurnAround; + //Stats::Histogram wrPerTurnAround; + Stats::Scalar rdToWrTurnAround; + Stats::Scalar wrToRdTurnAround; + + Stats::Scalar bytesReadWrQ; + Stats::Scalar bytesReadSys; + Stats::Scalar bytesWrittenSys; + // Average bandwidth + Stats::Formula avgRdBWSys; + Stats::Formula avgWrBWSys; + + Stats::Scalar totGap; + Stats::Formula avgGap; + + // per-requestor bytes read and written to memory + Stats::Vector requestorReadBytes; + Stats::Vector requestorWriteBytes; + + // per-requestor bytes read and written to memory rate + Stats::Formula requestorReadRate; + Stats::Formula requestorWriteRate; + + // per-requestor read and write serviced memory accesses + Stats::Vector requestorReadAccesses; + Stats::Vector requestorWriteAccesses; + + // per-requestor read and write total memory access latency + Stats::Vector requestorReadTotalLat; + Stats::Vector requestorWriteTotalLat; + + // per-requestor raed and write average memory access latency + Stats::Formula requestorReadAvgLat; + Stats::Formula requestorWriteAvgLat; + + Stats::Scalar numHits; + Stats::Scalar numMisses; + Stats::Scalar numRdHits; + Stats::Scalar numWrHits; + Stats::Scalar numRdMisses; + Stats::Scalar numWrMisses; + Stats::Scalar numColdMisses; + Stats::Scalar numHotMisses; + Stats::Scalar numWrBacks; + Stats::Scalar totNumConf; + Stats::Scalar totNumConfBufFull; + + Stats::Scalar timeInDramRead; + Stats::Scalar timeInDramWrite; + Stats::Scalar timeInWaitingToIssueNvmRead; + Stats::Scalar timeInNvmRead; + Stats::Scalar timeInNvmWrite; + + Stats::Scalar drRdQingTime; + Stats::Scalar drWrQingTime; + Stats::Scalar nvmRdQingTime; + Stats::Scalar nvmWrQingTime; + + Stats::Scalar drRdDevTime; + Stats::Scalar drWrDevTime; + Stats::Scalar nvRdDevTime; + Stats::Scalar nvWrDevTime; + + Stats::Scalar totNumPktsDrRd; + Stats::Scalar totNumPktsDrWr; + Stats::Scalar totNumPktsNvmRdWait; + Stats::Scalar totNumPktsNvmRd; + Stats::Scalar totNumPktsNvmWr; + + Stats::Scalar maxNumConf; + Stats::Scalar maxDrRdEvQ; + Stats::Scalar maxDrRdRespEvQ; + Stats::Scalar maxDrWrEvQ; + Stats::Scalar maxNvRdIssEvQ; + Stats::Scalar maxNvRdEvQ; + Stats::Scalar maxNvRdRespEvQ; + Stats::Scalar maxNvWrEvQ; + }; + + CtrlStats stats; + + /** + * Upstream caches need this packet until true is returned, so + * hold it for deletion until a subsequent call + */ + std::unique_ptr pendingDelete; + + /** + * Remove commands that have already issued from burstTicks + */ + void pruneBurstTick(); + + public: + + DcacheCtrl(const DcacheCtrlParams &p); + + + /** + * Ensure that all interfaced have drained commands + * + * @return bool flag, set once drain complete + */ + bool allIntfDrained() const; + + DrainState drain() override; + + /** + * Check for command bus contention for single cycle 
command. + * If there is contention, shift command to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. + * + * @param cmd_tick Initial tick of command, to be verified + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst); + + /** + * Check for command bus contention for multi-cycle (2 currently) + * command. If there is contention, shift command(s) to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. + * + * @param cmd_tick Initial tick of command, to be verified + * @param max_multi_cmd_split Maximum delay between commands + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split = 0); + + /** + * Is there a respondEvent scheduled? + * + * @return true if event is scheduled + */ + bool respondEventScheduled() const { return respondEvent.scheduled(); } + + /** + * Is there a read/write burst Event scheduled? + * + * @return true if event is scheduled + */ + bool requestEventScheduled() const { return nextReqEvent.scheduled(); } + + /** + * restart the controller + * This can be used by interfaces to restart the + * scheduler after maintainence commands complete + * + * @param Tick to schedule next event + */ + void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a read state + */ + bool inReadBusState(bool next_state) const; + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a write state + */ + bool inWriteBusState(bool next_state) const; + + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID) override; + + virtual void init() override; + virtual void startup() override; + virtual void drainResume() override; + + protected: + + Tick recvAtomic(PacketPtr pkt); + Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); + void recvFunctional(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt); + +}; + +#endif //__DCACHE_CTRL_HH__ diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh index dd13e3ce09..cf399cb3ec 100644 --- a/src/mem/mem_ctrl.hh +++ b/src/mem/mem_ctrl.hh @@ -63,6 +63,7 @@ class DRAMInterface; class NVMInterface; + /** * A burst helper helps organize and manage a packet that is larger than * the memory burst size. 
A system packet that is larger than the burst size @@ -71,17 +72,17 @@ class NVMInterface; */ class BurstHelper { - public: - - /** Number of bursts requred for a system packet **/ - const unsigned int burstCount; +public: + /** Number of bursts requred for a system packet **/ + const unsigned int burstCount; - /** Number of bursts serviced so far for a system packet **/ - unsigned int burstsServiced; + /** Number of bursts serviced so far for a system packet **/ + unsigned int burstsServiced; - BurstHelper(unsigned int _burstCount) - : burstCount(_burstCount), burstsServiced(0) - { } + BurstHelper(unsigned int _burstCount) + : burstCount(_burstCount), burstsServiced(0) + { + } }; /** @@ -90,125 +91,123 @@ class BurstHelper */ class MemPacket { - public: - - /** When did request enter the controller */ - const Tick entryTime; - - /** When will request leave the controller */ - Tick readyTime; - - /** This comes from the outside world */ - const PacketPtr pkt; - - /** RequestorID associated with the packet */ - const RequestorID _requestorId; - - const bool read; - - /** Does this packet access DRAM?*/ - const bool dram; - - /** Will be populated by address decoder */ - const uint8_t rank; - const uint8_t bank; - const uint32_t row; - - /** - * Bank id is calculated considering banks in all the ranks - * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and - * bankId = 8 --> rank1, bank0 - */ - const uint16_t bankId; - - /** - * The starting address of the packet. - * This address could be unaligned to burst size boundaries. The - * reason is to keep the address offset so we can accurately check - * incoming read packets with packets in the write queue. - */ - Addr addr; - - /** - * The size of this dram packet in bytes - * It is always equal or smaller than the burst size - */ - unsigned int size; - - /** - * A pointer to the BurstHelper if this MemPacket is a split packet - * If not a split packet (common case), this is set to NULL - */ - BurstHelper* burstHelper; - - /** - * QoS value of the encapsulated packet read at queuing time - */ - uint8_t _qosValue; - - /** - * Set the packet QoS value - * (interface compatibility with Packet) - */ - inline void qosValue(const uint8_t qv) { _qosValue = qv; } - - /** - * Get the packet QoS value - * (interface compatibility with Packet) - */ - inline uint8_t qosValue() const { return _qosValue; } - - /** - * Get the packet RequestorID - * (interface compatibility with Packet) - */ - inline RequestorID requestorId() const { return _requestorId; } - - /** - * Get the packet size - * (interface compatibility with Packet) - */ - inline unsigned int getSize() const { return size; } - - /** - * Get the packet address - * (interface compatibility with Packet) - */ - inline Addr getAddr() const { return addr; } - - /** - * Return true if its a read packet - * (interface compatibility with Packet) - */ - inline bool isRead() const { return read; } - - /** - * Return true if its a write packet - * (interface compatibility with Packet) - */ - inline bool isWrite() const { return !read; } - - /** - * Return true if its a DRAM access - */ - inline bool isDram() const { return dram; } - - MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank, - uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr, - unsigned int _size) - : entryTime(curTick()), readyTime(curTick()), pkt(_pkt), - _requestorId(pkt->requestorId()), - read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row), - bankId(bank_id), addr(_addr), size(_size), 
burstHelper(NULL), - _qosValue(_pkt->qosValue()) - { } - +public: + /** When did request enter the controller */ + Tick entryTime; + + /** When will request leave the controller */ + Tick readyTime; + + /** This comes from the outside world */ + const PacketPtr pkt; + + /** RequestorID associated with the packet */ + const RequestorID _requestorId; + + const bool read; + + /** Does this packet access DRAM?*/ + const bool dram; + + /** Will be populated by address decoder */ + const uint8_t rank; + const uint8_t bank; + const uint32_t row; + + /** + * Bank id is calculated considering banks in all the ranks + * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and + * bankId = 8 --> rank1, bank0 + */ + const uint16_t bankId; + + /** + * The starting address of the packet. + * This address could be unaligned to burst size boundaries. The + * reason is to keep the address offset so we can accurately check + * incoming read packets with packets in the write queue. + */ + Addr addr; + + /** + * The size of this dram packet in bytes + * It is always equal or smaller than the burst size + */ + unsigned int size; + + /** + * A pointer to the BurstHelper if this MemPacket is a split packet + * If not a split packet (common case), this is set to NULL + */ + BurstHelper *burstHelper; + + /** + * QoS value of the encapsulated packet read at queuing time + */ + uint8_t _qosValue; + + /** + * Set the packet QoS value + * (interface compatibility with Packet) + */ + inline void qosValue(const uint8_t qv) { _qosValue = qv; } + + /** + * Get the packet QoS value + * (interface compatibility with Packet) + */ + inline uint8_t qosValue() const { return _qosValue; } + + /** + * Get the packet RequestorID + * (interface compatibility with Packet) + */ + inline RequestorID requestorId() const { return _requestorId; } + + /** + * Get the packet size + * (interface compatibility with Packet) + */ + inline unsigned int getSize() const { return size; } + + /** + * Get the packet address + * (interface compatibility with Packet) + */ + inline Addr getAddr() const { return addr; } + + /** + * Return true if its a read packet + * (interface compatibility with Packet) + */ + inline bool isRead() const { return read; } + + /** + * Return true if its a write packet + * (interface compatibility with Packet) + */ + inline bool isWrite() const { return !read; } + + /** + * Return true if its a DRAM access + */ + inline bool isDram() const { return dram; } + + MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank, + uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr, + unsigned int _size) + : entryTime(curTick()), readyTime(curTick()), pkt(_pkt), + _requestorId((_pkt != nullptr) ? _pkt->requestorId() : -1), + read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row), + bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), + _qosValue((_pkt != nullptr) ? _pkt->qosValue() : -1) + { + } }; // The memory packets are store in a multiple dequeue structure, // based on their QoS priority -typedef std::deque MemPacketQueue; - +typedef std::deque MemPacketQueue; /** * The memory controller is a single-channel memory controller capturing @@ -233,480 +232,471 @@ typedef std::deque MemPacketQueue; * please cite the paper. 
* */ + class MemCtrl : public QoS::MemCtrl { - private: - - // For now, make use of a queued response port to avoid dealing with - // flow control for the responses being sent back - class MemoryPort : public QueuedResponsePort +protected: + // For now, make use of a queued response port to avoid dealing with + // flow control for the responses being sent back + class MemoryPort : public QueuedResponsePort { - RespPacketQueue queue; MemCtrl& ctrl; - public: - MemoryPort(const std::string& name, MemCtrl& _ctrl); - protected: - Tick recvAtomic(PacketPtr pkt) override; Tick recvAtomicBackdoor( PacketPtr pkt, MemBackdoorPtr &backdoor) override; - void recvFunctional(PacketPtr pkt) override; - bool recvTimingReq(PacketPtr) override; - AddrRangeList getAddrRanges() const override; - - }; - - /** - * Our incoming port, for a multi-ported controller add a crossbar - * in front of it - */ - MemoryPort port; - - /** - * Remember if the memory system is in timing mode - */ - bool isTimingMode; - - /** - * Remember if we have to retry a request when available. - */ - bool retryRdReq; - bool retryWrReq; - - /** - * Bunch of things requires to setup "events" in gem5 - * When event "respondEvent" occurs for example, the method - * processRespondEvent is called; no parameters are allowed - * in these methods - */ - void processNextReqEvent(); - EventFunctionWrapper nextReqEvent; - - void processRespondEvent(); - EventFunctionWrapper respondEvent; - - /** - * Check if the read queue has room for more entries - * - * @param pkt_count The number of entries needed in the read queue - * @return true if read queue is full, false otherwise - */ - bool readQueueFull(unsigned int pkt_count) const; - - /** - * Check if the write queue has room for more entries - * - * @param pkt_count The number of entries needed in the write queue - * @return true if write queue is full, false otherwise - */ - bool writeQueueFull(unsigned int pkt_count) const; - - /** - * When a new read comes in, first check if the write q has a - * pending request to the same address.\ If not, decode the - * address to populate rank/bank/row, create one or mutliple - * "mem_pkt", and push them to the back of the read queue.\ - * If this is the only - * read request in the system, schedule an event to start - * servicing it. - * - * @param pkt The request packet from the outside world - * @param pkt_count The number of memory bursts the pkt - * @param is_dram Does this packet access DRAM? - * translate to. If pkt size is larger then one full burst, - * then pkt_count is greater than one. - */ - void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); - - /** - * Decode the incoming pkt, create a mem_pkt and push to the - * back of the write queue. \If the write q length is more than - * the threshold specified by the user, ie the queue is beginning - * to get full, stop reads, and start draining writes. - * - * @param pkt The request packet from the outside world - * @param pkt_count The number of memory bursts the pkt - * @param is_dram Does this packet access DRAM? - * translate to. If pkt size is larger then one full burst, - * then pkt_count is greater than one. - */ - void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); - - /** - * Actually do the burst based on media specific access function. - * Update bus statistics when complete. 
- * - * @param mem_pkt The memory packet created from the outside world pkt - */ - void doBurstAccess(MemPacket* mem_pkt); - - /** - * When a packet reaches its "readyTime" in the response Q, - * use the "access()" method in AbstractMemory to actually - * create the response packet, and send it back to the outside - * world requestor. - * - * @param pkt The packet from the outside world - * @param static_latency Static latency to add before sending the packet - */ - void accessAndRespond(PacketPtr pkt, Tick static_latency); - - /** - * Determine if there is a packet that can issue. - * - * @param pkt The packet to evaluate - */ - bool packetReady(MemPacket* pkt); - - /** - * Calculate the minimum delay used when scheduling a read-to-write - * transision. - * @param return minimum delay - */ - Tick minReadToWriteDataGap(); - - /** - * Calculate the minimum delay used when scheduling a write-to-read - * transision. - * @param return minimum delay - */ - Tick minWriteToReadDataGap(); - - /** - * The memory schduler/arbiter - picks which request needs to - * go next, based on the specified policy such as FCFS or FR-FCFS - * and moves it to the head of the queue. - * Prioritizes accesses to the same rank as previous burst unless - * controller is switching command type. - * - * @param queue Queued requests to consider - * @param extra_col_delay Any extra delay due to a read/write switch - * @return an iterator to the selected packet, else queue.end() - */ - MemPacketQueue::iterator chooseNext(MemPacketQueue& queue, - Tick extra_col_delay); - - /** - * For FR-FCFS policy reorder the read/write queue depending on row buffer - * hits and earliest bursts available in memory - * - * @param queue Queued requests to consider - * @param extra_col_delay Any extra delay due to a read/write switch - * @return an iterator to the selected packet, else queue.end() - */ - MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue& queue, - Tick extra_col_delay); - - /** - * Calculate burst window aligned tick - * - * @param cmd_tick Initial tick of command - * @return burst window aligned tick - */ - Tick getBurstWindow(Tick cmd_tick); - - /** - * Used for debugging to observe the contents of the queues. - */ - void printQs() const; - - /** - * Burst-align an address. - * - * @param addr The potentially unaligned address - * @param is_dram Does this packet access DRAM? - * - * @return An address aligned to a memory burst - */ - Addr burstAlign(Addr addr, bool is_dram) const; - - /** - * The controller's main read and write queues, - * with support for QoS reordering - */ - std::vector readQueue; - std::vector writeQueue; - - /** - * To avoid iterating over the write queue to check for - * overlapping transactions, maintain a set of burst addresses - * that are currently queued. Since we merge writes to the same - * location we never have more than one address to the same burst - * address. - */ - std::unordered_set isInWriteQueue; - - /** - * Response queue where read packets wait after we're done working - * with them, but it's not time to send the response yet. The - * responses are stored separately mostly to keep the code clean - * and help with events scheduling. For all logical purposes such - * as sizing the read queue, this and the main read queue need to - * be added together. - */ - std::deque respQueue; - - /** - * Holds count of commands issued in burst window starting at - * defined Tick. This is used to ensure that the command bandwidth - * does not exceed the allowable media constraints. 
- */ - std::unordered_multiset burstTicks; - - /** - * Create pointer to interface of the actual dram media when connected - */ - DRAMInterface* const dram; - - /** - * Create pointer to interface of the actual nvm media when connected - */ - NVMInterface* const nvm; - - /** - * The following are basic design parameters of the memory - * controller, and are initialized based on parameter values. - * The rowsPerBank is determined based on the capacity, number of - * ranks and banks, the burst size, and the row buffer size. - */ - const uint32_t readBufferSize; - const uint32_t writeBufferSize; - const uint32_t writeHighThreshold; - const uint32_t writeLowThreshold; - const uint32_t minWritesPerSwitch; - uint32_t writesThisTime; - uint32_t readsThisTime; - - /** - * Memory controller configuration initialized based on parameter - * values. - */ - Enums::MemSched memSchedPolicy; - - /** - * Pipeline latency of the controller frontend. The frontend - * contribution is added to writes (that complete when they are in - * the write buffer) and reads that are serviced the write buffer. - */ - const Tick frontendLatency; - - /** - * Pipeline latency of the backend and PHY. Along with the - * frontend contribution, this latency is added to reads serviced - * by the memory. - */ - const Tick backendLatency; - - /** - * Length of a command window, used to check - * command bandwidth - */ - const Tick commandWindow; - - /** - * Till when must we wait before issuing next RD/WR burst? - */ - Tick nextBurstAt; - - Tick prevArrival; - - /** - * The soonest you have to start thinking about the next request - * is the longest access time that can occur before - * nextBurstAt. Assuming you need to precharge, open a new row, - * and access, it is tRP + tRCD + tCL. - */ - Tick nextReqTime; - - struct CtrlStats : public Stats::Group - { - CtrlStats(MemCtrl &ctrl); - - void regStats() override; - - MemCtrl &ctrl; - - // All statistics that the model needs to capture - Stats::Scalar readReqs; - Stats::Scalar writeReqs; - Stats::Scalar readBursts; - Stats::Scalar writeBursts; - Stats::Scalar servicedByWrQ; - Stats::Scalar mergedWrBursts; - Stats::Scalar neitherReadNorWriteReqs; - // Average queue lengths - Stats::Average avgRdQLen; - Stats::Average avgWrQLen; - - Stats::Scalar numRdRetry; - Stats::Scalar numWrRetry; - Stats::Vector readPktSize; - Stats::Vector writePktSize; - Stats::Vector rdQLenPdf; - Stats::Vector wrQLenPdf; - Stats::Histogram rdPerTurnAround; - Stats::Histogram wrPerTurnAround; - - Stats::Scalar bytesReadWrQ; - Stats::Scalar bytesReadSys; - Stats::Scalar bytesWrittenSys; - // Average bandwidth - Stats::Formula avgRdBWSys; - Stats::Formula avgWrBWSys; - - Stats::Scalar totGap; - Stats::Formula avgGap; - - // per-requestor bytes read and written to memory - Stats::Vector requestorReadBytes; - Stats::Vector requestorWriteBytes; - - // per-requestor bytes read and written to memory rate - Stats::Formula requestorReadRate; - Stats::Formula requestorWriteRate; - - // per-requestor read and write serviced memory accesses - Stats::Vector requestorReadAccesses; - Stats::Vector requestorWriteAccesses; - - // per-requestor read and write total memory access latency - Stats::Vector requestorReadTotalLat; - Stats::Vector requestorWriteTotalLat; - - // per-requestor raed and write average memory access latency - Stats::Formula requestorReadAvgLat; - Stats::Formula requestorWriteAvgLat; - }; - - CtrlStats stats; - - /** - * Upstream caches need this packet until true is returned, so - * hold it for deletion 
until a subsequent call - */ - std::unique_ptr pendingDelete; - - /** - * Select either the read or write queue - * - * @param is_read The current burst is a read, select read queue - * @return a reference to the appropriate queue - */ - std::vector& selQueue(bool is_read) - { - return (is_read ? readQueue : writeQueue); }; - /** - * Remove commands that have already issued from burstTicks - */ - void pruneBurstTick(); - - public: - - MemCtrl(const MemCtrlParams &p); - - /** - * Ensure that all interfaced have drained commands - * - * @return bool flag, set once drain complete - */ - bool allIntfDrained() const; - - DrainState drain() override; - - /** - * Check for command bus contention for single cycle command. - * If there is contention, shift command to next burst. - * Check verifies that the commands issued per burst is less - * than a defined max number, maxCommandsPerWindow. - * Therefore, contention per cycle is not verified and instead - * is done based on a burst window. - * - * @param cmd_tick Initial tick of command, to be verified - * @param max_cmds_per_burst Number of commands that can issue - * in a burst window - * @return tick for command issue without contention - */ - Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst); - - /** - * Check for command bus contention for multi-cycle (2 currently) - * command. If there is contention, shift command(s) to next burst. - * Check verifies that the commands issued per burst is less - * than a defined max number, maxCommandsPerWindow. - * Therefore, contention per cycle is not verified and instead - * is done based on a burst window. - * - * @param cmd_tick Initial tick of command, to be verified - * @param max_multi_cmd_split Maximum delay between commands - * @param max_cmds_per_burst Number of commands that can issue - * in a burst window - * @return tick for command issue without contention - */ - Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, - Tick max_multi_cmd_split = 0); - - /** - * Is there a respondEvent scheduled? - * - * @return true if event is scheduled - */ - bool respondEventScheduled() const { return respondEvent.scheduled(); } - - /** - * Is there a read/write burst Event scheduled? 
- * - * @return true if event is scheduled - */ - bool requestEventScheduled() const { return nextReqEvent.scheduled(); } - - /** - * restart the controller - * This can be used by interfaces to restart the - * scheduler after maintainence commands complete - * - * @param Tick to schedule next event - */ - void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } - - /** - * Check the current direction of the memory channel - * - * @param next_state Check either the current or next bus state - * @return True when bus is currently in a read state - */ - bool inReadBusState(bool next_state) const; - - /** - * Check the current direction of the memory channel - * - * @param next_state Check either the current or next bus state - * @return True when bus is currently in a write state - */ - bool inWriteBusState(bool next_state) const; - - Port &getPort(const std::string &if_name, - PortID idx=InvalidPortID) override; - - virtual void init() override; - virtual void startup() override; - virtual void drainResume() override; - - protected: - - Tick recvAtomic(PacketPtr pkt); - Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); - void recvFunctional(PacketPtr pkt); - bool recvTimingReq(PacketPtr pkt); + /** + * Our incoming port, for a multi-ported controller add a crossbar + * in front of it + */ + MemoryPort port; + + /** + * Remember if the memory system is in timing mode + */ + bool isTimingMode; + + /** + * Remember if we have to retry a request when available. + */ + bool retryRdReq; + bool retryWrReq; + + /** + * Bunch of things requires to setup "events" in gem5 + * When event "respondEvent" occurs for example, the method + * processRespondEvent is called; no parameters are allowed + * in these methods + */ + void processNextReqEvent(); + EventFunctionWrapper nextReqEvent; + + void processRespondEvent(); + EventFunctionWrapper respondEvent; + + /** + * Check if the read queue has room for more entries + * + * @param pkt_count The number of entries needed in the read queue + * @return true if read queue is full, false otherwise + */ + bool readQueueFull(unsigned int pkt_count) const; + + /** + * Check if the write queue has room for more entries + * + * @param pkt_count The number of entries needed in the write queue + * @return true if write queue is full, false otherwise + */ + bool writeQueueFull(unsigned int pkt_count) const; + + /** + * When a new read comes in, first check if the write q has a + * pending request to the same address.\ If not, decode the + * address to populate rank/bank/row, create one or mutliple + * "mem_pkt", and push them to the back of the read queue.\ + * If this is the only + * read request in the system, schedule an event to start + * servicing it. + * + * @param pkt The request packet from the outside world + * @param pkt_count The number of memory bursts the pkt + * @param is_dram Does this packet access DRAM? + * translate to. If pkt size is larger then one full burst, + * then pkt_count is greater than one. + */ + void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); + + /** + * Decode the incoming pkt, create a mem_pkt and push to the + * back of the write queue. \If the write q length is more than + * the threshold specified by the user, ie the queue is beginning + * to get full, stop reads, and start draining writes. + * + * @param pkt The request packet from the outside world + * @param pkt_count The number of memory bursts the pkt + * @param is_dram Does this packet access DRAM? + * translate to. 
If pkt size is larger then one full burst, + * then pkt_count is greater than one. + */ + void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); + + /** + * Actually do the burst based on media specific access function. + * Update bus statistics when complete. + * + * @param mem_pkt The memory packet created from the outside world pkt + */ + void doBurstAccess(MemPacket *mem_pkt); + + /** + * When a packet reaches its "readyTime" in the response Q, + * use the "access()" method in AbstractMemory to actually + * create the response packet, and send it back to the outside + * world requestor. + * + * @param pkt The packet from the outside world + * @param static_latency Static latency to add before sending the packet + */ + void accessAndRespond(PacketPtr pkt, Tick static_latency); + + /** + * Determine if there is a packet that can issue. + * + * @param pkt The packet to evaluate + */ + bool packetReady(MemPacket *pkt); + + /** + * Calculate the minimum delay used when scheduling a read-to-write + * transision. + * @param return minimum delay + */ + Tick minReadToWriteDataGap(); + + /** + * Calculate the minimum delay used when scheduling a write-to-read + * transision. + * @param return minimum delay + */ + Tick minWriteToReadDataGap(); + + /** + * The memory schduler/arbiter - picks which request needs to + * go next, based on the specified policy such as FCFS or FR-FCFS + * and moves it to the head of the queue. + * Prioritizes accesses to the same rank as previous burst unless + * controller is switching command type. + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNext(MemPacketQueue &queue, + Tick extra_col_delay); + + /** + * For FR-FCFS policy reorder the read/write queue depending on row buffer + * hits and earliest bursts available in memory + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue &queue, + Tick extra_col_delay); + + /** + * Calculate burst window aligned tick + * + * @param cmd_tick Initial tick of command + * @return burst window aligned tick + */ + Tick getBurstWindow(Tick cmd_tick); + + /** + * Used for debugging to observe the contents of the queues. + */ + void printQs() const; + + /** + * Burst-align an address. + * + * @param addr The potentially unaligned address + * @param is_dram Does this packet access DRAM? + * + * @return An address aligned to a memory burst + */ + Addr burstAlign(Addr addr, bool is_dram) const; + + /** + * The controller's main read and write queues, + * with support for QoS reordering + */ + std::vector readQueue; + std::vector writeQueue; + + /** + * To avoid iterating over the write queue to check for + * overlapping transactions, maintain a set of burst addresses + * that are currently queued. Since we merge writes to the same + * location we never have more than one address to the same burst + * address. + */ + std::unordered_set isInWriteQueue; + + /** + * Response queue where read packets wait after we're done working + * with them, but it's not time to send the response yet. The + * responses are stored separately mostly to keep the code clean + * and help with events scheduling. 
For all logical purposes such + * as sizing the read queue, this and the main read queue need to + * be added together. + */ + std::deque respQueue; + + /** + * Holds count of commands issued in burst window starting at + * defined Tick. This is used to ensure that the command bandwidth + * does not exceed the allowable media constraints. + */ + std::unordered_multiset burstTicks; + + /** + * Create pointer to interface of the actual dram media when connected + */ + DRAMInterface *const dram; + + /** + * Create pointer to interface of the actual nvm media when connected + */ + NVMInterface *const nvm; + + /** + * The following are basic design parameters of the memory + * controller, and are initialized based on parameter values. + * The rowsPerBank is determined based on the capacity, number of + * ranks and banks, the burst size, and the row buffer size. + */ + const uint32_t readBufferSize; + const uint32_t writeBufferSize; + const uint32_t writeHighThreshold; + const uint32_t writeLowThreshold; + const uint32_t minWritesPerSwitch; + uint32_t writesThisTime; + uint32_t readsThisTime; + + /** + * Memory controller configuration initialized based on parameter + * values. + */ + Enums::MemSched memSchedPolicy; + + /** + * Pipeline latency of the controller frontend. The frontend + * contribution is added to writes (that complete when they are in + * the write buffer) and reads that are serviced the write buffer. + */ + const Tick frontendLatency; + + /** + * Pipeline latency of the backend and PHY. Along with the + * frontend contribution, this latency is added to reads serviced + * by the memory. + */ + const Tick backendLatency; + + /** + * Length of a command window, used to check + * command bandwidth + */ + const Tick commandWindow; + + /** + * Till when must we wait before issuing next RD/WR burst? + */ + Tick nextBurstAt; + + Tick prevArrival; + + /** + * The soonest you have to start thinking about the next request + * is the longest access time that can occur before + * nextBurstAt. Assuming you need to precharge, open a new row, + * and access, it is tRP + tRCD + tCL. 
+ */ + Tick nextReqTime; + + struct CtrlStats : public Stats::Group + { + CtrlStats(MemCtrl &ctrl); + + void regStats() override; + + MemCtrl &ctrl; + + // All statistics that the model needs to capture + Stats::Scalar readReqs; + Stats::Scalar writeReqs; + Stats::Scalar readBursts; + Stats::Scalar writeBursts; + Stats::Scalar servicedByWrQ; + Stats::Scalar mergedWrBursts; + Stats::Scalar neitherReadNorWriteReqs; + // Average queue lengths + Stats::Average avgRdQLen; + Stats::Average avgWrQLen; + + Stats::Scalar numRdRetry; + Stats::Scalar numWrRetry; + Stats::Vector readPktSize; + Stats::Vector writePktSize; + Stats::Vector rdQLenPdf; + Stats::Vector wrQLenPdf; + Stats::Histogram rdPerTurnAround; + Stats::Histogram wrPerTurnAround; + + Stats::Scalar bytesReadWrQ; + Stats::Scalar bytesReadSys; + Stats::Scalar bytesWrittenSys; + // Average bandwidth + Stats::Formula avgRdBWSys; + Stats::Formula avgWrBWSys; + + Stats::Scalar totGap; + Stats::Formula avgGap; + + // per-requestor bytes read and written to memory + Stats::Vector requestorReadBytes; + Stats::Vector requestorWriteBytes; + + // per-requestor bytes read and written to memory rate + Stats::Formula requestorReadRate; + Stats::Formula requestorWriteRate; + + // per-requestor read and write serviced memory accesses + Stats::Vector requestorReadAccesses; + Stats::Vector requestorWriteAccesses; + + // per-requestor read and write total memory access latency + Stats::Vector requestorReadTotalLat; + Stats::Vector requestorWriteTotalLat; + + // per-requestor raed and write average memory access latency + Stats::Formula requestorReadAvgLat; + Stats::Formula requestorWriteAvgLat; + }; + + CtrlStats stats; + + /** + * Upstream caches need this packet until true is returned, so + * hold it for deletion until a subsequent call + */ + std::unique_ptr pendingDelete; + + /** + * Select either the read or write queue + * + * @param is_read The current burst is a read, select read queue + * @return a reference to the appropriate queue + */ + std::vector &selQueue(bool is_read) + { + return (is_read ? readQueue : writeQueue); + }; + + /** + * Remove commands that have already issued from burstTicks + */ + void pruneBurstTick(); + +public: + MemCtrl(const MemCtrlParams &p); + + /** + * Ensure that all interfaced have drained commands + * + * @return bool flag, set once drain complete + */ + bool allIntfDrained() const; + + DrainState drain() override; + + /** + * Check for command bus contention for single cycle command. + * If there is contention, shift command to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. + * + * @param cmd_tick Initial tick of command, to be verified + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) override; + + /** + * Check for command bus contention for multi-cycle (2 currently) + * command. If there is contention, shift command(s) to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. 
+ * + * @param cmd_tick Initial tick of command, to be verified + * @param max_multi_cmd_split Maximum delay between commands + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split = 0) override; + + /** + * Is there a respondEvent scheduled? + * + * @return true if event is scheduled + */ + bool respondEventScheduled() const override + { return respondEvent.scheduled(); } + + /** + * Is there a read/write burst Event scheduled? + * + * @return true if event is scheduled + */ + bool requestEventScheduled() const override + { return nextReqEvent.scheduled(); } + + /** + * restart the controller + * This can be used by interfaces to restart the + * scheduler after maintainence commands complete + * + * @param Tick to schedule next event + */ + void restartScheduler(Tick tick) override { schedule(nextReqEvent, tick); } + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a read state + */ + bool inReadBusState(bool next_state) const override; + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a write state + */ + bool inWriteBusState(bool next_state) const override; + + Port &getPort(const std::string &if_name, + PortID idx = InvalidPortID) override; + + virtual void init() override; + virtual void startup() override; + virtual void drainResume() override; + +protected: + Tick recvAtomic(PacketPtr pkt); + Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); + void recvFunctional(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt); }; #endif //__MEM_CTRL_HH__ diff --git a/src/mem/mem_interface.cc b/src/mem/mem_interface.cc index d81d34c59d..1a1ccc231f 100644 --- a/src/mem/mem_interface.cc +++ b/src/mem/mem_interface.cc @@ -73,9 +73,13 @@ MemInterface::MemInterface(const MemInterfaceParams &_p) {} void -MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window) +MemInterface::setCtrl(QoS::MemCtrl* _ctrl, unsigned int command_window) { - ctrl = _ctrl; + if (dynamic_cast(_ctrl) != nullptr) { + ctrl = dynamic_cast(_ctrl); + } else { + ctrl = dynamic_cast(_ctrl); + } maxCommandsPerWindow = command_window / tCK; } @@ -713,6 +717,266 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, return std::make_pair(cmd_at, cmd_at + burst_gap); } +std::pair +DRAMInterface::doBurstAccess(MemPacket* dcc_pkt, Tick next_burst_at) + //,const std::vector& queue) +{ + DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n", + dcc_pkt->addr, dcc_pkt->rank, dcc_pkt->bank, dcc_pkt->row); + + // get the rank + Rank& rank_ref = *ranks[dcc_pkt->rank]; + assert(rank_ref.inRefIdleState()); + + // are we in or transitioning to a low-power state and have not scheduled + // a power-up event? 
+ // if so, wake up from power down to issue RD/WR burst + if (rank_ref.inLowPowerState) { + assert(rank_ref.pwrState != PWR_SREF); + rank_ref.scheduleWakeUpEvent(tXP); + } + + // get the bank + Bank& bank_ref = rank_ref.banks[dcc_pkt->bank]; + + // for the state we need to track if it is a row hit or not + bool row_hit = true; + + // Determine the access latency and update the bank state + if (bank_ref.openRow == dcc_pkt->row) { + // nothing to do + } else { + row_hit = false; + + // If there is a page open, precharge it. + if (bank_ref.openRow != Bank::NO_ROW) { + prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt, + curTick())); + } + + // next we need to account for the delay in activating the page + Tick act_tick = std::max(bank_ref.actAllowedAt, curTick()); + + // Record the activation and deal with all the global timing + // constraints caused be a new activation (tRRD and tXAW) + activateBank(rank_ref, bank_ref, act_tick, dcc_pkt->row); + } + + // respect any constraints on the command (e.g. tRCD or tCCD) + const Tick col_allowed_at = dcc_pkt->isRead() ? + bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; + + // we need to wait until the bus is available before we can issue + // the command; need to ensure minimum bus delay requirement is met + Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()}); + + // verify that we have command bandwidth to issue the burst + // if not, shift to next burst window + if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay)) + cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); + else + cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow); + + // if we are interleaving bursts, ensure that + // 1) we don't double interleave on next burst issue + // 2) we are at an interleave boundary; if not, shift to next boundary + Tick burst_gap = tBURST_MIN; + if (burstInterleave) { + if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) { + // already interleaving, push next command to end of full burst + burst_gap = tBURST; + } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) { + // not at an interleave boundary after bandwidth check + // Shift command to tBURST boundary to avoid data contention + // Command will remain in the same burst window given that + // tBURST is less than tBURST_MAX + cmd_at = rank_ref.lastBurstTick + tBURST; + } + } + DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at); + + // update the packet ready time + dcc_pkt->readyTime = cmd_at + tCL + tBURST; + + rank_ref.lastBurstTick = cmd_at; + + // update the time for the next read/write burst for each + // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here) + Tick dly_to_rd_cmd; + Tick dly_to_wr_cmd; + for (int j = 0; j < ranksPerChannel; j++) { + for (int i = 0; i < banksPerRank; i++) { + if (dcc_pkt->rank == j) { + if (bankGroupArch && + (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) { + // bank group architecture requires longer delays between + // RD/WR burst commands to the same bank group. + // tCCD_L is default requirement for same BG timing + // tCCD_L_WR is required for write-to-write + // Need to also take bus turnaround delays into account + dly_to_rd_cmd = dcc_pkt->isRead() ? + tCCD_L : std::max(tCCD_L, wrToRdDlySameBG); + dly_to_wr_cmd = dcc_pkt->isRead() ? + std::max(tCCD_L, rdToWrDlySameBG) : + tCCD_L_WR; + } else { + // tBURST is default requirement for diff BG timing + // Need to also take bus turnaround delays into account + dly_to_rd_cmd = dcc_pkt->isRead() ? 
burst_gap : + writeToReadDelay(); + dly_to_wr_cmd = dcc_pkt->isRead() ? readToWriteDelay() : + burst_gap; + } + } else { + // different rank is by default in a different bank group and + // doesn't require longer tCCD or additional RTW, WTR delays + // Need to account for rank-to-rank switching + dly_to_wr_cmd = rankToRankDelay(); + dly_to_rd_cmd = rankToRankDelay(); + } + ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd, + ranks[j]->banks[i].rdAllowedAt); + ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd, + ranks[j]->banks[i].wrAllowedAt); + } + } + + // Save rank of current access + activeRank = dcc_pkt->rank; + + // If this is a write, we also need to respect the write recovery + // time before a precharge, in the case of a read, respect the + // read to precharge constraint + bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt, + dcc_pkt->isRead() ? cmd_at + tRTP : + dcc_pkt->readyTime + tWR); + + // increment the bytes accessed and the accesses per row + bank_ref.bytesAccessed += burstSize; + ++bank_ref.rowAccesses; + + // if we reached the max, then issue with an auto-precharge + bool auto_precharge = pageMgmt == Enums::close || + bank_ref.rowAccesses == maxAccessesPerRow; + + // if we did not hit the limit, we might still want to + // auto-precharge + /*if (!auto_precharge && + (pageMgmt == Enums::open_adaptive || + pageMgmt == Enums::close_adaptive)) { + // a twist on the open and close page policies: + // 1) open_adaptive page policy does not blindly keep the + // page open, but close it if there are no row hits, and there + // are bank conflicts in the queue + // 2) close_adaptive page policy does not blindly close the + // page, but closes it only if there are no row hits in the queue. + // In this case, only force an auto precharge when there + // are no same page hits in the queue + bool got_more_hits = false; + bool got_bank_conflict = false; + + for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) { + auto p = queue[i].begin(); + // keep on looking until we find a hit or reach the end of the + // queue + // 1) if a hit is found, then both open and close adaptive + // policies keep the page open + // 2) if no hit is found, got_bank_conflict is set to true if a + // bank conflict request is waiting in the queue + // 3) make sure we are not considering the packet that we are + // currently dealing with + while (!got_more_hits && p != queue[i].end()) { + if (dcc_pkt != (*p)) { + bool same_rank_bank = (dcc_pkt->rank == (*p)->rank) && + (dcc_pkt->bank == (*p)->bank); + + bool same_row = dcc_pkt->row == (*p)->row; + got_more_hits |= same_rank_bank && same_row; + got_bank_conflict |= same_rank_bank && !same_row; + } + ++p; + } + + if (got_more_hits) + break; + } + + // auto pre-charge when either + // 1) open_adaptive policy, we have not got any more hits, and + // have a bank conflict + // 2) close_adaptive policy and we have not got any more hits + auto_precharge = !got_more_hits && + (got_bank_conflict || pageMgmt == Enums::close_adaptive); + }*/ + + // DRAMPower trace command to be written + std::string mem_cmd = dcc_pkt->isRead() ? "RD" : "WR"; + + // MemCommand required for DRAMPower library + MemCommand::cmds command = (mem_cmd == "RD") ? 
MemCommand::RD : + MemCommand::WR; + + rank_ref.cmdList.push_back(Command(command, dcc_pkt->bank, cmd_at)); + + DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) - + timeStampOffset, mem_cmd, dcc_pkt->bank, dcc_pkt->rank); + + // if this access should use auto-precharge, then we are + // closing the row after the read/write burst + if (auto_precharge) { + // if auto-precharge push a PRE command at the correct tick to the + // list used by DRAMPower library to calculate power + prechargeBank(rank_ref, bank_ref, std::max(curTick(), + bank_ref.preAllowedAt), true); + + DPRINTF(DRAM, "Auto-precharged bank: %d\n", dcc_pkt->bankId); + } + + // Update the stats and schedule the next request + if (dcc_pkt->isRead()) { + // Every respQueue which will generate an event, increment count + ++rank_ref.outstandingEvents; + + stats.readBursts++; + if (row_hit) + stats.readRowHits++; + stats.bytesRead += burstSize; + stats.perBankRdBursts[dcc_pkt->bankId]++; + + // Update latency stats + stats.totMemAccLat += dcc_pkt->readyTime - dcc_pkt->entryTime; + stats.totQLat += cmd_at - dcc_pkt->entryTime; + stats.totBusLat += tBURST; + } else { + // Schedule write done event to decrement event count + // after the readyTime has been reached + // Only schedule latest write event to minimize events + // required; only need to ensure that final event scheduled covers + // the time that writes are outstanding and bus is active + // to holdoff power-down entry events + if (!rank_ref.writeDoneEvent.scheduled()) { + schedule(rank_ref.writeDoneEvent, dcc_pkt->readyTime); + // New event, increment count + ++rank_ref.outstandingEvents; + + } else if (rank_ref.writeDoneEvent.when() < dcc_pkt->readyTime) { + reschedule(rank_ref.writeDoneEvent, dcc_pkt->readyTime); + } + // will remove write from queue when returned to parent function + // decrement count for DRAM rank + --rank_ref.writeEntries; + + stats.writeBursts++; + if (row_hit) + stats.writeRowHits++; + stats.bytesWritten += burstSize; + stats.perBankWrBursts[dcc_pkt->bankId]++; + + } + // Update bus state to reflect when previous command was issued + return std::make_pair(cmd_at, cmd_at + burst_gap); +} + void DRAMInterface::addRankToRankDelay(Tick cmd_at) { @@ -754,6 +1018,8 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p) enableDRAMPowerdown(_p.enable_dram_powerdown), lastStatsResetTick(0), stats(*this) + //rescheduleRead_udcc(false), + //rescheduleWrite_udcc(false) { DPRINTF(DRAM, "Setting up DRAM Interface\n"); @@ -1677,6 +1943,20 @@ DRAMInterface::Rank::processPowerEvent() " rank %d\n", rank); dram.ctrl->restartScheduler(curTick()); } + + // if (dram.rescheduleRead_udcc) { + // DPRINTF(DRAM, "Scheduling next DRAM read after refreshing" + // " rank %d\n", rank); + // dram.ctrl->restartDramReadScheduler(curTick()); + // dram.rescheduleRead_udcc = false; + // } + + // if (dram.rescheduleWrite_udcc) { + // DPRINTF(DRAM, "Scheduling next DRAM write after refreshing" + // " rank %d\n", rank); + // dram.ctrl->restartDramWriteScheduler(curTick()); + // dram.rescheduleWrite_udcc = false; + // } } if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) { @@ -1767,6 +2047,14 @@ DRAMInterface::Rank::processPowerEvent() } +Tick +DRAMInterface::Rank::getRefreshEventSchdTick() +{ + assert(refreshEvent.scheduled()); + return(refreshEvent.when()); +} + + void DRAMInterface::Rank::updatePowerStats() { @@ -2113,6 +2401,62 @@ NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const return std::make_pair(selected_pkt_it, selected_col_at); 
} +std::pair +NVMInterface::chooseNextFRFCFSDCache(MemPacketQueue& queue, Tick min_col_at) +{ + // remember if we found a hit, but one that cannit issue seamlessly + bool found_prepped_pkt = false; + + auto selected_pkt_it = queue.end(); + Tick selected_col_at = MaxTick; + + for (auto i = queue.begin(); i != queue.end() ; ++i) { + MemPacket* pkt = *i; + + // select optimal NVM packet in Q + if (!pkt->isDram()) { + const Bank& bank = ranks[pkt->rank]->banks[pkt->bank]; + const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt : + bank.wrAllowedAt; + + // check if rank is not doing a refresh and thus is available, + // if not, jump to the next packet + if (burstReadyDCache(pkt)) { + DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__, + pkt->bank, pkt->rank); + + // no additional rank-to-rank or media delays + if (col_allowed_at <= min_col_at) { + // FCFS within entries that can issue without + // additional delay, such as same rank accesses + // or media delay requirements + selected_pkt_it = i; + selected_col_at = col_allowed_at; + // no need to look through the remaining queue entries + DPRINTF(NVM, "%s Seamless buffer hit\n", __func__); + break; + } else if (!found_prepped_pkt) { + // packet is to prepped region but cannnot issue + // seamlessly; remember this one and continue + selected_pkt_it = i; + selected_col_at = col_allowed_at; + DPRINTF(NVM, "%s Prepped packet found \n", __func__); + found_prepped_pkt = true; + } + } else { + DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__, + pkt->bank, pkt->rank); + } + } + } + + if (selected_pkt_it == queue.end()) { + DPRINTF(NVM, "%s no available NVM ranks found\n", __func__); + } + + return std::make_pair(selected_pkt_it, selected_col_at); +} + void NVMInterface::chooseRead(MemPacketQueue& queue) { @@ -2208,6 +2552,96 @@ NVMInterface::chooseRead(MemPacketQueue& queue) } } +void +NVMInterface::processReadPkt(MemPacket* pkt) +{ + Tick cmd_at = std::max(curTick(), nextReadAt); + + assert(numReadsToIssue > 0); + + numReadsToIssue--; + + assert(pkt->readyTime == MaxTick); + assert(!pkt->isDram()); + assert(pkt->isRead()); + + // get the bank + Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank]; + + // issueing a read, inc counter and verify we haven't overrun + numPendingReads++; + assert(numPendingReads <= maxPendingReads); + + // increment the bytes accessed and the accesses per row + bank_ref.bytesAccessed += burstSize; + + // Verify command bandiwth to issue + // Host can issue read immediately uith buffering closer + // to the NVM. 
The actual execution at the NVM may be delayed + // due to busy resources + if (twoCycleRdWr) { + cmd_at = ctrl->verifyMultiCmd(cmd_at, + maxCommandsPerWindow, tCK); + } else { + cmd_at = ctrl->verifySingleCmd(cmd_at, + maxCommandsPerWindow); + } + + // Update delay to next read + // Ensures single read command issued per cycle + nextReadAt = cmd_at + tCK; + + // If accessing a new location in this bank, update timing + // and stats + if (bank_ref.openRow != pkt->row) { + // update the open bank, re-using row field + bank_ref.openRow = pkt->row; + + // sample the bytes accessed to a buffer in this bank + // here when we are re-buffering the data + stats.bytesPerBank.sample(bank_ref.bytesAccessed); + // start counting anew + bank_ref.bytesAccessed = 0; + + // holdoff next command to this bank until the read completes + // and the data has been successfully buffered + // can pipeline accesses to the same bank, sending them + // across the interface B2B, but will incur full access + // delay between data ready responses to different buffers + // in a bank + bank_ref.actAllowedAt = std::max(cmd_at, + bank_ref.actAllowedAt) + tREAD; + } + // update per packet readyTime to holdoff burst read operation + // overloading readyTime, which will be updated again when the + // burst is issued + pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt); + DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. " + "Data ready at %d\n", + bank_ref.bank, cmd_at, pkt->readyTime); + + // Insert into read ready queue. It will be handled after + // the media delay has been met + if (readReadyQueue.empty()) { + assert(!readReadyEvent.scheduled()); + schedule(readReadyEvent, pkt->readyTime); + } else if (readReadyEvent.when() > pkt->readyTime) { + // move it sooner in time, to the first read with data + reschedule(readReadyEvent, pkt->readyTime); + } else { + assert(readReadyEvent.scheduled()); + } + readReadyQueue.push_back(pkt->readyTime); + +} + +Tick +NVMInterface::nextReadReadyEventTick() +{ + assert(readReadyEvent.scheduled()); + return readReadyEvent.when(); +} + void NVMInterface::processReadReadyEvent() { @@ -2262,6 +2696,15 @@ NVMInterface::burstReady(MemPacket* pkt) const { return (read_rdy || write_rdy); } +bool +NVMInterface::burstReadyDCache(MemPacket* pkt) { + bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(false)) && + (pkt->readyTime <= curTick()) && (numReadDataReady > 0); + bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(false) && + !writeRespQueueFull(); + return (read_rdy || write_rdy); +} + std::pair NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at) { diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh index d1bf671e88..9c61812ab7 100644 --- a/src/mem/mem_interface.hh +++ b/src/mem/mem_interface.hh @@ -52,14 +52,20 @@ #include #include +#include "base/compiler.hh" #include "base/statistics.hh" #include "enums/AddrMap.hh" #include "enums/PageManage.hh" #include "mem/abstract_mem.hh" -#include "mem/drampower.hh" + + +#include "mem/dcache_ctrl.hh" #include "mem/mem_ctrl.hh" -#include "params/DRAMInterface.hh" +#include "mem/drampower.hh" +#include "mem/qos/mem_ctrl.hh" + #include "params/MemInterface.hh" +#include "params/DRAMInterface.hh" #include "params/NVMInterface.hh" #include "sim/eventq.hh" @@ -107,7 +113,7 @@ class MemInterface : public AbstractMemory /** * A pointer to the parent MemCtrl instance */ - MemCtrl* ctrl; + QoS::MemCtrl* ctrl; /** * Number of commands that can issue in the defined controller @@ -179,7 +185,7 @@ class MemInterface : 
public AbstractMemory * @param command_window size of command window used to * check command bandwidth */ - void setCtrl(MemCtrl* _ctrl, unsigned int command_window); + void setCtrl(QoS::MemCtrl* _ctrl, unsigned int command_window); /** * Get an address in a dense range which starts from 0. The input @@ -697,6 +703,8 @@ class DRAMInterface : public MemInterface void processWakeUpEvent(); EventFunctionWrapper wakeUpEvent; + Tick getRefreshEventSchdTick(); + protected: RankStats stats; }; @@ -882,6 +890,7 @@ class DRAMInterface : public MemInterface } public: + /** * Initialize the DRAM interface and verify parameters */ @@ -926,6 +935,8 @@ class DRAMInterface : public MemInterface */ Tick commandOffset() const override { return (tRP + tRCD); } + Tick getTBurst() { return tBURST; } + /* * Function to calulate unloaded, closed bank access latency */ @@ -960,6 +971,22 @@ class DRAMInterface : public MemInterface doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, const std::vector& queue); + + /** + * Specifically for dcache_ctrlr only. + * Actually do the burst - figure out the latency it + * will take to service the req based on bank state, channel state etc + * and then update those states to account for this request. Based + * on this, update the packet's "readyTime" and move it to the + * response q from where it will eventually go back to the outside + * world. + * + * @param dcc_pkt The packet created from the outside world pkt + * @param next_burst_at Minimum bus timing requirement from controller + */ + std::pair + doBurstAccess(MemPacket* dcc_pkt, Tick next_burst_at); + /** * Check if a burst operation can be issued to the DRAM * @@ -973,6 +1000,12 @@ class DRAMInterface : public MemInterface return ranks[pkt->rank]->inRefIdleState(); } + Tick + getRankRefEventSchdTick(MemPacket* pkt) + { + return ranks[pkt->rank]->getRefreshEventSchdTick(); + } + /** * This function checks if ranks are actively refreshing and * therefore busy. The function also checks if ranks are in @@ -1009,6 +1042,11 @@ class DRAMInterface : public MemInterface */ void checkRefreshState(uint8_t rank); + //bool rescheduleRead_udcc; + + //bool rescheduleWrite_udcc; + + DRAMInterface(const DRAMInterfaceParams &_p); }; @@ -1123,13 +1161,6 @@ class NVMInterface : public MemInterface std::deque readReadyQueue; - /** - * Check if the write response queue is empty - * - * @param Return true if empty - */ - bool writeRespQueueEmpty() const { return writeRespQueue.empty(); } - /** * Till when must we wait before issuing next read command? */ @@ -1173,6 +1204,8 @@ class NVMInterface : public MemInterface */ Tick commandOffset() const override { return tBURST; } + Tick getTBurst() { return tBURST; } + /** * Check if a burst operation can be issued to the NVM * @@ -1183,6 +1216,8 @@ class NVMInterface : public MemInterface */ bool burstReady(MemPacket* pkt) const override; + bool burstReadyDCache(MemPacket* pkt); + /** * This function checks if ranks are busy. 
* This state is true when either: @@ -1207,6 +1242,9 @@ class NVMInterface : public MemInterface std::pair chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const override; + std::pair + chooseNextFRFCFSDCache(MemPacketQueue& queue, Tick min_col_at); + /** * Add rank to rank delay to bus timing to all NVM banks in alli ranks * when access to an alternate interface is issued @@ -1222,6 +1260,10 @@ class NVMInterface : public MemInterface */ void chooseRead(MemPacketQueue& queue); + void processReadPkt(MemPacket* pkt); + + Tick nextReadReadyEventTick(); + /* * Function to calulate unloaded access latency */ @@ -1238,6 +1280,34 @@ class NVMInterface : public MemInterface return writeRespQueue.size() == maxPendingWrites; } + /** + * Check if the write response queue is empty + * + * @param Return true if empty + */ + bool writeRespQueueEmpty() const { return writeRespQueue.empty(); } + + + uint32_t + getMaxPendingWrites() + { + return maxPendingWrites; + } + + + Tick + writeRespQueueFront() + { + return writeRespQueue.front(); + } + + unsigned + writeRespQueueSize() + { + return writeRespQueue.size(); + } + + bool readsWaitingToIssue() const { diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 4ad8d46b6c..35e15d6c3e 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -824,7 +824,8 @@ class Packet : public Printable */ Packet(const RequestPtr &_req, MemCmd _cmd) : cmd(_cmd), id((PacketId)_req.get()), req(_req), - data(nullptr), addr(0), _isSecure(false), size(0), + data(nullptr), + _isSecure(false), size(0), _qosValue(0), htmReturnReason(HtmCacheFailure::NO_FAIL), htmTransactionUid(0), diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh index 02954d20d5..491dab7525 100644 --- a/src/mem/qos/mem_ctrl.hh +++ b/src/mem/qos/mem_ctrl.hh @@ -268,6 +268,12 @@ class MemCtrl : public ClockedObject virtual ~MemCtrl(); + virtual Tick getNextBurstTick() + { + panic("QoS::MemCtrl getNextBurstTick should not be called \n"); + return curTick(); + }; + /** * Gets the current bus state * @@ -275,6 +281,55 @@ class MemCtrl : public ClockedObject */ BusState getBusState() const { return busState; } + /**some virtual functions + * that will be implemented in the mem + * controllers + */ + virtual Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split = 0) { + panic("QoS::MemCtrl verifyMultiCmd should not be called \n"); + return curTick(); + }; + + virtual Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) { + panic("QoS::MemCtrl verifySingleCmd should not be called \n"); + return curTick(); + }; + + virtual bool inReadBusState(bool next_state) const { + panic("QoS::MemCtrl inReadBusState should not be called \n"); + return false; + }; + + virtual bool inWriteBusState(bool next_state) const { + panic("QoS::MemCtrl inWriteBusState should not be called \n"); + return false; + }; + + virtual bool requestEventScheduled() const { + panic("QoS::MemCtrl requestEventScheduled wrongly called \n"); + return false; + }; + + virtual bool respondEventScheduled() const { + panic("QoS::MemCtrl respondEventScheduled wrongly called \n"); + return false; + }; + + virtual void restartScheduler(Tick tick) { + panic("QoS::MemCtrl restartScheduler should not be called \n"); + }; + + virtual void restartDramReadScheduler(Tick tick) { + panic("QoS::MemCtrl restartDramReadScheduler " + "should not be called \n"); + }; + + virtual void restartDramWriteScheduler(Tick tick) { + panic("QoS::MemCtrl restartDramWriteScheduler " + "should not be called \n"); + }; + /** * Gets 
the next bus state *
diff --git a/traffGen.py b/traffGen.py
new file mode 100644
index 0000000000..0ef30b3a4f
--- /dev/null
+++ b/traffGen.py
@@ -0,0 +1,129 @@
+from m5.objects import *
+import m5
+import argparse
+from m5.objects.DRAMInterface import *
+from m5.objects.NVMInterface import *
+
+
+args = argparse.ArgumentParser()
+
+# This script takes these arguments [device model for dram cache]
+# [dram cache size] [maximum orb size]
+# [traffic mode] [duration of simulation in ticks]
+# [max address] [request injection period in ticks] [rd percentage]
+# min address is 0, data limit is 0, block size is 64B.
+# crb_max_size is 32 by default.
+
+# sample cmd: gem5.opt traffGen.py DDR3_1600_8x8 16MiB
+# 32 linear 100000000 128MiB 1000 100
+# sample cmd: gem5.opt traffGen.py DDR4_2400_16x4 1GB
+# 32 random 100000000 2GB 1000 100
+
+args.add_argument(
+    "device",
+    type = str,
+    help = "Memory device to use as a dram cache"
+)
+
+args.add_argument(
+    "dram_cache_size",
+    type = str,
+    help = "Size of the DRAM cache"
+)
+
+args.add_argument(
+    "max_orb",
+    type = int,
+    help = "Maximum outstanding requests buffer (ORB) size"
+)
+
+args.add_argument(
+    "traffic_mode",
+    type = str,
+    help = "Traffic type to use"
+)
+
+args.add_argument(
+    "duration",
+    type = int,
+    help = "Duration of simulation"
+)
+
+args.add_argument(
+    "max_address",
+    type=str,
+    help="End address of the range to be accessed",
+)
+
+args.add_argument(
+    "inj_period",
+    type = int,
+    help = "Request injection period in ticks"
+)
+
+args.add_argument(
+    "rd_prct",
+    type=int,
+    help="Read Percentage",
+)
+
+options = args.parse_args()
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = 'timing'
+
+system.generator = PyTrafficGen()
+
+system.mem_ctrl = DcacheCtrl()
+system.mem_ctrl.dram = eval(options.device)(range=AddrRange('8GB'),
+                                            in_addr_map=False)
+system.mem_ctrl.nvm = NVM_2400_1x64(range=AddrRange('8GB'))
+
+#system.mem_ctrl.dram.tREFI = "200"
+system.mem_ctrl.dram_cache_size = options.dram_cache_size
+system.mem_ctrl.orb_max_size = options.max_orb
+system.mem_ctrl.crb_max_size = "32"
+
+system.mem_ranges = [AddrRange('8GB')]
+
+system.generator.port = system.mem_ctrl.port
+
+def createRandomTraffic(tgen):
+    yield tgen.createRandom(options.duration,        # duration
+                            0,                       # min_addr
+                            AddrRange(options.max_address).end, # max_addr
+                            64,                      # block_size
+                            options.inj_period,      # min_period
+                            options.inj_period,      # max_period
+                            options.rd_prct,         # rd_perc
+                            0)                       # data_limit
+    yield tgen.createExit(0)
+
+def createLinearTraffic(tgen):
+    yield tgen.createLinear(options.duration,        # duration
+                            0,                       # min_addr
+                            AddrRange(options.max_address).end, # max_addr
+                            64,                      # block_size
+                            options.inj_period,      # min_period
+                            options.inj_period,      # max_period
+                            options.rd_prct,         # rd_perc
+                            0)                       # data_limit
+    yield tgen.createExit(0)
+
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+
+if options.traffic_mode == 'linear':
+    system.generator.start(createLinearTraffic(system.generator))
+elif options.traffic_mode == 'random':
+    system.generator.start(createRandomTraffic(system.generator))
+else:
+    print('Wrong traffic type! Exiting!')
+    exit()
+
+exit_event = m5.simulate()
\ No newline at end of file
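
Editorial note: the new NVMInterface::chooseNextFRFCFSDCache added above applies a standard FR-FCFS rule to the NVM queue: take the first packet whose column command can issue seamlessly, otherwise remember the first packet that is ready but would incur extra delay. The following is a small, standalone Python sketch of that selection rule for readers who want the logic in isolation; the Pkt tuple, queue contents and tick values are invented for illustration and are not part of the patch.

from collections import namedtuple

# Standalone sketch of the FR-FCFS rule used by chooseNextFRFCFSDCache:
# prefer the first packet that can issue seamlessly
# (col_allowed_at <= min_col_at); otherwise fall back to the first
# "prepped" packet that is ready but delayed.
Pkt = namedtuple("Pkt", ["name", "ready", "col_allowed_at"])

def choose_next_frfcfs(queue, min_col_at):
    selected = None
    for pkt in queue:
        if not pkt.ready:
            continue           # rank busy (e.g. refreshing): skip this packet
        if pkt.col_allowed_at <= min_col_at:
            return pkt         # seamless hit: FCFS among zero-delay packets
        if selected is None:
            selected = pkt     # remember first prepped packet as a fallback
    return selected            # None if nothing can issue

queue = [Pkt("A", False, 90), Pkt("B", True, 120), Pkt("C", True, 100)]
print(choose_next_frfcfs(queue, min_col_at=100))   # picks C (seamless)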
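
Editorial note: several comments above (burstTicks, getBurstWindow, verifySingleCmd) describe the same command-bandwidth idea: issued commands are counted per fixed burst window, and a command that would exceed maxCommandsPerWindow slips to the next window. The sketch below is a rough, self-contained Python illustration of that check under those assumptions; it is not the gem5 implementation, and the helper name and tick values are made up.

from collections import Counter

# Rough sketch of the burst-window bandwidth check described for
# verifySingleCmd/getBurstWindow: count commands per window and shift a
# command to the next window when the current one is already full.
def verify_single_cmd(cmd_tick, window_size, max_cmds_per_window, burst_ticks):
    window = (cmd_tick // window_size) * window_size   # burst-window aligned tick
    while burst_ticks[window] >= max_cmds_per_window:
        window += window_size                          # window full: slip to next window
    burst_ticks[window] += 1                           # record the issued command
    return max(cmd_tick, window)                       # tick at which the command can issue

burst_ticks = Counter()
ticks = [verify_single_cmd(t, window_size=10, max_cmds_per_window=2,
                           burst_ticks=burst_ticks) for t in (3, 5, 7)]
print(ticks)   # third command slips to the next window: [3, 5, 10]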
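
Editorial note: the controller fields writeHighThreshold, writeLowThreshold, minWritesPerSwitch and writesThisTime shown earlier implement the usual write-drain hysteresis (stop reads once the write queue gets close to full, drain a minimum batch of writes, then turn the bus back around). The snippet below is only a rough illustration of that general policy with made-up thresholds; the controller's actual switching conditions are more detailed.

# Hedged sketch of write-drain hysteresis: switch the bus to writes when the
# write queue crosses the high threshold (or reads run dry above the low
# threshold), and switch back to reads only after a minimum batch of writes.
def next_bus_state(state, read_q, write_q, writes_this_time,
                   high, low, min_writes_per_switch):
    if state == "READ":
        if write_q >= high or (read_q == 0 and write_q >= low):
            return "WRITE"
        return "READ"
    # state == "WRITE": keep draining until the minimum batch is done and
    # the write queue has fallen back below the high-water mark
    if writes_this_time >= min_writes_per_switch and write_q < high and read_q > 0:
        return "READ"
    return "WRITE"

print(next_bus_state("READ", read_q=4, write_q=55, writes_this_time=0,
                     high=54, low=32, min_writes_per_switch=16))   # -> WRITE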