diff --git a/src/mem/DcacheCtrl.py b/src/mem/DcacheCtrl.py new file mode 100644 index 0000000000..23c14f0485 --- /dev/null +++ b/src/mem/DcacheCtrl.py @@ -0,0 +1,109 @@ +### The copyright needs be modified for UCD/DArchR/the names of the writers + + +# Copyright (c) 2012-2020 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2013 Amin Farmahini-Farahani +# Copyright (c) 2015 University of Kaiserslautern +# Copyright (c) 2015 The University of Bologna +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
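A standalone sketch, not part of the patch: the SimObject defined below exposes the DRAM cache geometry (dram_cache_size, block_size, addr_size), and dcache_ctrl.cc later decomposes each request address into index and tag fields with ceilLog2()/bits() (see returnIndexDC()/returnTagDC()). The C++ below reproduces that arithmetic with local stand-in helpers and a hypothetical address, assuming the default 512MiB cache with 64-byte blocks.

#include <cassert>
#include <cstdint>
#include <iostream>

// Local stand-in for gem5's ceilLog2(); assumes n > 0.
static unsigned ceilLog2(uint64_t n)
{
    unsigned b = 0;
    while ((1ULL << b) < n)
        ++b;
    return b;
}

// Local stand-in for gem5's bits(addr, hi, lo): extract bit field [hi:lo].
static uint64_t bitField(uint64_t addr, unsigned hi, unsigned lo)
{
    return (addr >> lo) & ((1ULL << (hi - lo + 1)) - 1);
}

int main()
{
    const uint64_t dramCacheSize = 512ULL << 20; // 512MiB (DcacheCtrl.py default)
    const uint64_t blockSize = 64;               // bytes per cache block
    const unsigned addrSize = 64;                // request address width

    const unsigned blockBits = ceilLog2(blockSize);                 // 6
    const unsigned indexBits = ceilLog2(dramCacheSize / blockSize); // 23 (8M sets)

    const uint64_t addr = 0x123456789abcULL; // hypothetical request address
    // Same fields returnIndexDC()/returnTagDC() produce for this geometry:
    // [ tag | index | block offset ]
    const uint64_t index = bitField(addr, blockBits + indexBits - 1, blockBits);
    const uint64_t tag   = bitField(addr, addrSize - 1, blockBits + indexBits);

    assert(index < (dramCacheSize / blockSize));
    std::cout << "index=" << index << " tag=" << tag << std::endl;
    return 0;
}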
+ +from m5.params import * +from m5.proxy import * +from m5.objects.QoSMemCtrl import * + +# Enum for memory scheduling algorithms, currently First-Come +# First-Served and a First-Row Hit then First-Come First-Served +class MemSched(Enum): vals = ['fcfs', 'frfcfs'] + +# DcacheCtrl is a single-channel, single-ported DRAM cache controller +# model that aims to model the most important system-level performance +# effects of a DRAM cache in front of NVM main memory, interfacing with +# media-specific DRAM and NVM interfaces +class DcacheCtrl(QoSMemCtrl): + type = 'DcacheCtrl' + cxx_header = "mem/dcache_ctrl.hh" + + # single-ported on the system interface side, instantiate with a + # bus in front of the controller for multiple ports + port = ResponsePort("This port responds to memory requests") + + # Interface to volatile, DRAM media + dram = Param.DRAMInterface("DRAM interface") + + # Interface to non-volatile media + nvm = Param.NVMInterface("NVM interface") + + dram_cache_size = Param.MemorySize('512MiB', + "DRAM cache size") + block_size = Param.Unsigned('64', + "DRAM cache block size in bytes") + addr_size = Param.Unsigned('64', + "Address width of requests from the outside world") + orb_max_size = Param.Unsigned(256, "Outstanding Requests Buffer size") + crb_max_size = Param.Unsigned(64, "Conflicting Requests Buffer size") + + # JASON: We need to think about this a bit + # The dram interface is an abstract memory, but we don't need the backing + # store. So, null should be true, in_addr_map should be false, + # kvm_map false, and conf_table_reported false + + # read and write buffer depths are set in the interface + # the controller will read these values when instantiated + + # threshold in percent for when to forcefully trigger writes and + # start emptying the write buffer + write_high_thresh_perc = Param.Percent(85, "Threshold to force writes") + + # threshold in percent for when to start writes if the read + # queue is empty + write_low_thresh_perc = Param.Percent(50, "Threshold to start writes") + + # minimum write bursts to schedule before switching back to reads + min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before " + "switching to reads") + + # memory scheduling policy + mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy") + + # pipeline latency of the controller and PHY, split into a + # frontend part and a backend part, with reads and writes serviced + # by the queues only seeing the frontend contribution, and reads + # serviced by the memory seeing the sum of the two + static_frontend_latency = Param.Latency("10ns", "Static frontend latency") + static_backend_latency = Param.Latency("10ns", "Static backend latency") + + command_window = Param.Latency("10ns", "Window in which commands are checked for command-bus bandwidth") diff --git a/src/mem/SConscript b/src/mem/SConscript index cf7adc8668..cbe01368f8 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -47,9 +47,13 @@ SimObject('AbstractMemory.py') SimObject('AddrMapper.py') SimObject('Bridge.py') SimObject('MemCtrl.py') +SimObject('DcacheCtrl.py') SimObject('MemInterface.py') SimObject('DRAMInterface.py') SimObject('NVMInterface.py') +SimObject('DCMemInterface.py') +SimObject('DRAMDCInterface.py') +SimObject('NVMDCInterface.py') SimObject('ExternalMaster.py') SimObject('ExternalSlave.py') SimObject('MemObject.py') @@ -64,10 +68,13 @@ Source('addr_mapper.cc') Source('bridge.cc') Source('coherent_xbar.cc') Source('drampower.cc') +Source('dramcachepower.cc') Source('external_master.cc') Source('external_slave.cc')
Source('mem_ctrl.cc') +Source('dcache_ctrl.cc') Source('mem_interface.cc') +Source('dcmem_interface.cc') Source('noncoherent_xbar.cc') Source('packet.cc') Source('port.cc') @@ -116,12 +123,14 @@ DebugFlag('Bridge') DebugFlag('CommMonitor') DebugFlag('DRAM') DebugFlag('DRAMPower') +DebugFlag('DRAMDCPower') DebugFlag('DRAMState') DebugFlag('NVM') DebugFlag('ExternalPort') DebugFlag('HtmMem', 'Hardware Transactional Memory (Mem side)') DebugFlag('LLSC') DebugFlag('MemCtrl') +DebugFlag('DcacheCtrl') DebugFlag('MMU') DebugFlag('MemoryAccess') DebugFlag('PacketQueue') diff --git a/src/mem/dcache_ctrl.cc b/src/mem/dcache_ctrl.cc new file mode 100644 index 0000000000..c8a53b4ebd --- /dev/null +++ b/src/mem/dcache_ctrl.cc @@ -0,0 +1,2932 @@ +/// The copyright needs be modified for UCD/DArchR/the names of the writers +/* + * Copyright (c) 2010-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mem/dcache_ctrl.hh" + +#include "base/trace.hh" +#include "debug/DRAM.hh" +#include "debug/DcacheCtrl.hh" +#include "debug/Drain.hh" +#include "debug/NVM.hh" +#include "debug/QOS.hh" +#include "mem/mem_interface.hh" +#include "sim/system.hh" + +DcacheCtrl::DcacheCtrl(const DcacheCtrlParams &p) : + QoS::MemCtrl(p), + port(name() + ".port", *this), isTimingMode(false), + retry(false), + nextReqEvent([this]{ processNextReqEvent(); }, name()), + respondEvent([this]{ processRespondEvent(); }, name()), + dramReadEvent([this]{ processDramReadEvent(); }, name()), + respDramReadEvent([this]{ processRespDramReadEvent(); }, name()), + waitingToIssueNvmReadEvent([this] + { processWaitingToIssueNvmReadEvent(); }, name()), + nvmReadEvent([this]{ processNvmReadEvent(); }, name()), + respNvmReadEvent([this]{ processRespNvmReadEvent(); }, name()), + overallWriteEvent([this]{ processOverallWriteEvent(); }, name()), + dram(p.dram), nvm(p.nvm), + dramCacheSize(p.dram_cache_size), + blockSize(p.block_size), + addrSize(p.addr_size), + orbMaxSize(p.orb_max_size), orbSize(0), + crbMaxSize(p.crb_max_size), crbSize(0), + writeHighThreshold(p.write_high_thresh_perc * p.orb_max_size / 100.0), + writeLowThreshold(p.write_low_thresh_perc* p.orb_max_size / 100.0), + minWritesPerSwitch(p.min_writes_per_switch), + memSchedPolicy(p.mem_sched_policy), + frontendLatency(p.static_frontend_latency), + backendLatency(p.static_backend_latency), + commandWindow(p.command_window), + nextBurstAt(0), prevArrival(0), + nextReqTime(0), + stats(*this) +{ + DPRINTF(DcacheCtrl, "Setting up controller\n"); + + pktDramRead.resize(1); + pktNvmReadWaitIssue.resize(1); + pktNvmRead.resize(1); + pktDramWrite.resize(1); + pktNvmWrite.resize(1); + + stallRds = false; + drainDramWrite = false; + drainNvmWrite = false; + + if (orbMaxSize>512) { + dramWrDrainPerc = 0.25; + } + else { + dramWrDrainPerc = 0.5; + } + // NVM Write Drain is defined by the write queue size + // defined by the NVM interface + + if (orbMaxSize == 1) { + writeHighThreshold = 1; + } + + // if (orbMaxSize == 1) { + // minWritesPerSwitch = 2; + // minDrWrPerSwitch = 1; + // minNvWrPerSwitch = 1; + // } + // else { + // minWritesPerSwitch = orbMaxSize * 0.2; + // minDrWrPerSwitch = 0.7 * minWritesPerSwitch; + // minNvWrPerSwitch = minWritesPerSwitch - minDrWrPerSwitch; + // } + + minDrWrPerSwitch = 0.7 * minWritesPerSwitch; + minNvWrPerSwitch = minWritesPerSwitch - minDrWrPerSwitch; + + drWrCounter = 0; + nvWrCounter = 0; + + // dramCacheSize = dram->dramDeviceCapacity; + // dramCacheSize = dramCacheSize*1024*1024; + + tagMetadataStore.resize(dramCacheSize/blockSize); + + // Hook up interfaces to the controller + if (dram) + dram->setCtrl(this, commandWindow); + if (nvm) + nvm->setCtrl(this, commandWindow); + + fatal_if(!dram && !nvm, "Memory controller must have an interface"); + + // perform a basic check of the write thresholds + if (p.write_low_thresh_perc >= p.write_high_thresh_perc) + fatal("Write buffer low threshold %d must be smaller than the " + "high threshold %d\n", p.write_low_thresh_perc, + p.write_high_thresh_perc); +} + +void +DcacheCtrl::init() +{ + if (!port.isConnected()) { + fatal("DcacheCtrl %s is unconnected!\n", name()); + } else { + port.sendRangeChange(); + } +} + +void +DcacheCtrl::startup() +{ + // remember the memory system mode of operation + isTimingMode = system()->isTimingMode(); + + if (isTimingMode) { + // shift the bus busy time sufficiently far ahead that we never + // have to worry about negative values when computing the 
time for + // the next request, this will add an insignificant bubble at the + // start of simulation + nextBurstAt = curTick() + (dram ? dram->commandOffset() : + nvm->commandOffset()); + } +} + +Tick +DcacheCtrl::recvAtomic(PacketPtr pkt) +{ + DPRINTF(DcacheCtrl, "recvAtomic: %s 0x%x\n", + pkt->cmdString(), pkt->getAddr()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + Tick latency = 0; + // do the actual memory access and turn the packet into a response + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + dram->access(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + latency = dram->accessLatency(); + } + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + nvm->access(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + latency = nvm->accessLatency(); + } + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + return latency; +} + +Tick +DcacheCtrl::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) +{ + Tick latency = recvAtomic(pkt); + if (dram) { + dram->getBackdoor(backdoor); + } else if (nvm) { + nvm->getBackdoor(backdoor); + } + return latency; +} + +Addr +DcacheCtrl::returnTagDC(Addr request_addr, unsigned size) +{ + int index_bits = ceilLog2(dramCacheSize/blockSize); + int block_bits = ceilLog2(size); + return bits(request_addr, addrSize-1, (index_bits+block_bits)); +} + +Addr +DcacheCtrl::returnIndexDC(Addr request_addr, unsigned size) +{ + return bits(request_addr, ceilLog2(size) + + ceilLog2(dramCacheSize/blockSize)-1, ceilLog2(size)); +} + +void +DcacheCtrl::checkHitOrMiss(reqBufferEntry* orbEntry) +{ + // access the tagMetadataStore data structure to + // check if it's hit or miss + orbEntry->isHit = + tagMetadataStore.at(orbEntry->indexDC).validLine && + (orbEntry->tagDC == tagMetadataStore.at(orbEntry->indexDC).tagDC); + + if (!tagMetadataStore.at(orbEntry->indexDC).validLine && + !orbEntry->isHit) { + stats.numColdMisses++; + } + else if (tagMetadataStore.at(orbEntry->indexDC).validLine && + !orbEntry->isHit) { + stats.numHotMisses++; + } + + // always hit + // orbEntry->isHit = true; + + // always miss + // orbEntry->isHit = false; +} + +bool +DcacheCtrl::checkDirty(Addr addr) +{ + Addr index = returnIndexDC(addr, blockSize); + return (tagMetadataStore.at(index).validLine && + tagMetadataStore.at(index).dirtyLine); + + + // always dirty + //return true; + + // always clean + //return false; +} + +void +DcacheCtrl::handleDirtyCacheLine(reqBufferEntry* orbEntry) +{ + assert(orbEntry->dirtyLineAddr != -1); + + MemPacket* wbDccPkt = nvm->decodePacket(nullptr, + orbEntry->dirtyLineAddr, + orbEntry->owPkt->getSize(), + false, false); + + nvm->setupRank(wbDccPkt->rank, false); + + pktNvmWrite[0].push_back(wbDccPkt); + + if (pktNvmWrite[0].size() >= nvm->getMaxPendingWrites()) { + stallRds = true; + drainNvmWrite = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + stallRds = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, 
std::max(nextReqTime, curTick())); + } + } + + if (pktNvmWrite[0].size() > maxNvWrEv) { + maxNvWrEv = pktNvmWrite[0].size(); + stats.maxNvWrEvQ = pktNvmWrite[0].size(); + } + + stats.numWrBacks++; + + // no need to call nvm->access for the dirty line. + // Because, we already have written it in nvm, while + // we were processing it into dram cache. +} + +void +DcacheCtrl::handleRequestorPkt(PacketPtr pkt) +{ + // Set is_read and is_dram to + // "true", to do initial dram Read + MemPacket* dcc_pkt = dram->decodePacket(pkt, + pkt->getAddr(), + pkt->getSize(), + true, + true); + + // pass the second argument "true", for + // initial DRAM Read for all the received packets + dram->setupRank(dcc_pkt->rank, true); + + reqBufferEntry* entry = new reqBufferEntry( + true, curTick(), + returnTagDC(pkt->getAddr(), pkt->getSize()), + returnIndexDC(pkt->getAddr(), pkt->getSize()), + pkt, dcc_pkt, + dramRead, false, false, + -1, false, + curTick(), MaxTick, + MaxTick, MaxTick, MaxTick, + MaxTick, + MaxTick, MaxTick, MaxTick + ); + + reqBuffer.emplace(pkt->getAddr(), entry); + + if (pkt->isRead()) { + logRequest(DcacheCtrl::READ, pkt->requestorId(), pkt->qosValue(), + pkt->getAddr(), 1); + } + else { + //copying the packet + PacketPtr copyOwPkt = new Packet(pkt, false, pkt->isRead()); + + accessAndRespond(pkt, frontendLatency, false); + + reqBuffer.at(copyOwPkt->getAddr()) = new reqBufferEntry( + entry->validEntry, entry->arrivalTick, + entry->tagDC, entry->indexDC, + copyOwPkt, entry->dccPkt, + entry->state, entry->isHit, entry->conflict, + entry->dirtyLineAddr, + entry->handleDirtyLine, + entry->drRd, + entry->drWr, + entry->nvWait, + entry->nvRd, + entry->nvWr, + entry->nvmIssueReadyTime, + entry->dramRdDevTime, + entry->dramWrDevTime, + entry->nvmRdDevTime + ); + delete entry; + + entry = reqBuffer.at(copyOwPkt->getAddr()); + + logRequest(DcacheCtrl::WRITE, copyOwPkt->requestorId(), + copyOwPkt->qosValue(), + copyOwPkt->getAddr(), 1); + } + + checkHitOrMiss(entry); + + if (checkDirty(entry->owPkt->getAddr()) && !entry->isHit) { + entry->dirtyLineAddr = tagMetadataStore.at(entry->indexDC).nvmAddr; + entry->handleDirtyLine = true; + } + + // Updating Tag & Metadata + tagMetadataStore.at(entry->indexDC).tagDC = entry->tagDC; + tagMetadataStore.at(entry->indexDC).indexDC = entry->indexDC; + tagMetadataStore.at(entry->indexDC).validLine = true; + + if (entry->owPkt->isRead()) { + if (entry->isHit) { + tagMetadataStore.at(entry->indexDC).dirtyLine = + tagMetadataStore.at(entry->indexDC).dirtyLine; + } + else { + tagMetadataStore.at(entry->indexDC).dirtyLine = false; + } + } + else { + tagMetadataStore.at(entry->indexDC).dirtyLine = true; + } + + tagMetadataStore.at(entry->indexDC).nvmAddr = + entry->owPkt->getAddr(); + + if (entry->owPkt->isRead()) { + stats.readReqs++; + } + else { + stats.writeReqs++; + } +} + +bool +DcacheCtrl::checkConflictInDramCache(PacketPtr pkt) +{ + unsigned indexDC = returnIndexDC(pkt->getAddr(), pkt->getSize()); + + for (auto e = reqBuffer.begin(); e != reqBuffer.end(); ++e) { + if (indexDC == e->second->indexDC + && e->second->validEntry + //&& confReqBuffer.size() < crbMaxSize + ) { + + e->second->conflict = true; + + return true; + } + } + + return false; +} + +void +DcacheCtrl::checkConflictInCRB(reqBufferEntry* orbEntry) +{ + for (auto e = confReqBuffer.begin(); e != confReqBuffer.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(),entry.second->getSize()) + == orbEntry->indexDC) { + orbEntry->conflict = true; + break; + } + } +} + +void 
+DcacheCtrl::logStatsDcache(reqBufferEntry* orbEntry) +{ + if (orbEntry->owPkt->isRead()) { + if (orbEntry->isHit) { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + + long long int stateTick1 = (curTick() - orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + stats.numHits++; + stats.numRdHits++; + } + else { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->nvWait != MaxTick); + assert(orbEntry->nvRd != MaxTick); + assert(orbEntry->drWr != MaxTick); + assert(orbEntry->nvmIssueReadyTime != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + assert(orbEntry->nvmRdDevTime != MaxTick); + assert(orbEntry->dramWrDevTime != MaxTick); + + long long int stateTick1 = + (orbEntry->nvWait - orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + long long int stateTick2 = + (orbEntry->nvRd - orbEntry->nvWait); + assert(stateTick2 >= 0); + stats.timeInWaitingToIssueNvmRead += stateTick2; + + if (stateTick2 > 0) { + stats.totNumPktsNvmRdWait++; + } + + long long int stateTick3 = + (orbEntry->drWr - orbEntry->nvRd); + assert(stateTick3 > 0); + stats.timeInNvmRead += stateTick3; + + stats.totNumPktsNvmRd++; + stats.nvRdDevTime += orbEntry->nvmRdDevTime; + + long long int stateQT2 = stateTick2 + stateTick3 - + (orbEntry->nvmRdDevTime); + assert(stateQT2 >= 0); + stats.nvmRdQingTime += stateQT2; + + long long int stateTick4 = + (orbEntry->dccPkt->readyTime - curTick()); + assert(stateTick4 > 0); + stats.timeInDramWrite += stateTick4; + + stats.totNumPktsDrWr++; + stats.drWrDevTime += orbEntry->dramWrDevTime; + + long long int stateQT3 = stateTick4 - + (orbEntry->dramWrDevTime); + assert(stateQT3 >= 0); + stats.drWrQingTime += stateQT3; + + stats.numMisses++; + stats.numRdMisses++; + } + } + else { + if (orbEntry->isHit) { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->drWr != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + assert(orbEntry->dramWrDevTime != MaxTick); + + long long int stateTick1 = + (orbEntry->drWr - orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + long long int stateTick2 = + (orbEntry->dccPkt->readyTime - curTick()); + assert(stateTick2 > 0); + stats.timeInDramWrite += stateTick2; + + stats.totNumPktsDrWr++; + stats.drWrDevTime += orbEntry->dramWrDevTime; + + long long int stateQT2 = + stateTick2 - (orbEntry->dramWrDevTime); + assert(stateQT2 >= 0); + stats.drWrQingTime += stateQT2; + + stats.numHits++; + stats.numWrHits++; + } + else { + assert(orbEntry->drRd != MaxTick); + assert(orbEntry->nvWait != MaxTick); + assert(orbEntry->nvRd != MaxTick); + assert(orbEntry->drWr != MaxTick); + assert(orbEntry->nvmIssueReadyTime != MaxTick); + assert(orbEntry->dramRdDevTime != MaxTick); + assert(orbEntry->nvmRdDevTime != MaxTick); + assert(orbEntry->dramWrDevTime != MaxTick); + + long long int stateTick1 = + (orbEntry->nvWait - 
orbEntry->drRd); + assert(stateTick1 > 0); + stats.timeInDramRead += stateTick1; + + stats.totNumPktsDrRd++; + stats.drRdDevTime += orbEntry->dramRdDevTime; + + long long int stateQT1 = stateTick1 - + (orbEntry->dramRdDevTime); + assert(stateQT1 >= 0); + stats.drRdQingTime += stateQT1; + + long long int stateTick2 = + (orbEntry->nvRd - orbEntry->nvWait); + + assert(stateTick2 >= 0); + stats.timeInWaitingToIssueNvmRead += stateTick2; + + if (stateTick2 > 0) { + stats.totNumPktsNvmRdWait++; + } + + long long int stateTick3 = + (orbEntry->drWr - orbEntry->nvRd); + assert(stateTick3 > 0); + stats.timeInNvmRead += stateTick3; + + stats.totNumPktsNvmRd++; + stats.nvRdDevTime += orbEntry->nvmRdDevTime; + + long long int stateQT2 = stateTick2 + stateTick3 - + (orbEntry->nvmRdDevTime); + assert(stateQT2 >= 0); + stats.nvmRdQingTime += stateQT2; + + long long int stateTick4 = + (orbEntry->dccPkt->readyTime - curTick()); + assert(stateTick4 > 0); + stats.timeInDramWrite += stateTick4; + + stats.totNumPktsDrWr++; + stats.drWrDevTime += orbEntry->dramWrDevTime; + + long long int stateQT3 = stateTick4 - + (orbEntry->dramWrDevTime); + assert(stateQT3 >= 0); + stats.drWrQingTime += stateQT3; + + stats.numMisses++; + stats.numWrMisses++; + } + } +} + +bool +DcacheCtrl::resumeConflictingReq(reqBufferEntry* orbEntry) +{ + bool conflictFound = false; + + if (orbEntry->owPkt->isWrite()) { + isInWriteQueue.erase(orbEntry->owPkt->getAddr()); + } + + logStatsDcache(orbEntry); + + for (auto e = confReqBuffer.begin(); e != confReqBuffer.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(), entry.second->getSize()) + == orbEntry->indexDC) { + + conflictFound = true; + + Addr confAddr = entry.second->getAddr(); + + reqBuffer.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry->dccPkt; + + delete orbEntry; + + handleRequestorPkt(entry.second); + + reqBuffer.at(confAddr)->arrivalTick = entry.first; + + confReqBuffer.erase(e); + + checkConflictInCRB(reqBuffer.at(confAddr)); + + if (pktDramRead[0].empty() && !stallRds) { + assert(!dramReadEvent.scheduled()); + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } else { + assert(dramReadEvent.scheduled() || stallRds); + } + + pktDramRead[0].push_back(reqBuffer.at(confAddr)->dccPkt); + + if (pktDramRead[0].size() > maxDrRdEv) { + maxDrRdEv = pktDramRead[0].size(); + stats.maxDrRdEvQ = pktDramRead[0].size(); + } + + break; + } + + } + + if (!conflictFound) { + + reqBuffer.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry->dccPkt; + + delete orbEntry; + } + + return conflictFound; +} + +Tick +DcacheCtrl::earliestDirtyLineInDrRdResp() +{ + for (int i=0; ihandleDirtyLine) { + return reqBuffer.at(addrDramRespReady.at(i))->dccPkt->readyTime; + } + } + return MaxTick; +} + +bool +DcacheCtrl::recvTimingReq(PacketPtr pkt) +{ + // This is where we enter from the outside world + + DPRINTF(DcacheCtrl, "recvTimingReq: request %s addr %lld size %d\n", + pkt->cmdString(), pkt->getAddr(), pkt->getSize()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller\n"); + + // Calc avg gap between requests + if (prevArrival != 0) { + stats.totGap += curTick() - prevArrival; + } + prevArrival = curTick(); + + // What type of media does this packet access? + // We set a flag to make sure every single packet + // checks DRAM first. 
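A standalone sketch, not part of the patch: just below, recvTimingReq() clamps every request to at most one burst with std::min((addr | (burst_size - 1)) + 1, addr + pkt->getSize()) - addr, so a packet never spans a burst boundary. The same arithmetic is shown here with hypothetical addresses, assuming 64-byte bursts.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Size of the access after clamping it to the current burst window,
// i.e. the same expression used in recvTimingReq() below.
static uint64_t burstClampedSize(uint64_t addr, uint64_t pkt_size,
                                 uint64_t burst_size)
{
    return std::min((addr | (burst_size - 1)) + 1, addr + pkt_size) - addr;
}

int main()
{
    const uint64_t burst = 64; // hypothetical bytesPerBurst()
    assert(burstClampedSize(0x1000, 32, burst) == 32);  // fits in one burst
    assert(burstClampedSize(0x1030, 64, burst) == 16);  // clipped at 0x1040
    assert(burstClampedSize(0x1040, 128, burst) == 64); // at most one burst
    return 0;
}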
+ // bool is_dram = true; + + // Validate that pkt's address maps to the dram and nvm + assert(nvm && nvm->getAddrRange().contains(pkt->getAddr())); + //assert(dram && dram->getAddrRange().contains(pkt->getAddr())); + + + // Find out how many memory packets a pkt translates to + // If the burst size is equal or larger than the pkt size, then a pkt + // translates to only one memory packet. Otherwise, a pkt translates to + // multiple memory packets + + Addr addr = pkt->getAddr(); + + unsigned burst_size = dram->bytesPerBurst(); + + unsigned size = std::min((addr | (burst_size - 1)) + 1, + addr + pkt->getSize()) - addr; + + // process merging for writes + if (!pkt->isRead()) { + stats.writePktSize[ceilLog2(size)]++; + stats.writeBursts++; + stats.requestorWriteAccesses[pkt->requestorId()]++; + + assert(pkt->getSize() != 0); + + bool merged = isInWriteQueue.find(pkt->getAddr()) != + isInWriteQueue.end(); + + if (merged) { + + stats.mergedWrBursts++; + + accessAndRespond(pkt, frontendLatency, false); + + return true; + } + } + + // process forwarding for reads + bool foundInORB = false; + bool foundInCRB = false; + bool foundInNWB = false; + + if (pkt->isRead()) { + stats.readPktSize[ceilLog2(size)]++; + stats.readBursts++; + stats.requestorReadAccesses[pkt->requestorId()]++; + + assert(pkt->getSize() != 0); + + if (isInWriteQueue.find(pkt->getAddr()) != isInWriteQueue.end()) { + + if (!reqBuffer.empty()) { + for (const auto& e : reqBuffer) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->validEntry && + e.second->owPkt->isWrite() && + e.second->owPkt->getAddr() <= addr && + ((addr + size) <= + (e.second->owPkt->getAddr() + + e.second->owPkt->getSize()))) { + + foundInORB = true; + + stats.servicedByWrQ++; + + stats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !confReqBuffer.empty()) { + for (const auto& e : confReqBuffer) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->isWrite() && + e.second->getAddr() <= addr && + ((addr + size) <= + (e.second->getAddr() + e.second->getSize()))) { + + foundInCRB = true; + + stats.servicedByWrQ++; + + stats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !foundInCRB && !pktNvmWrite[0].empty()) { + for (int i=0; igetAddr() <= addr && + ((addr + size) <= + (pktNvmWrite[0].at(i)->getAddr() + + pktNvmWrite[0].at(i)->getSize()))) { + + foundInNWB = true; + + stats.servicedByWrQ++; + + stats.bytesReadWrQ += burst_size; + + break; + } + } + } + } + + if (foundInORB || foundInCRB || foundInNWB) { + + accessAndRespond(pkt, frontendLatency, false); + + return true; + } + } + + // process conflicting requests + // calculate dram address: ignored for now (because Dsize=Nsize) + if (checkConflictInDramCache(pkt)) { + + stats.totNumConf++; + + if (confReqBuffer.size()>=crbMaxSize) { + + stats.totNumConfBufFull++; + + retry = true; + + if (pkt->isRead()) { + stats.numRdRetry++; + } + else { + stats.numWrRetry++; + } + + return false; + } + + confReqBuffer.push_back(std::make_pair(curTick(), pkt)); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + if (confReqBuffer.size() > maxConf) { + maxConf = confReqBuffer.size(); + stats.maxNumConf = confReqBuffer.size(); + } + + return true; + } + + // process cases where ORB is full + if (reqBuffer.size() >= orbMaxSize) { + + retry = true; + + if (pkt->isRead()) { + stats.numRdRetry++; + } + else { + stats.numWrRetry++; + } + + return false; + } 
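A standalone sketch, not part of the patch: in the conflict path above, checkConflictInDramCache() flags a new request as conflicting when it maps to the same direct-mapped set (the same tagMetadataStore entry) as a request already in the ORB, after which the request is parked in the CRB or refused with a retry. That aliasing rule reduces to comparing set indices; the helper below is hypothetical and assumes a power-of-two number of sets, matching the 512MiB/64B default geometry.

#include <cassert>
#include <cstdint>

static unsigned ceilLog2(uint64_t n)
{
    unsigned b = 0;
    while ((1ULL << b) < n)
        ++b;
    return b;
}

// Two addresses conflict iff they select the same direct-mapped set,
// regardless of their tags (hypothetical helper, not in the patch).
static bool sameDramCacheSet(uint64_t a, uint64_t b,
                             uint64_t dram_cache_size, uint64_t block_size)
{
    const unsigned blockBits = ceilLog2(block_size);
    const uint64_t numSets = dram_cache_size / block_size;
    return ((a >> blockBits) % numSets) == ((b >> blockBits) % numSets);
}

int main()
{
    const uint64_t cacheSize = 512ULL << 20, blockSize = 64;
    // Same 64-byte line -> same set -> conflict.
    assert(sameDramCacheSet(0x1000, 0x1010, cacheSize, blockSize));
    // Addresses exactly one cache capacity apart alias to the same set.
    assert(sameDramCacheSet(0x1000, 0x1000 + cacheSize, cacheSize, blockSize));
    // Neighbouring lines map to different sets -> no conflict.
    assert(!sameDramCacheSet(0x1000, 0x1040, cacheSize, blockSize));
    return 0;
}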
+ + // if none of the above cases happens, + // then add the pkt to the outstanding requests buffer + + handleRequestorPkt(pkt); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + if (pktDramRead[0].empty() && !stallRds) { + + assert(!dramReadEvent.scheduled()); + + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + + } else { + assert(dramReadEvent.scheduled() || stallRds); + } + + pktDramRead[0].push_back(reqBuffer.at(pkt->getAddr())->dccPkt); + + if (pktDramRead[0].size() > maxDrRdEv) { + maxDrRdEv = pktDramRead[0].size(); + stats.maxDrRdEvQ = pktDramRead[0].size(); + } + + return true; +} + +void +DcacheCtrl::processDramReadEvent() +{ + if (stallRds) { + return; + } + + assert(!pktDramRead[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::WRITE); + + if (switched_cmd_type) { + stats.wrToRdTurnAround++; + } + + for (auto queue = pktDramRead.rbegin(); + queue != pktDramRead.rend(); ++queue) { + to_read = chooseNext((*queue), switched_cmd_type ? + minWriteToReadDataGap() : 0, true); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + if (!read_found) { + + schedule(dramReadEvent, + std::max(nextReqTime, curTick()+dram->getTBurst())); + + return; + } + + reqBufferEntry* orbEntry = reqBuffer.at((*to_read)->getAddr()); + + // sanity check for the chosen packet + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isRead()); + assert(orbEntry->state == dramRead); + + if (orbEntry->handleDirtyLine) { + if (pktNvmWrite[0].size() >= nvm->getMaxPendingWrites()) { + stallRds = true; + drainNvmWrite = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, + std::max(nextReqTime, curTick())); + } + return; + } + + if (numDirtyLinesInDrRdRespQ >= nvm->getMaxPendingWrites()) { + Tick schedTick = earliestDirtyLineInDrRdResp(); + assert(schedTick != MaxTick); + schedule(dramReadEvent, std::max(nextReqTime, schedTick+1)); + return; + } + + if (nvm->writeRespQueueFull()) { + assert(!dramReadEvent.scheduled()); + schedule(dramReadEvent, std::max(nextReqTime, + nvm->writeRespQueueFront()+2)); + return; + } + } + + busState = DcacheCtrl::READ; + + assert(packetReady(orbEntry->dccPkt)); + + Tick cmd_at = doBurstAccess(orbEntry->dccPkt); + + orbEntry->dramRdDevTime = orbEntry->dccPkt->readyTime - cmd_at; + + // sanity check + assert(orbEntry->dccPkt->size <= (orbEntry->dccPkt->isDram() ? 
+ dram->bytesPerBurst() : + nvm->bytesPerBurst())); + assert(orbEntry->dccPkt->readyTime >= curTick()); + + if (orbEntry->owPkt->isRead() && orbEntry->isHit) { + logResponse(DcacheCtrl::READ, + orbEntry->dccPkt->requestorId(), + orbEntry->dccPkt->qosValue(), + orbEntry->owPkt->getAddr(), 1, + orbEntry->dccPkt->readyTime - orbEntry->dccPkt->entryTime); + } + + if (addrDramRespReady.empty()) { + assert(!respDramReadEvent.scheduled()); + schedule(respDramReadEvent, orbEntry->dccPkt->readyTime); + } + else { + assert(reqBuffer.at(addrDramRespReady.back())->dccPkt->readyTime + <= orbEntry->dccPkt->readyTime); + + assert(respDramReadEvent.scheduled()); + } + + addrDramRespReady.push_back(orbEntry->owPkt->getAddr()); + + if (addrDramRespReady.size() > maxDrRdRespEv) { + maxDrRdRespEv = addrDramRespReady.size(); + stats.maxDrRdRespEvQ = addrDramRespReady.size(); + } + + if (orbEntry->handleDirtyLine) { + numDirtyLinesInDrRdRespQ++; + } + + //** keep the state as it is, no transition + orbEntry->state = dramRead; + + pktDramRead[0].erase(to_read); + + if (!pktDramRead[0].empty()) { + + assert(!dramReadEvent.scheduled()); + + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } +} + +void +DcacheCtrl::processRespDramReadEvent() +{ + assert(!addrDramRespReady.empty()); + + reqBufferEntry* orbEntry = reqBuffer.at(addrDramRespReady.front()); + + // A series of sanity check + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isRead()); + assert(orbEntry->state == dramRead); + assert(orbEntry->dccPkt->readyTime == curTick()); + + if (orbEntry->handleDirtyLine) { + handleDirtyCacheLine(orbEntry); + } + + // A flag which is used for retrying read requests + // in case one slot in ORB becomes available here + // (happens only for read hits) + bool canRetry = false; + + dram->respondEvent(orbEntry->dccPkt->rank); + + // Read Hit + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency, + false); + reqBuffer.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->drRd, + orbEntry->drWr, + orbEntry->nvWait, + orbEntry->nvRd, + orbEntry->nvWr, + orbEntry->nvmIssueReadyTime, + orbEntry->dramRdDevTime, + orbEntry->dramWrDevTime, + orbEntry->nvmRdDevTime); + delete orbEntry; + + orbEntry = reqBuffer.at(addrDramRespReady.front()); + } + + // Write Hit + if (orbEntry->owPkt->isWrite() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + // This is a write request in initial read state. + // Delete its dcc packet which is read and create + // a new one which is write. 
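A standalone sketch, not part of the patch: the write-hit path below (and the miss-fill path in processRespNvmReadEvent()) queues a DRAM write and then decides whether to stall reads and schedule overallWriteEvent. The constructor derives writeHighThreshold and writeLowThreshold from the configured percentages of orb_max_size and picks dramWrDrainPerc (0.5 for ORBs of 512 entries or fewer, 0.25 otherwise). The numbers below use the DcacheCtrl.py defaults; the queue occupancies are hypothetical.

#include <cassert>
#include <iostream>

int main()
{
    // Defaults from DcacheCtrl.py.
    const unsigned orbMaxSize = 256;
    const unsigned writeHighPerc = 85;
    const unsigned writeLowPerc = 50;

    // Constructor arithmetic from dcache_ctrl.cc.
    const double writeHighThreshold = writeHighPerc * orbMaxSize / 100.0; // 217.6
    const double writeLowThreshold  = writeLowPerc * orbMaxSize / 100.0;  // 128
    const double dramWrDrainPerc    = (orbMaxSize > 512) ? 0.25 : 0.5;    // 0.5

    // Hypothetical queue occupancies:
    //   reads  = pktDramRead + pktNvmReadWaitIssue + pktNvmRead
    //   writes = pktDramWrite + pktNvmWrite
    unsigned pendingReads = 0;
    unsigned pendingWrites = 130;

    // Trigger checked after queuing work: stall reads and start draining
    // when there is nothing left to read, or the write backlog crosses
    // the high threshold.
    const bool stallReadsAndDrain =
        (pendingReads == 0 && pendingWrites != 0) ||
        (pendingWrites >= writeHighThreshold);
    assert(stallReadsAndDrain);

    std::cout << "forced DRAM drain at " << orbMaxSize * dramWrDrainPerc
              << " queued DRAM writes; high/low write thresholds "
              << writeHighThreshold << "/" << writeLowThreshold << std::endl;
    return 0;
}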
+ delete orbEntry->dccPkt; + + orbEntry->dccPkt = dram->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + false, true); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "false" to + // indicate a write access to dram + dram->setupRank(orbEntry->dccPkt->rank, false); + + //** transition to dramWrite + orbEntry->state = dramWrite; + orbEntry->drWr = curTick(); + + pktDramWrite[0].push_back(orbEntry->dccPkt); + + if (pktDramWrite[0].size() >= (orbMaxSize*dramWrDrainPerc)) { + stallRds = true; + drainDramWrite = true; + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, + std::max(nextReqTime, curTick())); + } + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, + std::max(nextReqTime, curTick())); + } + + } + + if (pktDramWrite[0].size() > maxDrWrEv) { + maxDrWrEv = pktDramWrite[0].size(); + stats.maxDrWrEvQ = pktDramWrite[0].size(); + } + } + + // Miss + if ((orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + !orbEntry->isHit) || + (orbEntry->owPkt->isWrite() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + !orbEntry->isHit)) { + // initiate a NVM read + // delete the current dcc pkt which is dram read. + delete orbEntry->dccPkt; + + // creating an nvm read dcc-pkt + orbEntry->dccPkt = nvm->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + true, false); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "true" to + // indicate a read access to nvm + nvm->setupRank(orbEntry->dccPkt->rank, true); + + // ready time will be calculated later in doBurstAccess + // in processNvmReadEvent + orbEntry->dccPkt->readyTime = MaxTick; + + //** transition to waitingToIssueNvmRead + // setting the state to waitingToIssueNvmRead + orbEntry->state = waitingToIssueNvmRead; + orbEntry->nvWait = curTick(); + + if (pktNvmReadWaitIssue[0].empty() && !stallRds) { + assert(!waitingToIssueNvmReadEvent.scheduled()); + schedule(waitingToIssueNvmReadEvent, curTick()); + } + else { + assert(waitingToIssueNvmReadEvent.scheduled() || stallRds); + } + + pktNvmReadWaitIssue[0].push_back(orbEntry->dccPkt); + + if (pktNvmReadWaitIssue[0].size() > maxNvRdIssEv) { + maxNvRdIssEv = pktNvmReadWaitIssue[0].size(); + stats.maxNvRdIssEvQ = pktNvmReadWaitIssue[0].size(); + } + } + + if (orbEntry->handleDirtyLine) { + numDirtyLinesInDrRdRespQ--; + } + + addrDramRespReady.pop_front(); + + if (!addrDramRespReady.empty()) { + assert(reqBuffer.at(addrDramRespReady.front())->dccPkt->readyTime + >= curTick()); + assert(!respDramReadEvent.scheduled()); + schedule(respDramReadEvent, + reqBuffer.at(addrDramRespReady.front())->dccPkt->readyTime); + } else { + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + // if there is nothing left in any queue, signal a drain + if (drainState() == DrainState::Draining && + !wrsNum && !rdsNum && + allIntfDrained()) { + DPRINTF(Drain, "Controller done draining\n"); + signalDrainDone(); + } else if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + 
orbEntry->isHit) { + // check the refresh state and kick the refresh event loop + // into action again if banks already closed and just waiting + // for read to complete + dram->checkRefreshState(orbEntry->dccPkt->rank); + } + } + + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(orbEntry); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } +} + +void +DcacheCtrl::processWaitingToIssueNvmReadEvent() +{ + if (stallRds) { + return; + } + + if (nvm->readsWaitingToIssue()) { + assert(!pktNvmReadWaitIssue[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::WRITE); + + if (switched_cmd_type) { + stats.wrToRdTurnAround++; + } + + for (auto queue = pktNvmReadWaitIssue.rbegin(); + queue != pktNvmReadWaitIssue.rend(); ++queue) { + to_read = chooseNext((*queue), switched_cmd_type ? + minWriteToReadDataGap() : 0, false); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + auto e = reqBuffer.at(pktNvmReadWaitIssue[0].front()->getAddr()); + + if (read_found) { + e = reqBuffer.at((*to_read)->getAddr()); + } + + assert(e->validEntry); + assert(e->state == waitingToIssueNvmRead); + assert(!e->dccPkt->isDram()); + assert(!e->isHit); + + nvm->processReadPkt(e->dccPkt); + + e->nvmIssueReadyTime = e->dccPkt->readyTime; + + //** transition to nvmread + e->state = nvmRead; + e->nvRd = e->dccPkt->readyTime; + + if (pktNvmRead[0].empty()) { + assert(!nvmReadEvent.scheduled()); + schedule(nvmReadEvent, std::max(nextReqTime, + e->dccPkt->readyTime+1)); + } else if (nvmReadEvent.when() > e->dccPkt->readyTime) { + // move it sooner in time, to the first read with data + reschedule(nvmReadEvent, std::max(nextReqTime, + e->dccPkt->readyTime+1)); + } else { + assert(nvmReadEvent.scheduled()); + } + + pktNvmRead[0].push_back(e->dccPkt); + + if (pktNvmRead[0].size() > maxNvRdEv) { + maxNvRdEv = pktNvmRead[0].size(); + stats.maxNvRdEvQ = pktNvmRead[0].size(); + } + + if (read_found) { + pktNvmReadWaitIssue[0].erase(to_read); + } + else { + pktNvmReadWaitIssue[0].erase(pktNvmReadWaitIssue[0].begin()); + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + } + + else { + assert(!pktNvmRead[0].empty()); + schedule(waitingToIssueNvmReadEvent, nvmReadEvent.when()+2); + return; + } + + if (!waitingToIssueNvmReadEvent.scheduled() && + !pktNvmReadWaitIssue[0].empty()) { + schedule(waitingToIssueNvmReadEvent, curTick()); + } + +} + +void +DcacheCtrl::processNvmReadEvent() +{ + if (stallRds) { + return; + } + + assert(!pktNvmRead[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::WRITE); + + if (switched_cmd_type) { + stats.wrToRdTurnAround++; + } + + busState = DcacheCtrl::READ; + + for (auto queue = pktNvmRead.rbegin(); + queue != pktNvmRead.rend(); ++queue) { + to_read = chooseNext((*queue), switched_cmd_type ? 
+ minWriteToReadDataGap() : 0, false); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + reqBufferEntry* e; + + int index = 0; + + if (read_found) { + e = reqBuffer.at((*to_read)->getAddr()); + } + else { + schedule(nvmReadEvent, + std::max(nextReqTime, curTick()+nvm->getTBurst())); + + return; + } + + assert(e->validEntry); + assert(!e->isHit); + assert(!e->dccPkt->isDram()); + assert(e->state == nvmRead); + + assert(packetReady(e->dccPkt)); + + Tick cmd_at = doBurstAccess(e->dccPkt); + + e->nvmRdDevTime = e->dccPkt->readyTime - cmd_at; + + // sanity check + assert(e->dccPkt->size <= (e->dccPkt->isDram() ? + dram->bytesPerBurst() : + nvm->bytesPerBurst())); + assert(e->dccPkt->readyTime >= curTick()); + + if (e->owPkt->isRead() && !e->isHit) { + logResponse(DcacheCtrl::READ, + e->dccPkt->requestorId(), + e->dccPkt->qosValue(), + e->owPkt->getAddr(), 1, + e->dccPkt->readyTime - e->dccPkt->entryTime); + } + + if (addrNvmRespReady.empty()) { + assert(!respNvmReadEvent.scheduled()); + schedule(respNvmReadEvent, e->dccPkt->readyTime); + } + else { + assert(reqBuffer.at(addrNvmRespReady.back())->dccPkt->readyTime + <= e->dccPkt->readyTime); + + assert(respNvmReadEvent.scheduled()); + } + + addrNvmRespReady.push_back(e->owPkt->getAddr()); + + if (addrNvmRespReady.size() > maxNvRdRespEv) { + maxNvRdRespEv = addrNvmRespReady.size(); + stats.maxNvRdRespEvQ = addrNvmRespReady.size(); + } + + //** keeping the state as it is, no transition + e->state = nvmRead; + + if (read_found) { + pktNvmRead[0].erase(to_read); + } + else { + pktNvmRead[0].erase(pktNvmRead[0].begin()+index); + } + + if (!pktNvmRead[0].empty()) { + assert(!nvmReadEvent.scheduled()); + auto min = pktNvmRead[0].front(); + //index = 0; + for (int i=0; ireadyTime > pktNvmRead[0].at(i)->readyTime && + min->readyTime != pktNvmRead[0].at(i)->readyTime) { + min = pktNvmRead[0].at(i); + //index = i; + } + } + Tick maxTick = std::max(nextReqTime, curTick()); + schedule(nvmReadEvent, std::max(maxTick, min->readyTime+1)); + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if (//(rdsNum == 0 && wrsNum != 0 && wrsNum >= writeLowThreshold) || + (rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } +} + +void +DcacheCtrl::processRespNvmReadEvent() +{ + assert(!addrNvmRespReady.empty()); + + reqBufferEntry* orbEntry = reqBuffer.at(addrNvmRespReady.front()); + + // A series of sanity check + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isRead()); + assert(!orbEntry->dccPkt->isDram()); + assert(orbEntry->state == nvmRead); + assert(!orbEntry->isHit); + assert(orbEntry->dccPkt->readyTime == curTick()); + + // Read miss from dram cache, now is available + // to send the response back to requestor + if (orbEntry->owPkt->isRead() && !orbEntry->isHit) { + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency, + false); + reqBuffer.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + 
orbEntry->drRd, + orbEntry->drWr, + orbEntry->nvWait, + orbEntry->nvRd, + orbEntry->nvWr, + orbEntry->nvmIssueReadyTime, + orbEntry->dramRdDevTime, + orbEntry->dramWrDevTime, + orbEntry->nvmRdDevTime); + delete orbEntry; + orbEntry = reqBuffer.at(addrNvmRespReady.front()); + + } + + // There has been a DRAM cache miss, + // initiate a DRAM write to bring it to DRAM cache + delete orbEntry->dccPkt; + + // creating a new dram write dcc-pkt + orbEntry->dccPkt = dram->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + false, + true); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "false" to + // indicate a write access to dram + dram->setupRank(orbEntry->dccPkt->rank, false); + + //** transition to dramWrite + // update the state of the orb entry + orbEntry->state = dramWrite; + orbEntry->drWr = curTick(); + + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isWrite()); + assert(orbEntry->state==dramWrite); + pktDramWrite[0].push_back(reqBuffer.at(addrNvmRespReady.front())->dccPkt); + + if (pktDramWrite[0].size() >= (orbMaxSize*dramWrDrainPerc)) { + + stallRds = true; + + drainDramWrite = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + + stallRds = true; + + if (!overallWriteEvent.scheduled()) { + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + } + + if (pktDramWrite[0].size() > maxDrWrEv) { + maxDrWrEv = pktDramWrite[0].size(); + stats.maxDrWrEvQ = pktDramWrite[0].size(); + } + + addrNvmRespReady.pop_front(); + + if (!addrNvmRespReady.empty()) { + assert(reqBuffer.at(addrNvmRespReady.front())->dccPkt->readyTime + >= curTick()); + assert(!respNvmReadEvent.scheduled()); + schedule(respNvmReadEvent, + reqBuffer.at(addrNvmRespReady.front())->dccPkt->readyTime); + } +} + +void +DcacheCtrl::processOverallWriteEvent() +{ + assert(stallRds); + + assert(!pktDramWrite[0].empty() || !pktNvmWrite[0].empty()); + + if (drainDramWrite) { + + drWrCounter++; + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::READ); + + if (switched_cmd_type) { + stats.rdToWrTurnAround++; + } + + for (auto queue = pktDramWrite.rbegin(); + queue != pktDramWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? + minReadToWriteDataGap() : 0, true); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + if (!write_found) { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+dram->getTBurst())); + + return; + } + + auto e = reqBuffer.at(pktDramWrite[0].front()->getAddr()); + + if (write_found) { + e = reqBuffer.at((*to_write)->getAddr()); + } + + bool canRetry = false; + + assert(e->validEntry); + if (e->owPkt->isRead()) { + assert(!e->isHit); + } + assert(e->dccPkt->isDram()); + assert(e->state == dramWrite); + assert(e->dccPkt->size <= + (e->dccPkt->isDram() ? 
+ dram->bytesPerBurst() : + nvm->bytesPerBurst()) ); + + busState = DcacheCtrl::WRITE; + + assert(packetReady(e->dccPkt)); + + Tick cmd_at = doBurstAccess(e->dccPkt); + + e->dramWrDevTime = e->dccPkt->readyTime - cmd_at; + + if (e->owPkt->isWrite()) { + // log the response + logResponse(DcacheCtrl::WRITE, + e->dccPkt->requestorId(), + e->dccPkt->qosValue(), + e->owPkt->getAddr(), 1, + e->dccPkt->readyTime - + e->dccPkt->entryTime); + } + + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(e); + + if (write_found) { + pktDramWrite[0].erase(to_write); + } + else { + pktDramWrite[0].erase(pktDramWrite[0].begin()); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } + + if (drWrCounter < minWritesPerSwitch && !pktDramWrite[0].empty()) { + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = true; + + stallRds = true; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + } + else if (drainNvmWrite) { + + assert(!pktNvmWrite[0].empty()); + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = false; + + stallRds = true; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + } + else if ((pktDramRead[0].empty() && pktNvmReadWaitIssue[0].empty() + && pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty())) { + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = false; + + drainNvmWrite = false; + + stallRds = true; + + drWrCounter = 0; + + nvWrCounter = 0; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + + } + else { + drainDramWrite = false; + stallRds = false; + drWrCounter = 0; + nvWrCounter = 0; + + if (!pktDramRead[0].empty() && !dramReadEvent.scheduled()) { + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + if (!pktNvmReadWaitIssue[0].empty() && + !waitingToIssueNvmReadEvent.scheduled()) { + schedule(waitingToIssueNvmReadEvent, + std::max(nextReqTime, curTick())); + } + if (!pktNvmRead[0].empty() && !nvmReadEvent.scheduled()) { + schedule(nvmReadEvent, std::max(nextReqTime, curTick())); + } + return; + } + } + + if (drainNvmWrite) { + + if (!nvm->writeRespQueueFull()) { + + nvWrCounter++; + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::READ); + + if (switched_cmd_type) { + stats.rdToWrTurnAround++; + } + + busState = DcacheCtrl::WRITE; + + for (auto queue = pktNvmWrite.rbegin(); + queue != pktNvmWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? 
+ minReadToWriteDataGap() : 0, false); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + auto e = pktNvmWrite[0].front(); + + if (write_found) { + e = (*to_write); + } + else { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+nvm->getTBurst())); + + return; + } + + // a series of sanity checks + assert(!e->isDram()); + assert(e->isWrite()); + assert(e->size <= nvm->bytesPerBurst()); + + assert(packetReady(e)); + + Tick cmd_at = doBurstAccess(e); + + long long int stateTick = + (e->readyTime - e->entryTime); + + assert(stateTick > 0); + + stats.timeInNvmWrite += stateTick; + + stats.totNumPktsNvmWr++; + + stats.nvWrDevTime += (e->readyTime - cmd_at); + + long long int stateQT = + (cmd_at - e->entryTime); + + assert(stateQT >= 0); + + stats.nvmWrQingTime += stateQT; + + delete e; + + if (write_found) { + pktNvmWrite[0].erase(to_write); + } + else { + pktNvmWrite[0].erase(pktNvmWrite[0].begin()); + } + + if (nvWrCounter < minWritesPerSwitch && !pktNvmWrite[0].empty()) { + assert(!overallWriteEvent.scheduled()); + + drainNvmWrite = true; + + stallRds = true; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + } + else if ((pktDramRead[0].empty() && pktNvmReadWaitIssue[0].empty() + && pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty())) { + + assert(!overallWriteEvent.scheduled()); + + drainDramWrite = false; + + drainNvmWrite = false; + + stallRds = true; + + drWrCounter = 0; + + nvWrCounter = 0; + + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + + return; + + } + else { + drainNvmWrite = false; + stallRds = false; + drWrCounter = 0; + nvWrCounter = 0; + + if (!pktDramRead[0].empty() && !dramReadEvent.scheduled()) { + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + if (!pktNvmReadWaitIssue[0].empty() && + !waitingToIssueNvmReadEvent.scheduled()) { + schedule(waitingToIssueNvmReadEvent, + std::max(nextReqTime, curTick())); + } + if (!pktNvmRead[0].empty() && !nvmReadEvent.scheduled()) { + schedule(nvmReadEvent, std::max(nextReqTime, curTick())); + } + return; + } + } + + if (!overallWriteEvent.scheduled() && + !pktNvmWrite[0].empty() && + nvm->writeRespQueueFull()) { + schedule(overallWriteEvent, std::max(nextReqTime, + nvm->writeRespQueueFront()+1)); + return; + } + } + else if ((!pktDramWrite[0].empty() && drWrCounter < minDrWrPerSwitch) || + (!pktDramWrite[0].empty() && pktNvmWrite[0].empty() && + (drWrCounter+nvWrCounter)end()) { + // candidate write found + write_found = true; + break; + } + } + + if (!write_found) { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+dram->getTBurst())); + + return; + } + + auto e = reqBuffer.at(pktDramWrite[0].front()->getAddr()); + + if (write_found) { + e = reqBuffer.at((*to_write)->getAddr()); + } + + bool canRetry = false; + + assert(e->validEntry); + if (e->owPkt->isRead()) { + assert(!e->isHit); + } + assert(e->dccPkt->isDram()); + assert(e->state == dramWrite); + assert(e->dccPkt->size <= + (e->dccPkt->isDram() ? 
+ dram->bytesPerBurst() : + nvm->bytesPerBurst()) ); + + busState = DcacheCtrl::WRITE; + + assert(packetReady(e->dccPkt)); + + Tick cmd_at = doBurstAccess(e->dccPkt); + + e->dramWrDevTime = e->dccPkt->readyTime - cmd_at; + + if (e->owPkt->isWrite()) { + // log the response + logResponse(DcacheCtrl::WRITE, + e->dccPkt->requestorId(), + e->dccPkt->qosValue(), + e->owPkt->getAddr(), 1, + e->dccPkt->readyTime - + e->dccPkt->entryTime); + } + + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(e); + + if (write_found) { + pktDramWrite[0].erase(to_write); + } + else { + pktDramWrite[0].erase(pktDramWrite[0].begin()); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } + } + + else if ((!pktNvmWrite[0].empty() && nvWrCounter < minNvWrPerSwitch) || + (!pktNvmWrite[0].empty() && pktDramWrite[0].empty() && + (drWrCounter+nvWrCounter)writeRespQueueFull()) { + + nvWrCounter++; + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DcacheCtrl::READ); + + if (switched_cmd_type) { + stats.rdToWrTurnAround++; + } + + busState = DcacheCtrl::WRITE; + + for (auto queue = pktNvmWrite.rbegin(); + queue != pktNvmWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? + minReadToWriteDataGap() : 0, false); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + auto e = pktNvmWrite[0].front(); + + if (write_found) { + e = (*to_write); + } + else { + + schedule(overallWriteEvent, + std::max(nextReqTime, curTick()+nvm->getTBurst())); + + return; + } + + // a series of sanity checks + assert(!e->isDram()); + assert(e->isWrite()); + assert(e->size <= nvm->bytesPerBurst()); + + assert(packetReady(e)); + + Tick cmd_at = doBurstAccess(e); + + long long int stateTick = + (e->readyTime - e->entryTime); + + assert(stateTick > 0); + + stats.timeInNvmWrite += stateTick; + + stats.totNumPktsNvmWr++; + + stats.nvWrDevTime += (e->readyTime - cmd_at); + + long long int stateQT = + (cmd_at - e->entryTime); + + assert(stateQT >= 0); + + stats.nvmWrQingTime += stateQT; + + delete e; + + if (write_found) { + pktNvmWrite[0].erase(to_write); + } + else { + pktNvmWrite[0].erase(pktNvmWrite[0].begin()); + } + } + + if (!overallWriteEvent.scheduled() && + !pktNvmWrite[0].empty() && + nvm->writeRespQueueFull()) { + schedule(overallWriteEvent, std::max(nextReqTime, + nvm->writeRespQueueFront()+1)); + return; + } + } + + if ( + !overallWriteEvent.scheduled() && + ( + ( + (!pktDramRead[0].empty() || !pktNvmReadWaitIssue[0].empty() + || !pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty()) && + (drWrCounter + nvWrCounter < minWritesPerSwitch) + ) || + ( + (pktDramRead[0].empty() && pktNvmReadWaitIssue[0].empty() + && pktNvmRead[0].empty()) && + (!pktDramWrite[0].empty() || !pktNvmWrite[0].empty()) + ) + ) + ) { + stallRds = true; + schedule(overallWriteEvent, std::max(nextReqTime, curTick())); + } + else { + stallRds = false; + + drWrCounter = 0; + + nvWrCounter = 0; + + if (!pktDramRead[0].empty() && !dramReadEvent.scheduled()) { + schedule(dramReadEvent, std::max(nextReqTime, curTick())); + } + if (!pktNvmReadWaitIssue[0].empty() && + !waitingToIssueNvmReadEvent.scheduled()) { + schedule(waitingToIssueNvmReadEvent, + std::max(nextReqTime, curTick())); + } + if (!pktNvmRead[0].empty() && !nvmReadEvent.scheduled()) { + schedule(nvmReadEvent, std::max(nextReqTime, curTick())); 
+ } + } +} + + +void +DcacheCtrl::processRespondEvent() +{ + +} + +void +DcacheCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency, bool in_dram) +{ + DPRINTF(DcacheCtrl, "Responding to Address %lld.. \n",pkt->getAddr()); + + bool needsResponse = pkt->needsResponse(); + // do the actual memory access which also turns the packet into a + // response + if (in_dram && dram && dram->getAddrRange().contains(pkt->getAddr())) { + dram->access(pkt); + } else if (!in_dram && nvm && + nvm->getAddrRange().contains(pkt->getAddr())) { + nvm->access(pkt); + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + // turn packet around to go back to requestor if response expected + if (needsResponse) { + // access already turned the packet into a response + assert(pkt->isResponse()); + // response_time consumes the static latency and is charged also + // with headerDelay that takes into account the delay provided by + // the xbar and also the payloadDelay that takes into account the + // number of data beats. + Tick response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. + pkt->headerDelay = pkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(pkt, response_time); + } else { + // @todo the packet is going to be deleted, and the MemPacket + // is still having a pointer to it + pendingDelete.reset(pkt); + } + + DPRINTF(DcacheCtrl, "Done\n"); + + return; +} + +void +DcacheCtrl::pruneBurstTick() +{ + auto it = burstTicks.begin(); + while (it != burstTicks.end()) { + auto current_it = it++; + if (curTick() > *current_it) { + DPRINTF(DcacheCtrl, "Removing burstTick for %d\n", *current_it); + burstTicks.erase(current_it); + } + } +} + +Tick +DcacheCtrl::getBurstWindow(Tick cmd_tick) +{ + // get tick aligned to burst window + Tick burst_offset = cmd_tick % commandWindow; + return (cmd_tick - burst_offset); +} + +Tick +DcacheCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) +{ + // start with assumption that there is no contention on command bus + Tick cmd_at = cmd_tick; + + // get tick aligned to burst window + Tick burst_tick = getBurstWindow(cmd_tick); + + // verify that we have command bandwidth to issue the command + // if not, iterate over next window(s) until slot found + while (burstTicks.count(burst_tick) >= max_cmds_per_burst) { + DPRINTF(DcacheCtrl, "Contention found on command bus at %d\n", + burst_tick); + burst_tick += commandWindow; + cmd_at = burst_tick; + } + + // add command into burst window and return corresponding Tick + burstTicks.insert(burst_tick); + return cmd_at; +} + +Tick +DcacheCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split) +{ + // start with assumption that there is no contention on command bus + Tick cmd_at = cmd_tick; + + // get tick aligned to burst window + Tick burst_tick = getBurstWindow(cmd_tick); + + // Command timing requirements are from 2nd command + // Start with assumption that 2nd command will issue at cmd_at and + // find prior slot for 1st command to issue + // Given a maximum latency of max_multi_cmd_split between the commands, + // find the burst at the maximum latency prior to cmd_at + Tick burst_offset = 0; + Tick first_cmd_offset = cmd_tick % commandWindow; + while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) { + burst_offset += commandWindow; + } + // get the earliest 
burst aligned address for first command + // ensure that the time does not go negative + Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick); + + // Can required commands issue? + bool first_can_issue = false; + bool second_can_issue = false; + // verify that we have command bandwidth to issue the command(s) + while (!first_can_issue || !second_can_issue) { + bool same_burst = (burst_tick == first_cmd_tick); + auto first_cmd_count = burstTicks.count(first_cmd_tick); + auto second_cmd_count = same_burst ? first_cmd_count + 1 : + burstTicks.count(burst_tick); + + first_can_issue = first_cmd_count < max_cmds_per_burst; + second_can_issue = second_cmd_count < max_cmds_per_burst; + + if (!second_can_issue) { + DPRINTF(DcacheCtrl, "Contention (cmd2) found on " + "command bus at %d\n", + burst_tick); + burst_tick += commandWindow; + cmd_at = burst_tick; + } + + // Verify max_multi_cmd_split isn't violated when command 2 is shifted + // If commands initially were issued in same burst, they are + // now in consecutive bursts and can still issue B2B + bool gap_violated = !same_burst && + ((burst_tick - first_cmd_tick) > max_multi_cmd_split); + + if (!first_can_issue || (!second_can_issue && gap_violated)) { + DPRINTF(DcacheCtrl, "Contention (cmd1) found on " + "command bus at %d\n", + first_cmd_tick); + first_cmd_tick += commandWindow; + } + } + + // Add command to burstTicks + burstTicks.insert(burst_tick); + burstTicks.insert(first_cmd_tick); + + return cmd_at; +} + +bool +DcacheCtrl::inReadBusState(bool next_state) const +{ + // check the bus state + if (next_state) { + // use busStateNext to get the state that will be used + // for the next burst + return (busStateNext == DcacheCtrl::READ); + } else { + return (busState == DcacheCtrl::READ); + } +} + +bool +DcacheCtrl::inWriteBusState(bool next_state) const +{ + // check the bus state + if (next_state) { + // use busStateNext to get the state that will be used + // for the next burst + return (busStateNext == DcacheCtrl::WRITE); + } else { + return (busState == DcacheCtrl::WRITE); + } +} + +Tick +DcacheCtrl::doBurstAccess(MemPacket* dcc_pkt) +{ + // first clean up the burstTick set, removing old entries + // before adding new entries for next burst + pruneBurstTick(); + + // When was command issued? + Tick cmd_at; + + // Issue the next burst and update bus state to reflect + // when previous command was issued + if (dcc_pkt->isDram()) { + std::tie(cmd_at, nextBurstAt) = + dram->doBurstAccess(dcc_pkt, nextBurstAt);//, queue); + + // Update timing for NVM ranks if NVM is configured on this channel + if (nvm) + nvm->addRankToRankDelay(cmd_at); + + } else { + std::tie(cmd_at, nextBurstAt) = + nvm->doBurstAccess(dcc_pkt, nextBurstAt); + + // Update timing for NVM ranks if NVM is configured on this channel + if (dram) + dram->addRankToRankDelay(cmd_at); + + } + + DPRINTF(DcacheCtrl, "Access to %lld, ready at %lld next burst at %lld.\n", + dcc_pkt->addr, dcc_pkt->readyTime, nextBurstAt); + + // Update the minimum timing between the requests, this is a + // conservative estimate of when we have to schedule the next + // request to not introduce any unecessary bubbles. In most cases + // we will wake up sooner than we have to. + nextReqTime = nextBurstAt - (dram ? 
dram->commandOffset() : + nvm->commandOffset()); + + + // Update the common bus stats + if (dcc_pkt->pkt != nullptr) { + if (dcc_pkt->isRead()) { + // Update latency stats + stats.requestorReadTotalLat[dcc_pkt->requestorId()] += + dcc_pkt->readyTime - dcc_pkt->entryTime; + stats.requestorReadBytes[dcc_pkt->requestorId()] += dcc_pkt->size; + } else { + stats.requestorWriteBytes[dcc_pkt->requestorId()] += dcc_pkt->size; + stats.requestorWriteTotalLat[dcc_pkt->requestorId()] += + dcc_pkt->readyTime - dcc_pkt->entryTime; + } + } + return cmd_at; +} + +void +DcacheCtrl::processNextReqEvent() +{ + +} + +bool +DcacheCtrl::packetReady(MemPacket* pkt) +{ + return (pkt->isDram() ? + dram->burstReady(pkt) : nvm->burstReadyDCache(pkt)); +} + +Tick +DcacheCtrl::minReadToWriteDataGap() +{ + Tick dram_min = dram ? dram->minReadToWriteDataGap() : MaxTick; + Tick nvm_min = nvm ? nvm->minReadToWriteDataGap() : MaxTick; + return std::min(dram_min, nvm_min); +} + +Tick +DcacheCtrl::minWriteToReadDataGap() +{ + Tick dram_min = dram ? dram->minWriteToReadDataGap() : MaxTick; + Tick nvm_min = nvm ? nvm->minWriteToReadDataGap() : MaxTick; + return std::min(dram_min, nvm_min); +} + +MemPacketQueue::iterator +DcacheCtrl::chooseNext(MemPacketQueue& queue, Tick extra_col_delay, + bool is_dram) +{ + // This method does the arbitration between requests. + + MemPacketQueue::iterator ret = queue.end(); + + if (!queue.empty()) { + if (queue.size() == 1) { + // available rank corresponds to state refresh idle + MemPacket* mem_pkt = *(queue.begin()); + if (packetReady(mem_pkt)) { + ret = queue.begin(); + DPRINTF(DcacheCtrl, "Single request, going to a free rank\n"); + } else { + DPRINTF(DcacheCtrl, "Single request, going to a busy rank\n"); + } + } else if (memSchedPolicy == Enums::fcfs) { + // check if there is a packet going to a free rank + for (auto i = queue.begin(); i != queue.end(); ++i) { + MemPacket* mem_pkt = *i; + if (packetReady(mem_pkt)) { + ret = i; + break; + } + } + } else if (memSchedPolicy == Enums::frfcfs) { + ret = chooseNextFRFCFS(queue, extra_col_delay, is_dram); + } else { + panic("No scheduling policy chosen\n"); + } + } + return ret; +} + +MemPacketQueue::iterator +DcacheCtrl::chooseNextFRFCFS(MemPacketQueue& queue, Tick extra_col_delay, + bool is_dram) +{ + auto selected_pkt_it = queue.end(); + Tick col_allowed_at = MaxTick; + + // time we need to issue a column command to be seamless + const Tick min_col_at = std::max(nextBurstAt + extra_col_delay, curTick()); + + if (is_dram) { + std::tie(selected_pkt_it, col_allowed_at) = + dram->chooseNextFRFCFS(queue, min_col_at); + } else { + std::tie(selected_pkt_it, col_allowed_at) = + nvm->chooseNextFRFCFSDCache(queue, min_col_at); + } + + if (selected_pkt_it == queue.end()) { + DPRINTF(DcacheCtrl, "%s no available packets found\n", __func__); + } + + return selected_pkt_it; +} + +Addr +DcacheCtrl::burstAlign(Addr addr, bool is_dram) const +{ + if (is_dram) + return (addr & ~(Addr(dram->bytesPerBurst() - 1))); + else + return (addr & ~(Addr(nvm->bytesPerBurst() - 1))); +} + +DcacheCtrl::CtrlStats::CtrlStats(DcacheCtrl &_ctrl) + : Stats::Group(&_ctrl), + ctrl(_ctrl), + + ADD_STAT(readReqs, "Number of read requests accepted"), + ADD_STAT(writeReqs, "Number of write requests accepted"), + + ADD_STAT(readBursts, + "Number of controller read bursts, " + "including those serviced by the write queue"), + ADD_STAT(writeBursts, + "Number of controller write bursts, " + "including those merged in the write queue"), + ADD_STAT(servicedByWrQ, + "Number of controller 
read bursts serviced by the write queue"), + ADD_STAT(mergedWrBursts, + "Number of controller write bursts merged with an existing one"), + + //ADD_STAT(neitherReadNorWriteReqs, + // "Number of requests that are neither read nor write"), + + ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"), + ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"), + + ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"), + ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"), + + ADD_STAT(readPktSize, "Read request sizes (log2)"), + ADD_STAT(writePktSize, "Write request sizes (log2)"), + + ADD_STAT(rdToWrTurnAround, "Read to write switch"), + ADD_STAT(wrToRdTurnAround, "Write to read switch)"), + + //ADD_STAT(rdQLenPdf, "What read queue length does an incoming req see"), + //ADD_STAT(wrQLenPdf, "What write queue length does an incoming req see"), + + //ADD_STAT(rdPerTurnAround, + // "Reads before turning the bus around for writes"), + //ADD_STAT(wrPerTurnAround, + // "Writes before turning the bus around for reads"), + + ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"), + ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"), + ADD_STAT(bytesWrittenSys, + "Total written bytes from the system interface side"), + + ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"), + ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"), + + ADD_STAT(totGap, "Total gap between requests"), + ADD_STAT(avgGap, "Average gap between requests"), + + ADD_STAT(requestorReadBytes, + "Per-requestor bytes read from memory"), + ADD_STAT(requestorWriteBytes, + "Per-requestor bytes write to memory"), + ADD_STAT(requestorReadRate, + "Per-requestor bytes read from memory rate (Bytes/sec)"), + ADD_STAT(requestorWriteRate, + "Per-requestor bytes write to memory rate (Bytes/sec)"), + ADD_STAT(requestorReadAccesses, + "Per-requestor read serviced memory accesses"), + ADD_STAT(requestorWriteAccesses, + "Per-requestor write serviced memory accesses"), + ADD_STAT(requestorReadTotalLat, + "Per-requestor read total memory access latency"), + ADD_STAT(requestorWriteTotalLat, + "Per-requestor write total memory access latency"), + ADD_STAT(requestorReadAvgLat, + "Per-requestor read average memory access latency"), + ADD_STAT(requestorWriteAvgLat, + "Per-requestor write average memory access latency"), + + ADD_STAT(numHits, + "Total number of hits on DRAM cache"), + ADD_STAT(numMisses, + "Total number of misses on DRAM cache"), + ADD_STAT(numRdHits, + "Total number of read hits on DRAM cache"), + ADD_STAT(numWrHits, + "Total number of write hits on DRAM cache"), + ADD_STAT(numRdMisses, + "Total number of read misses on DRAM cache"), + ADD_STAT(numWrMisses, + "Total number of write misses on DRAM cache"), + ADD_STAT(numColdMisses, + "Total number of misses on DRAM cache due to" + " first reference to a cache block"), + ADD_STAT(numHotMisses, + "Total number of misses on DRAM cache that are not cold miss"), + ADD_STAT(numWrBacks, + "Total number of write backs from DRAM cache to main memory"), + ADD_STAT(totNumConf, + "Total number of packets conflicted on DRAM cache"), + ADD_STAT(totNumConfBufFull, + "Total number of packets conflicted and couldn't " + "enter confBuffer"), + + ADD_STAT(timeInDramRead, + "Total time spent in dram read state in ns"), + ADD_STAT(timeInDramWrite, + "Total time spent in dram write state in ns"), + ADD_STAT(timeInWaitingToIssueNvmRead, + "Total time spent in waitingToIssueNvmRead state in 
ns"), + ADD_STAT(timeInNvmRead, + "Total time spent in nvmRead state in ns"), + ADD_STAT(timeInNvmWrite, + "Total time spent in nvmWrite state in ns"), + + ADD_STAT(drRdQingTime, + "Total time spent as DRAM read queuing time in ns"), + ADD_STAT(drWrQingTime, + "Total time spent as DRAM write queuing time in ns"), + ADD_STAT(nvmRdQingTime, + "Total time spent as NVM read queuing time in ns"), + ADD_STAT(nvmWrQingTime, + "Total time spent as NVM write queuing time in ns"), + + ADD_STAT(drRdDevTime, + "Total time spent as DRAM read device time in Ticks"), + ADD_STAT(drWrDevTime, + "Total time spent as DRAM write device time in Ticks"), + ADD_STAT(nvRdDevTime, + "Total time spent as NVM read device time in Ticks"), + ADD_STAT(nvWrDevTime, + "Total time spent as NVM write device time in Ticks"), + + ADD_STAT(totNumPktsDrRd, + "Total number of packets enterted to Dram read state"), + ADD_STAT(totNumPktsDrWr, + "Total number of packets enterted to Dram write state"), + ADD_STAT(totNumPktsNvmRdWait, + "Total number of packets enterted to NVM waitToIssue state"), + ADD_STAT(totNumPktsNvmRd, + "Total number of packets enterted to NVM read state"), + ADD_STAT(totNumPktsNvmWr, + "Total number of packets enterted to NVM write state"), + + ADD_STAT(maxNumConf, + "Maximum number of packets conflicted on DRAM cache"), + ADD_STAT(maxDrRdEvQ, + "Maximum number of packets in DrRdEvent concurrently"), + ADD_STAT(maxDrRdRespEvQ, + "Maximum number of packets in DrRdRespEvent concurrently"), + ADD_STAT(maxDrWrEvQ, + "Maximum number of packets in DrWrEvent concurrently"), + ADD_STAT(maxNvRdIssEvQ, + "Maximum number of packets in NvRdIssEvent concurrently"), + ADD_STAT(maxNvRdEvQ, + "Maximum number of packets in NvRdEvent concurrently"), + ADD_STAT(maxNvRdRespEvQ, + "Maximum number of packets in NvRdRespEvent concurrently"), + ADD_STAT(maxNvWrEvQ, + "Maximum number of packets in NvWrEvent concurrently") + +{ +} + +void +DcacheCtrl::CtrlStats::regStats() +{ + using namespace Stats; + + assert(ctrl.system()); + const auto max_requestors = ctrl.system()->maxRequestors(); + + avgRdQLen.precision(2); + avgWrQLen.precision(2); + + readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + + avgRdBWSys.precision(2); + avgWrBWSys.precision(2); + avgGap.precision(2); + + // per-requestor bytes read and written to memory + requestorReadBytes + .init(max_requestors) + .flags(nozero | nonan); + + requestorWriteBytes + .init(max_requestors) + .flags(nozero | nonan); + + // per-requestor bytes read and written to memory rate + requestorReadRate + .flags(nozero | nonan) + .precision(12); + + requestorReadAccesses + .init(max_requestors) + .flags(nozero); + + requestorWriteAccesses + .init(max_requestors) + .flags(nozero); + + requestorReadTotalLat + .init(max_requestors) + .flags(nozero | nonan); + + requestorReadAvgLat + .flags(nonan) + .precision(2); + + requestorWriteRate + .flags(nozero | nonan) + .precision(12); + + requestorWriteTotalLat + .init(max_requestors) + .flags(nozero | nonan); + + requestorWriteAvgLat + .flags(nonan) + .precision(2); + + for (int i = 0; i < max_requestors; i++) { + const std::string requestor = ctrl.system()->getRequestorName(i); + requestorReadBytes.subname(i, requestor); + requestorReadRate.subname(i, requestor); + requestorWriteBytes.subname(i, requestor); + requestorWriteRate.subname(i, requestor); + requestorReadAccesses.subname(i, requestor); + requestorWriteAccesses.subname(i, requestor); + 
requestorReadTotalLat.subname(i, requestor); + requestorReadAvgLat.subname(i, requestor); + requestorWriteTotalLat.subname(i, requestor); + requestorWriteAvgLat.subname(i, requestor); + } + + // Formula stats + avgRdBWSys = (bytesReadSys / 1000000) / simSeconds; + avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds; + + avgGap = totGap / (readReqs + writeReqs); + + requestorReadRate = requestorReadBytes / simSeconds; + requestorWriteRate = requestorWriteBytes / simSeconds; + requestorReadAvgLat = requestorReadTotalLat / requestorReadAccesses; + requestorWriteAvgLat = requestorWriteTotalLat / requestorWriteAccesses; +} + +void +DcacheCtrl::recvFunctional(PacketPtr pkt) +{ + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + dram->functionalAccess(pkt); + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + nvm->functionalAccess(pkt); + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } +} + +Port & +DcacheCtrl::getPort(const std::string &if_name, PortID idx) +{ + if (if_name != "port") { + return QoS::MemCtrl::getPort(if_name, idx); + } else { + return port; + } +} + +bool +DcacheCtrl::allIntfDrained() const +{ + // ensure dram is in power down and refresh IDLE states + bool dram_drained = !dram || dram->allRanksDrained(); + // No outstanding NVM writes + // All other queues verified as needed with calling logic + bool nvm_drained = !nvm || nvm->allRanksDrained(); + return (dram_drained && nvm_drained); +} + +DrainState +DcacheCtrl::drain() +{ + // if there is anything in any of our internal queues, keep track + // of that as well + unsigned rdsNum = pktDramRead[0].size() + + pktNvmReadWaitIssue[0].size() + + pktNvmRead[0].size(); + unsigned wrsNum = pktNvmWrite[0].size() + + pktDramWrite[0].size(); + if (!(!wrsNum && !rdsNum && + addrDramRespReady.empty() && + allIntfDrained())) { + + DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d," + " resp: %d\n", wrsNum, rdsNum, + addrDramRespReady.size()); + + // the only queue that is not drained automatically over time + // is the write queue, thus kick things into action if needed + + if (dram) + dram->drainRanks(); + + return DrainState::Draining; + } else { + return DrainState::Drained; + } +} + +void +DcacheCtrl::drainResume() +{ + if (!isTimingMode && system()->isTimingMode()) { + // if we switched to timing mode, kick things into action, + // and behave as if we restored from a checkpoint + startup(); + dram->startup(); + } else if (isTimingMode && !system()->isTimingMode()) { + // if we switch from timing mode, stop the refresh events to + // not cause issues with KVM + if (dram) + dram->suspend(); + } + + // update the mode + isTimingMode = system()->isTimingMode(); +} + +DcacheCtrl::MemoryPort::MemoryPort(const std::string& name, DcacheCtrl& _ctrl) + : QueuedResponsePort(name, &_ctrl, queue), queue(_ctrl, *this, true), + ctrl(_ctrl) +{ } + +AddrRangeList +DcacheCtrl::MemoryPort::getAddrRanges() const +{ + AddrRangeList ranges; + if (ctrl.dram) { + DPRINTF(DRAM, "Pushing DRAM ranges to port\n"); + ranges.push_back(ctrl.dram->getAddrRange()); + } + if (ctrl.nvm) { + DPRINTF(NVM, "Pushing NVM ranges to port\n"); + ranges.push_back(ctrl.nvm->getAddrRange()); + } + return ranges; +} + +void +DcacheCtrl::MemoryPort::recvFunctional(PacketPtr pkt) +{ + pkt->pushLabel(ctrl.name()); + + if (!queue.trySatisfyFunctional(pkt)) { + // Default implementation of SimpleTimingPort::recvFunctional() + // calls 
recvAtomic() and throws away the latency; we can save a + // little here by just not calculating the latency. + ctrl.recvFunctional(pkt); + } + + pkt->popLabel(); +} + +Tick +DcacheCtrl::MemoryPort::recvAtomic(PacketPtr pkt) +{ + return ctrl.recvAtomic(pkt); +} + +Tick +DcacheCtrl::MemoryPort::recvAtomicBackdoor( + PacketPtr pkt, MemBackdoorPtr &backdoor) +{ + return ctrl.recvAtomicBackdoor(pkt, backdoor); +} + +bool +DcacheCtrl::MemoryPort::recvTimingReq(PacketPtr pkt) +{ + // pass it to the memory controller + return ctrl.recvTimingReq(pkt); +} diff --git a/src/mem/dcache_ctrl.hh b/src/mem/dcache_ctrl.hh new file mode 100644 index 0000000000..c71c28412e --- /dev/null +++ b/src/mem/dcache_ctrl.hh @@ -0,0 +1,769 @@ +/// The copyright needs be modified for UCD/DArchR/the names of the writers + +/* + * Copyright (c) 2012-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * @file + * DcacheCtrl declaration + */ + +#ifndef __DCACHE_CTRL_HH__ +#define __DCACHE_CTRL_HH__ + +#include + +#include "mem/mem_ctrl.hh" +#include "params/DcacheCtrl.hh" + +class DRAMInterface; +class NVMInterface; + +class DcacheCtrl : public QoS::MemCtrl +{ + private: + + bool stallRds = false; + bool drainDramWrite = false; + bool drainNvmWrite = false; + + + unsigned maxConf = 0, + maxDrRdEv = 0, maxDrRdRespEv = 0, + maxDrWrEv = 0, + maxNvRdIssEv = 0, maxNvRdEv = 0, maxNvRdRespEv = 0, + maxNvWrEv = 0; + + unsigned numDirtyLinesInDrRdRespQ = 0; + + // For now, make use of a queued response port to avoid dealing with + // flow control for the responses being sent back + class MemoryPort : public QueuedResponsePort + { + RespPacketQueue queue; + DcacheCtrl& ctrl; + public: + MemoryPort(const std::string& name, DcacheCtrl& _ctrl); + protected: + Tick recvAtomic(PacketPtr pkt) override; + Tick recvAtomicBackdoor( + PacketPtr pkt, MemBackdoorPtr &backdoor) override; + void recvFunctional(PacketPtr pkt) override; + bool recvTimingReq(PacketPtr) override; + AddrRangeList getAddrRanges() const override; + }; + + + /** + * Our incoming port, for a multi-ported controller add a crossbar + * in front of it + */ + MemoryPort port; + + /** + * Remember if the memory system is in timing mode + */ + bool isTimingMode; + + /** + * Remember if we have to retry a request when available. + */ + bool retry; + + void printORB(); + void printCRB(); + void printAddrInitRead(); + void printAddrDramRespReady(); + void printNvmWritebackQueue(); + Addr returnTagDC(Addr pkt_addr, unsigned size); + Addr returnIndexDC(Addr pkt_addr, unsigned size); + + template + void clearQueue(Q & q) { + q = Q(); + } + + /** + * Bunch of things requires to setup "events" in gem5 + * When event "respondEvent" occurs for example, the method + * processRespondEvent is called; no parameters are allowed + * in these methods + */ + void processNextReqEvent(); + EventFunctionWrapper nextReqEvent; + + void processRespondEvent(); + EventFunctionWrapper respondEvent; + + /** + * processDramReadEvent() is an event handler which + * schedules the initial DRAM read accesses for every + * received packet by the DRAM Cache Controller. + */ + void processDramReadEvent(); + EventFunctionWrapper dramReadEvent; + + /** + * processRespDramReadEvent() is an event handler which + * handles the responses of the initial DRAM read accesses + * for the received packets by the DRAM Cache Controller. + */ + void processRespDramReadEvent(); + EventFunctionWrapper respDramReadEvent; + + /** + * processWaitingToIssueNvmReadEvent() is an event handler which + * handles the satte in which the packets that missed in DRAM cache + * will wait before being issued, if the NVM read has reached to the + * maximum number allowed for pending reads. + */ + void processWaitingToIssueNvmReadEvent(); + EventFunctionWrapper waitingToIssueNvmReadEvent; + + /** + * processNvmReadEvent() is an event handler which + * schedules the NVM read accesses in the DRAM Cache Controller. + */ + void processNvmReadEvent(); + EventFunctionWrapper nvmReadEvent; + + /** + * processRespNvmReadEvent() is an event handler which + * handles the responses of the NVM read accesses in + * the DRAM Cache Controller. + */ + void processRespNvmReadEvent(); + EventFunctionWrapper respNvmReadEvent; + + /** + * processOverallWriteEvent() is an event handler which + * handles all write accesses to DRAM and NVM. 
+ */ + + void processOverallWriteEvent(); + EventFunctionWrapper overallWriteEvent; + + /** + * Actually do the burst based on media specific access function. + * Update bus statistics when complete. + * + * @param mem_pkt The memory packet created from the outside world pkt + * returns cmd_at tick + */ + Tick doBurstAccess(MemPacket* mem_pkt); + + /** + * When a packet reaches its "readyTime" in the response Q, + * use the "access()" method in AbstractMemory to actually + * create the response packet, and send it back to the outside + * world requestor. + * + * @param pkt The packet from the outside world + * @param static_latency Static latency to add before sending the packet + */ + void accessAndRespond(PacketPtr pkt, Tick static_latency, bool in_dram); + + /** + * Determine if there is a packet that can issue. + * + * @param pkt The packet to evaluate + */ + bool packetReady(MemPacket* pkt); + + /** + * Calculate the minimum delay used when scheduling a read-to-write + * transision. + * @param return minimum delay + */ + Tick minReadToWriteDataGap(); + + /** + * Calculate the minimum delay used when scheduling a write-to-read + * transision. + * @param return minimum delay + */ + Tick minWriteToReadDataGap(); + + + /** + * The memory schduler/arbiter - picks which request needs to + * go next, based on the specified policy such as FCFS or FR-FCFS + * and moves it to the head of the queue. + * Prioritizes accesses to the same rank as previous burst unless + * controller is switching command type. + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNext(MemPacketQueue& queue, + Tick extra_col_delay, bool is_dram); + + /** + * For FR-FCFS policy reorder the read/write queue depending on row buffer + * hits and earliest bursts available in memory + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue& queue, + Tick extra_col_delay, bool is_dram); + + /** + * Calculate burst window aligned tick + * + * @param cmd_tick Initial tick of command + * @return burst window aligned tick + */ + Tick getBurstWindow(Tick cmd_tick); + + /** + * Burst-align an address. + * + * @param addr The potentially unaligned address + * @param is_dram Does this packet access DRAM? + * + * @return An address aligned to a memory burst + */ + Addr burstAlign(Addr addr, bool is_dram) const; + + /** + * To avoid iterating over the outstanding requests buffer + * to check for overlapping transactions, maintain a set + * of burst addresses that are currently queued. + * Since we merge writes to the same location we never + * have more than one address to the same burst address. + */ + std::unordered_set isInWriteQueue; + + struct tagMetaStoreEntry { + // DRAM cache related metadata + Addr tagDC; + Addr indexDC; + // constant to indicate that the cache line is valid + bool validLine = false; + // constant to indicate that the cache line is dirty + bool dirtyLine = false; + Addr nvmAddr; + }; + + /** A storage to keep the tag and metadata for the + * DRAM Cache entries. + */ + std::vector tagMetadataStore; + + /** Different states a packet can transition from one + * to the other while it's process in the DRAM Cache + * Controller. 
+     */
+    enum reqState { dramRead, dramWrite,
+                    waitingToIssueNvmRead, nvmRead, nvmWrite};
+
+    /**
+     * A class for the entries of the
+     * outstanding request buffer.
+     */
+    class reqBufferEntry {
+      public:
+        bool validEntry;
+        Tick arrivalTick;
+
+        // DRAM cache related metadata
+        Addr tagDC;
+        Addr indexDC;
+
+        // pointer to the outside world (ow) packet received from llc
+        const PacketPtr owPkt;
+        // pointer to the dram cache controller (dcc) packet
+        MemPacket* dccPkt;
+
+        reqState state;
+        bool isHit;
+        bool conflict;
+
+        Addr dirtyLineAddr;
+        bool handleDirtyLine;
+
+        Tick drRd;
+        Tick drWr;
+        Tick nvWait;
+        Tick nvRd;
+        Tick nvWr;
+
+        Tick nvmIssueReadyTime;
+
+        // Tick dramRdCmdAt;
+        // Tick dramWrCmdAt;
+        // Tick nvmRdCmdAt;
+        // Tick nvmWrCmdAt;
+
+        Tick dramRdDevTime;
+        Tick dramWrDevTime;
+        Tick nvmRdDevTime;
+        //Tick nvmWrDevTime;
+
+        reqBufferEntry(
+            bool _validEntry, Tick _arrivalTick,
+            Addr _tagDC, Addr _indexDC,
+            PacketPtr _owPkt, MemPacket* _dccPkt,
+            reqState _state, bool _isHit, bool _conflict,
+            Addr _dirtyLineAddr, bool _handleDirtyLine,
+            Tick _drRd, Tick _drWr, Tick _nvWait, Tick _nvRd, Tick _nvWr,
+            Tick _nvmIssueReadyTime,
+            Tick _dramRdDevTime, Tick _dramWrDevTime, Tick _nvmRdDevTime)
+        :
+        validEntry(_validEntry), arrivalTick(_arrivalTick),
+        tagDC(_tagDC), indexDC(_indexDC),
+        owPkt(_owPkt), dccPkt(_dccPkt),
+        state(_state), isHit(_isHit), conflict(_conflict),
+        dirtyLineAddr(_dirtyLineAddr), handleDirtyLine(_handleDirtyLine),
+        drRd(_drRd), drWr(_drWr),
+        nvWait(_nvWait), nvRd(_nvRd), nvWr(_nvWr),
+        nvmIssueReadyTime(_nvmIssueReadyTime),
+        dramRdDevTime(_dramRdDevTime), dramWrDevTime(_dramWrDevTime),
+        nvmRdDevTime(_nvmRdDevTime)
+        { }
+    };
+
+    /**
+     * This is the outstanding request buffer data
+     * structure, the main DS within the DRAM Cache
+     * Controller. The key is the address; for each key
+     * the map returns a reqBufferEntry which maintains
+     * all the info related to that address while it is
+     * being processed in the DRAM Cache controller.
+     */
+    std::map<Addr, reqBufferEntry*> reqBuffer;
+
+
+    typedef std::pair<Tick, PacketPtr> confReqBufferPair;
+    /**
+     * This is the second important data structure
+     * within the DRAM Cache controller, which holds
+     * received packets that had a conflict with some
+     * other address(es) still under process in the
+     * controller. Once those addresses finish
+     * processing, this buffer is consulted to see if
+     * any packet can be moved into the outstanding
+     * request buffer and start processing in the DRAM
+     * Cache controller.
+     */
+    std::vector<confReqBufferPair> confReqBuffer;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the dramReadEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrInitRead;
+    // std::vector pktInitRead;
+    // MemPacketQueue pktInitRead;
+    std::vector<MemPacketQueue> pktDramRead;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the respDramReadEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrDramRespReady;
+
+    // priority queue ordered by earliest tick
+    typedef std::pair<Tick, Addr> addrNvmReadPair;
+
+    /**
+     * To maintain the packets that missed in the DRAM cache and
+     * now require an NVM read, this queue holds them in order,
+     * in case they cannot be issued because the maximum
+     * number of pending NVM reads has been reached.
+     */
+    std::priority_queue<addrNvmReadPair, std::vector<addrNvmReadPair>,
+                        std::greater<addrNvmReadPair>>
+                        addrWaitingToIssueNvmRead;
+    std::vector<MemPacketQueue> pktNvmReadWaitIssue;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the nvmReadEvent handler, we maintain the
+     * required addresses in a priority queue.
+     */
+    std::priority_queue<addrNvmReadPair, std::vector<addrNvmReadPair>,
+                        std::greater<addrNvmReadPair>> addrNvmRead;
+
+    std::vector<MemPacketQueue> pktNvmRead;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the respNvmReadEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrNvmRespReady;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the dramWriteEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    std::deque<Addr> addrDramFill;
+    std::vector<MemPacketQueue> pktDramWrite;
+
+    /**
+     * To avoid iterating over the outstanding requests
+     * buffer for the nvmWriteEvent handler, we maintain the
+     * required addresses in a fifo queue.
+     */
+    typedef std::pair<Tick, PacketPtr> nvmWritePair;
+    std::priority_queue<nvmWritePair, std::vector<nvmWritePair>,
+                        std::greater<nvmWritePair>> nvmWritebackQueue;
+    std::vector<MemPacketQueue> pktNvmWrite;
+
+
+    void handleRequestorPkt(PacketPtr pkt);
+    void checkHitOrMiss(reqBufferEntry* orbEntry);
+    bool checkDirty(Addr addr);
+    void handleDirtyCacheLine(reqBufferEntry* orbEntry);
+    bool checkConflictInDramCache(PacketPtr pkt);
+    void checkConflictInCRB(reqBufferEntry* orbEntry);
+    bool resumeConflictingReq(reqBufferEntry* orbEntry);
+    void logStatsDcache(reqBufferEntry* orbEntry);
+    Tick earliestDirtyLineInDrRdResp();
+
+    /**
+     * Holds count of commands issued in burst window starting at
+     * defined Tick. This is used to ensure that the command bandwidth
+     * does not exceed the allowable media constraints.
+     */
+    std::unordered_multiset<Tick> burstTicks;
+
+    /**
+     * Create pointer to interface of the actual dram media when connected
+     */
+    DRAMInterface* const dram;
+
+    /**
+     * Create pointer to interface of the actual nvm media when connected
+     */
+
+    NVMInterface* const nvm;
+
+    /**
+     * The following are basic design parameters of the memory
+     * controller, and are initialized based on parameter values.
+     * The rowsPerBank is determined based on the capacity, number of
+     * ranks and banks, the burst size, and the row buffer size.
+     */
+    unsigned long long dramCacheSize;
+    unsigned blockSize;
+    unsigned addrSize;
+    unsigned orbMaxSize;
+    unsigned orbSize;
+    unsigned crbMaxSize;
+    unsigned crbSize;
+
+    unsigned writeHighThreshold;
+    unsigned writeLowThreshold;
+    unsigned minWritesPerSwitch;
+    float dramWrDrainPerc;
+    unsigned minDrWrPerSwitch;
+    unsigned minNvWrPerSwitch;
+    unsigned drWrCounter;
+    unsigned nvWrCounter;
+
+    /**
+     * Memory controller configuration initialized based on parameter
+     * values.
+     */
+    Enums::MemSched memSchedPolicy;
+
+    /**
+     * Pipeline latency of the controller frontend. The frontend
+     * contribution is added to writes (that complete when they are in
+     * the write buffer) and reads that are serviced by the write buffer.
+     */
+    const Tick frontendLatency;
+
+    /**
+     * Pipeline latency of the backend and PHY. Along with the
+     * frontend contribution, this latency is added to reads serviced
+     * by the memory.
+     */
+    const Tick backendLatency;
+
+    /**
+     * Length of a command window, used to check
+     * command bandwidth
+     */
+    const Tick commandWindow;
+
+    /**
+     * Till when must we wait before issuing next RD/WR burst?
+     */
+    Tick nextBurstAt;
+
+    Tick prevArrival;
+
+    /**
+     * The soonest you have to start thinking about the next request
+     * is the longest access time that can occur before
+     * nextBurstAt.
Assuming you need to precharge, open a new row, + * and access, it is tRP + tRCD + tCL. + */ + Tick nextReqTime; + + struct CtrlStats : public Stats::Group + { + CtrlStats(DcacheCtrl &ctrl); + + void regStats() override; + + DcacheCtrl &ctrl; + + // All statistics that the model needs to capture + Stats::Scalar readReqs; + Stats::Scalar writeReqs; + Stats::Scalar readBursts; + Stats::Scalar writeBursts; + Stats::Scalar servicedByWrQ; + Stats::Scalar mergedWrBursts; + //Stats::Scalar neitherReadNorWriteReqs; + // Average queue lengths + Stats::Average avgRdQLen; + Stats::Average avgWrQLen; + + Stats::Scalar numRdRetry; + Stats::Scalar numWrRetry; + Stats::Vector readPktSize; + Stats::Vector writePktSize; + //Stats::Vector rdQLenPdf; + //Stats::Vector wrQLenPdf; + //Stats::Histogram rdPerTurnAround; + //Stats::Histogram wrPerTurnAround; + Stats::Scalar rdToWrTurnAround; + Stats::Scalar wrToRdTurnAround; + + Stats::Scalar bytesReadWrQ; + Stats::Scalar bytesReadSys; + Stats::Scalar bytesWrittenSys; + // Average bandwidth + Stats::Formula avgRdBWSys; + Stats::Formula avgWrBWSys; + + Stats::Scalar totGap; + Stats::Formula avgGap; + + // per-requestor bytes read and written to memory + Stats::Vector requestorReadBytes; + Stats::Vector requestorWriteBytes; + + // per-requestor bytes read and written to memory rate + Stats::Formula requestorReadRate; + Stats::Formula requestorWriteRate; + + // per-requestor read and write serviced memory accesses + Stats::Vector requestorReadAccesses; + Stats::Vector requestorWriteAccesses; + + // per-requestor read and write total memory access latency + Stats::Vector requestorReadTotalLat; + Stats::Vector requestorWriteTotalLat; + + // per-requestor raed and write average memory access latency + Stats::Formula requestorReadAvgLat; + Stats::Formula requestorWriteAvgLat; + + Stats::Scalar numHits; + Stats::Scalar numMisses; + Stats::Scalar numRdHits; + Stats::Scalar numWrHits; + Stats::Scalar numRdMisses; + Stats::Scalar numWrMisses; + Stats::Scalar numColdMisses; + Stats::Scalar numHotMisses; + Stats::Scalar numWrBacks; + Stats::Scalar totNumConf; + Stats::Scalar totNumConfBufFull; + + Stats::Scalar timeInDramRead; + Stats::Scalar timeInDramWrite; + Stats::Scalar timeInWaitingToIssueNvmRead; + Stats::Scalar timeInNvmRead; + Stats::Scalar timeInNvmWrite; + + Stats::Scalar drRdQingTime; + Stats::Scalar drWrQingTime; + Stats::Scalar nvmRdQingTime; + Stats::Scalar nvmWrQingTime; + + Stats::Scalar drRdDevTime; + Stats::Scalar drWrDevTime; + Stats::Scalar nvRdDevTime; + Stats::Scalar nvWrDevTime; + + Stats::Scalar totNumPktsDrRd; + Stats::Scalar totNumPktsDrWr; + Stats::Scalar totNumPktsNvmRdWait; + Stats::Scalar totNumPktsNvmRd; + Stats::Scalar totNumPktsNvmWr; + + Stats::Scalar maxNumConf; + Stats::Scalar maxDrRdEvQ; + Stats::Scalar maxDrRdRespEvQ; + Stats::Scalar maxDrWrEvQ; + Stats::Scalar maxNvRdIssEvQ; + Stats::Scalar maxNvRdEvQ; + Stats::Scalar maxNvRdRespEvQ; + Stats::Scalar maxNvWrEvQ; + }; + + CtrlStats stats; + + /** + * Upstream caches need this packet until true is returned, so + * hold it for deletion until a subsequent call + */ + std::unique_ptr pendingDelete; + + /** + * Remove commands that have already issued from burstTicks + */ + void pruneBurstTick(); + + public: + + DcacheCtrl(const DcacheCtrlParams &p); + + + /** + * Ensure that all interfaced have drained commands + * + * @return bool flag, set once drain complete + */ + bool allIntfDrained() const; + + DrainState drain() override; + + /** + * Check for command bus contention for single cycle 
command. + * If there is contention, shift command to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. + * + * @param cmd_tick Initial tick of command, to be verified + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst); + + /** + * Check for command bus contention for multi-cycle (2 currently) + * command. If there is contention, shift command(s) to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. + * + * @param cmd_tick Initial tick of command, to be verified + * @param max_multi_cmd_split Maximum delay between commands + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split = 0); + + /** + * Is there a respondEvent scheduled? + * + * @return true if event is scheduled + */ + bool respondEventScheduled() const { return respondEvent.scheduled(); } + + /** + * Is there a read/write burst Event scheduled? + * + * @return true if event is scheduled + */ + bool requestEventScheduled() const { return nextReqEvent.scheduled(); } + + /** + * restart the controller + * This can be used by interfaces to restart the + * scheduler after maintainence commands complete + * + * @param Tick to schedule next event + */ + void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a read state + */ + bool inReadBusState(bool next_state) const; + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a write state + */ + bool inWriteBusState(bool next_state) const; + + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID) override; + + virtual void init() override; + virtual void startup() override; + virtual void drainResume() override; + + protected: + + Tick recvAtomic(PacketPtr pkt); + Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); + void recvFunctional(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt); + +}; + +#endif //__DCACHE_CTRL_HH__ diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh index dd13e3ce09..cf399cb3ec 100644 --- a/src/mem/mem_ctrl.hh +++ b/src/mem/mem_ctrl.hh @@ -63,6 +63,7 @@ class DRAMInterface; class NVMInterface; + /** * A burst helper helps organize and manage a packet that is larger than * the memory burst size. 
A system packet that is larger than the burst size @@ -71,17 +72,17 @@ class NVMInterface; */ class BurstHelper { - public: - - /** Number of bursts requred for a system packet **/ - const unsigned int burstCount; +public: + /** Number of bursts requred for a system packet **/ + const unsigned int burstCount; - /** Number of bursts serviced so far for a system packet **/ - unsigned int burstsServiced; + /** Number of bursts serviced so far for a system packet **/ + unsigned int burstsServiced; - BurstHelper(unsigned int _burstCount) - : burstCount(_burstCount), burstsServiced(0) - { } + BurstHelper(unsigned int _burstCount) + : burstCount(_burstCount), burstsServiced(0) + { + } }; /** @@ -90,125 +91,123 @@ class BurstHelper */ class MemPacket { - public: - - /** When did request enter the controller */ - const Tick entryTime; - - /** When will request leave the controller */ - Tick readyTime; - - /** This comes from the outside world */ - const PacketPtr pkt; - - /** RequestorID associated with the packet */ - const RequestorID _requestorId; - - const bool read; - - /** Does this packet access DRAM?*/ - const bool dram; - - /** Will be populated by address decoder */ - const uint8_t rank; - const uint8_t bank; - const uint32_t row; - - /** - * Bank id is calculated considering banks in all the ranks - * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and - * bankId = 8 --> rank1, bank0 - */ - const uint16_t bankId; - - /** - * The starting address of the packet. - * This address could be unaligned to burst size boundaries. The - * reason is to keep the address offset so we can accurately check - * incoming read packets with packets in the write queue. - */ - Addr addr; - - /** - * The size of this dram packet in bytes - * It is always equal or smaller than the burst size - */ - unsigned int size; - - /** - * A pointer to the BurstHelper if this MemPacket is a split packet - * If not a split packet (common case), this is set to NULL - */ - BurstHelper* burstHelper; - - /** - * QoS value of the encapsulated packet read at queuing time - */ - uint8_t _qosValue; - - /** - * Set the packet QoS value - * (interface compatibility with Packet) - */ - inline void qosValue(const uint8_t qv) { _qosValue = qv; } - - /** - * Get the packet QoS value - * (interface compatibility with Packet) - */ - inline uint8_t qosValue() const { return _qosValue; } - - /** - * Get the packet RequestorID - * (interface compatibility with Packet) - */ - inline RequestorID requestorId() const { return _requestorId; } - - /** - * Get the packet size - * (interface compatibility with Packet) - */ - inline unsigned int getSize() const { return size; } - - /** - * Get the packet address - * (interface compatibility with Packet) - */ - inline Addr getAddr() const { return addr; } - - /** - * Return true if its a read packet - * (interface compatibility with Packet) - */ - inline bool isRead() const { return read; } - - /** - * Return true if its a write packet - * (interface compatibility with Packet) - */ - inline bool isWrite() const { return !read; } - - /** - * Return true if its a DRAM access - */ - inline bool isDram() const { return dram; } - - MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank, - uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr, - unsigned int _size) - : entryTime(curTick()), readyTime(curTick()), pkt(_pkt), - _requestorId(pkt->requestorId()), - read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row), - bankId(bank_id), addr(_addr), size(_size), 
burstHelper(NULL), - _qosValue(_pkt->qosValue()) - { } - +public: + /** When did request enter the controller */ + Tick entryTime; + + /** When will request leave the controller */ + Tick readyTime; + + /** This comes from the outside world */ + const PacketPtr pkt; + + /** RequestorID associated with the packet */ + const RequestorID _requestorId; + + const bool read; + + /** Does this packet access DRAM?*/ + const bool dram; + + /** Will be populated by address decoder */ + const uint8_t rank; + const uint8_t bank; + const uint32_t row; + + /** + * Bank id is calculated considering banks in all the ranks + * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and + * bankId = 8 --> rank1, bank0 + */ + const uint16_t bankId; + + /** + * The starting address of the packet. + * This address could be unaligned to burst size boundaries. The + * reason is to keep the address offset so we can accurately check + * incoming read packets with packets in the write queue. + */ + Addr addr; + + /** + * The size of this dram packet in bytes + * It is always equal or smaller than the burst size + */ + unsigned int size; + + /** + * A pointer to the BurstHelper if this MemPacket is a split packet + * If not a split packet (common case), this is set to NULL + */ + BurstHelper *burstHelper; + + /** + * QoS value of the encapsulated packet read at queuing time + */ + uint8_t _qosValue; + + /** + * Set the packet QoS value + * (interface compatibility with Packet) + */ + inline void qosValue(const uint8_t qv) { _qosValue = qv; } + + /** + * Get the packet QoS value + * (interface compatibility with Packet) + */ + inline uint8_t qosValue() const { return _qosValue; } + + /** + * Get the packet RequestorID + * (interface compatibility with Packet) + */ + inline RequestorID requestorId() const { return _requestorId; } + + /** + * Get the packet size + * (interface compatibility with Packet) + */ + inline unsigned int getSize() const { return size; } + + /** + * Get the packet address + * (interface compatibility with Packet) + */ + inline Addr getAddr() const { return addr; } + + /** + * Return true if its a read packet + * (interface compatibility with Packet) + */ + inline bool isRead() const { return read; } + + /** + * Return true if its a write packet + * (interface compatibility with Packet) + */ + inline bool isWrite() const { return !read; } + + /** + * Return true if its a DRAM access + */ + inline bool isDram() const { return dram; } + + MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank, + uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr, + unsigned int _size) + : entryTime(curTick()), readyTime(curTick()), pkt(_pkt), + _requestorId((_pkt != nullptr) ? _pkt->requestorId() : -1), + read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row), + bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), + _qosValue((_pkt != nullptr) ? _pkt->qosValue() : -1) + { + } }; // The memory packets are store in a multiple dequeue structure, // based on their QoS priority -typedef std::deque MemPacketQueue; - +typedef std::deque MemPacketQueue; /** * The memory controller is a single-channel memory controller capturing @@ -233,480 +232,471 @@ typedef std::deque MemPacketQueue; * please cite the paper. 
* */ + class MemCtrl : public QoS::MemCtrl { - private: - - // For now, make use of a queued response port to avoid dealing with - // flow control for the responses being sent back - class MemoryPort : public QueuedResponsePort +protected: + // For now, make use of a queued response port to avoid dealing with + // flow control for the responses being sent back + class MemoryPort : public QueuedResponsePort { - RespPacketQueue queue; MemCtrl& ctrl; - public: - MemoryPort(const std::string& name, MemCtrl& _ctrl); - protected: - Tick recvAtomic(PacketPtr pkt) override; Tick recvAtomicBackdoor( PacketPtr pkt, MemBackdoorPtr &backdoor) override; - void recvFunctional(PacketPtr pkt) override; - bool recvTimingReq(PacketPtr) override; - AddrRangeList getAddrRanges() const override; - - }; - - /** - * Our incoming port, for a multi-ported controller add a crossbar - * in front of it - */ - MemoryPort port; - - /** - * Remember if the memory system is in timing mode - */ - bool isTimingMode; - - /** - * Remember if we have to retry a request when available. - */ - bool retryRdReq; - bool retryWrReq; - - /** - * Bunch of things requires to setup "events" in gem5 - * When event "respondEvent" occurs for example, the method - * processRespondEvent is called; no parameters are allowed - * in these methods - */ - void processNextReqEvent(); - EventFunctionWrapper nextReqEvent; - - void processRespondEvent(); - EventFunctionWrapper respondEvent; - - /** - * Check if the read queue has room for more entries - * - * @param pkt_count The number of entries needed in the read queue - * @return true if read queue is full, false otherwise - */ - bool readQueueFull(unsigned int pkt_count) const; - - /** - * Check if the write queue has room for more entries - * - * @param pkt_count The number of entries needed in the write queue - * @return true if write queue is full, false otherwise - */ - bool writeQueueFull(unsigned int pkt_count) const; - - /** - * When a new read comes in, first check if the write q has a - * pending request to the same address.\ If not, decode the - * address to populate rank/bank/row, create one or mutliple - * "mem_pkt", and push them to the back of the read queue.\ - * If this is the only - * read request in the system, schedule an event to start - * servicing it. - * - * @param pkt The request packet from the outside world - * @param pkt_count The number of memory bursts the pkt - * @param is_dram Does this packet access DRAM? - * translate to. If pkt size is larger then one full burst, - * then pkt_count is greater than one. - */ - void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); - - /** - * Decode the incoming pkt, create a mem_pkt and push to the - * back of the write queue. \If the write q length is more than - * the threshold specified by the user, ie the queue is beginning - * to get full, stop reads, and start draining writes. - * - * @param pkt The request packet from the outside world - * @param pkt_count The number of memory bursts the pkt - * @param is_dram Does this packet access DRAM? - * translate to. If pkt size is larger then one full burst, - * then pkt_count is greater than one. - */ - void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); - - /** - * Actually do the burst based on media specific access function. - * Update bus statistics when complete. 
- * - * @param mem_pkt The memory packet created from the outside world pkt - */ - void doBurstAccess(MemPacket* mem_pkt); - - /** - * When a packet reaches its "readyTime" in the response Q, - * use the "access()" method in AbstractMemory to actually - * create the response packet, and send it back to the outside - * world requestor. - * - * @param pkt The packet from the outside world - * @param static_latency Static latency to add before sending the packet - */ - void accessAndRespond(PacketPtr pkt, Tick static_latency); - - /** - * Determine if there is a packet that can issue. - * - * @param pkt The packet to evaluate - */ - bool packetReady(MemPacket* pkt); - - /** - * Calculate the minimum delay used when scheduling a read-to-write - * transision. - * @param return minimum delay - */ - Tick minReadToWriteDataGap(); - - /** - * Calculate the minimum delay used when scheduling a write-to-read - * transision. - * @param return minimum delay - */ - Tick minWriteToReadDataGap(); - - /** - * The memory schduler/arbiter - picks which request needs to - * go next, based on the specified policy such as FCFS or FR-FCFS - * and moves it to the head of the queue. - * Prioritizes accesses to the same rank as previous burst unless - * controller is switching command type. - * - * @param queue Queued requests to consider - * @param extra_col_delay Any extra delay due to a read/write switch - * @return an iterator to the selected packet, else queue.end() - */ - MemPacketQueue::iterator chooseNext(MemPacketQueue& queue, - Tick extra_col_delay); - - /** - * For FR-FCFS policy reorder the read/write queue depending on row buffer - * hits and earliest bursts available in memory - * - * @param queue Queued requests to consider - * @param extra_col_delay Any extra delay due to a read/write switch - * @return an iterator to the selected packet, else queue.end() - */ - MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue& queue, - Tick extra_col_delay); - - /** - * Calculate burst window aligned tick - * - * @param cmd_tick Initial tick of command - * @return burst window aligned tick - */ - Tick getBurstWindow(Tick cmd_tick); - - /** - * Used for debugging to observe the contents of the queues. - */ - void printQs() const; - - /** - * Burst-align an address. - * - * @param addr The potentially unaligned address - * @param is_dram Does this packet access DRAM? - * - * @return An address aligned to a memory burst - */ - Addr burstAlign(Addr addr, bool is_dram) const; - - /** - * The controller's main read and write queues, - * with support for QoS reordering - */ - std::vector readQueue; - std::vector writeQueue; - - /** - * To avoid iterating over the write queue to check for - * overlapping transactions, maintain a set of burst addresses - * that are currently queued. Since we merge writes to the same - * location we never have more than one address to the same burst - * address. - */ - std::unordered_set isInWriteQueue; - - /** - * Response queue where read packets wait after we're done working - * with them, but it's not time to send the response yet. The - * responses are stored separately mostly to keep the code clean - * and help with events scheduling. For all logical purposes such - * as sizing the read queue, this and the main read queue need to - * be added together. - */ - std::deque respQueue; - - /** - * Holds count of commands issued in burst window starting at - * defined Tick. This is used to ensure that the command bandwidth - * does not exceed the allowable media constraints. 
- */ - std::unordered_multiset burstTicks; - - /** - * Create pointer to interface of the actual dram media when connected - */ - DRAMInterface* const dram; - - /** - * Create pointer to interface of the actual nvm media when connected - */ - NVMInterface* const nvm; - - /** - * The following are basic design parameters of the memory - * controller, and are initialized based on parameter values. - * The rowsPerBank is determined based on the capacity, number of - * ranks and banks, the burst size, and the row buffer size. - */ - const uint32_t readBufferSize; - const uint32_t writeBufferSize; - const uint32_t writeHighThreshold; - const uint32_t writeLowThreshold; - const uint32_t minWritesPerSwitch; - uint32_t writesThisTime; - uint32_t readsThisTime; - - /** - * Memory controller configuration initialized based on parameter - * values. - */ - Enums::MemSched memSchedPolicy; - - /** - * Pipeline latency of the controller frontend. The frontend - * contribution is added to writes (that complete when they are in - * the write buffer) and reads that are serviced the write buffer. - */ - const Tick frontendLatency; - - /** - * Pipeline latency of the backend and PHY. Along with the - * frontend contribution, this latency is added to reads serviced - * by the memory. - */ - const Tick backendLatency; - - /** - * Length of a command window, used to check - * command bandwidth - */ - const Tick commandWindow; - - /** - * Till when must we wait before issuing next RD/WR burst? - */ - Tick nextBurstAt; - - Tick prevArrival; - - /** - * The soonest you have to start thinking about the next request - * is the longest access time that can occur before - * nextBurstAt. Assuming you need to precharge, open a new row, - * and access, it is tRP + tRCD + tCL. - */ - Tick nextReqTime; - - struct CtrlStats : public Stats::Group - { - CtrlStats(MemCtrl &ctrl); - - void regStats() override; - - MemCtrl &ctrl; - - // All statistics that the model needs to capture - Stats::Scalar readReqs; - Stats::Scalar writeReqs; - Stats::Scalar readBursts; - Stats::Scalar writeBursts; - Stats::Scalar servicedByWrQ; - Stats::Scalar mergedWrBursts; - Stats::Scalar neitherReadNorWriteReqs; - // Average queue lengths - Stats::Average avgRdQLen; - Stats::Average avgWrQLen; - - Stats::Scalar numRdRetry; - Stats::Scalar numWrRetry; - Stats::Vector readPktSize; - Stats::Vector writePktSize; - Stats::Vector rdQLenPdf; - Stats::Vector wrQLenPdf; - Stats::Histogram rdPerTurnAround; - Stats::Histogram wrPerTurnAround; - - Stats::Scalar bytesReadWrQ; - Stats::Scalar bytesReadSys; - Stats::Scalar bytesWrittenSys; - // Average bandwidth - Stats::Formula avgRdBWSys; - Stats::Formula avgWrBWSys; - - Stats::Scalar totGap; - Stats::Formula avgGap; - - // per-requestor bytes read and written to memory - Stats::Vector requestorReadBytes; - Stats::Vector requestorWriteBytes; - - // per-requestor bytes read and written to memory rate - Stats::Formula requestorReadRate; - Stats::Formula requestorWriteRate; - - // per-requestor read and write serviced memory accesses - Stats::Vector requestorReadAccesses; - Stats::Vector requestorWriteAccesses; - - // per-requestor read and write total memory access latency - Stats::Vector requestorReadTotalLat; - Stats::Vector requestorWriteTotalLat; - - // per-requestor raed and write average memory access latency - Stats::Formula requestorReadAvgLat; - Stats::Formula requestorWriteAvgLat; - }; - - CtrlStats stats; - - /** - * Upstream caches need this packet until true is returned, so - * hold it for deletion 
until a subsequent call - */ - std::unique_ptr pendingDelete; - - /** - * Select either the read or write queue - * - * @param is_read The current burst is a read, select read queue - * @return a reference to the appropriate queue - */ - std::vector& selQueue(bool is_read) - { - return (is_read ? readQueue : writeQueue); }; - /** - * Remove commands that have already issued from burstTicks - */ - void pruneBurstTick(); - - public: - - MemCtrl(const MemCtrlParams &p); - - /** - * Ensure that all interfaced have drained commands - * - * @return bool flag, set once drain complete - */ - bool allIntfDrained() const; - - DrainState drain() override; - - /** - * Check for command bus contention for single cycle command. - * If there is contention, shift command to next burst. - * Check verifies that the commands issued per burst is less - * than a defined max number, maxCommandsPerWindow. - * Therefore, contention per cycle is not verified and instead - * is done based on a burst window. - * - * @param cmd_tick Initial tick of command, to be verified - * @param max_cmds_per_burst Number of commands that can issue - * in a burst window - * @return tick for command issue without contention - */ - Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst); - - /** - * Check for command bus contention for multi-cycle (2 currently) - * command. If there is contention, shift command(s) to next burst. - * Check verifies that the commands issued per burst is less - * than a defined max number, maxCommandsPerWindow. - * Therefore, contention per cycle is not verified and instead - * is done based on a burst window. - * - * @param cmd_tick Initial tick of command, to be verified - * @param max_multi_cmd_split Maximum delay between commands - * @param max_cmds_per_burst Number of commands that can issue - * in a burst window - * @return tick for command issue without contention - */ - Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, - Tick max_multi_cmd_split = 0); - - /** - * Is there a respondEvent scheduled? - * - * @return true if event is scheduled - */ - bool respondEventScheduled() const { return respondEvent.scheduled(); } - - /** - * Is there a read/write burst Event scheduled? 
- * - * @return true if event is scheduled - */ - bool requestEventScheduled() const { return nextReqEvent.scheduled(); } - - /** - * restart the controller - * This can be used by interfaces to restart the - * scheduler after maintainence commands complete - * - * @param Tick to schedule next event - */ - void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } - - /** - * Check the current direction of the memory channel - * - * @param next_state Check either the current or next bus state - * @return True when bus is currently in a read state - */ - bool inReadBusState(bool next_state) const; - - /** - * Check the current direction of the memory channel - * - * @param next_state Check either the current or next bus state - * @return True when bus is currently in a write state - */ - bool inWriteBusState(bool next_state) const; - - Port &getPort(const std::string &if_name, - PortID idx=InvalidPortID) override; - - virtual void init() override; - virtual void startup() override; - virtual void drainResume() override; - - protected: - - Tick recvAtomic(PacketPtr pkt); - Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); - void recvFunctional(PacketPtr pkt); - bool recvTimingReq(PacketPtr pkt); + /** + * Our incoming port, for a multi-ported controller add a crossbar + * in front of it + */ + MemoryPort port; + + /** + * Remember if the memory system is in timing mode + */ + bool isTimingMode; + + /** + * Remember if we have to retry a request when available. + */ + bool retryRdReq; + bool retryWrReq; + + /** + * Bunch of things requires to setup "events" in gem5 + * When event "respondEvent" occurs for example, the method + * processRespondEvent is called; no parameters are allowed + * in these methods + */ + void processNextReqEvent(); + EventFunctionWrapper nextReqEvent; + + void processRespondEvent(); + EventFunctionWrapper respondEvent; + + /** + * Check if the read queue has room for more entries + * + * @param pkt_count The number of entries needed in the read queue + * @return true if read queue is full, false otherwise + */ + bool readQueueFull(unsigned int pkt_count) const; + + /** + * Check if the write queue has room for more entries + * + * @param pkt_count The number of entries needed in the write queue + * @return true if write queue is full, false otherwise + */ + bool writeQueueFull(unsigned int pkt_count) const; + + /** + * When a new read comes in, first check if the write q has a + * pending request to the same address.\ If not, decode the + * address to populate rank/bank/row, create one or mutliple + * "mem_pkt", and push them to the back of the read queue.\ + * If this is the only + * read request in the system, schedule an event to start + * servicing it. + * + * @param pkt The request packet from the outside world + * @param pkt_count The number of memory bursts the pkt + * @param is_dram Does this packet access DRAM? + * translate to. If pkt size is larger then one full burst, + * then pkt_count is greater than one. + */ + void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); + + /** + * Decode the incoming pkt, create a mem_pkt and push to the + * back of the write queue. \If the write q length is more than + * the threshold specified by the user, ie the queue is beginning + * to get full, stop reads, and start draining writes. + * + * @param pkt The request packet from the outside world + * @param pkt_count The number of memory bursts the pkt + * @param is_dram Does this packet access DRAM? + * translate to. 
If pkt size is larger then one full burst, + * then pkt_count is greater than one. + */ + void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); + + /** + * Actually do the burst based on media specific access function. + * Update bus statistics when complete. + * + * @param mem_pkt The memory packet created from the outside world pkt + */ + void doBurstAccess(MemPacket *mem_pkt); + + /** + * When a packet reaches its "readyTime" in the response Q, + * use the "access()" method in AbstractMemory to actually + * create the response packet, and send it back to the outside + * world requestor. + * + * @param pkt The packet from the outside world + * @param static_latency Static latency to add before sending the packet + */ + void accessAndRespond(PacketPtr pkt, Tick static_latency); + + /** + * Determine if there is a packet that can issue. + * + * @param pkt The packet to evaluate + */ + bool packetReady(MemPacket *pkt); + + /** + * Calculate the minimum delay used when scheduling a read-to-write + * transision. + * @param return minimum delay + */ + Tick minReadToWriteDataGap(); + + /** + * Calculate the minimum delay used when scheduling a write-to-read + * transision. + * @param return minimum delay + */ + Tick minWriteToReadDataGap(); + + /** + * The memory schduler/arbiter - picks which request needs to + * go next, based on the specified policy such as FCFS or FR-FCFS + * and moves it to the head of the queue. + * Prioritizes accesses to the same rank as previous burst unless + * controller is switching command type. + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNext(MemPacketQueue &queue, + Tick extra_col_delay); + + /** + * For FR-FCFS policy reorder the read/write queue depending on row buffer + * hits and earliest bursts available in memory + * + * @param queue Queued requests to consider + * @param extra_col_delay Any extra delay due to a read/write switch + * @return an iterator to the selected packet, else queue.end() + */ + MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue &queue, + Tick extra_col_delay); + + /** + * Calculate burst window aligned tick + * + * @param cmd_tick Initial tick of command + * @return burst window aligned tick + */ + Tick getBurstWindow(Tick cmd_tick); + + /** + * Used for debugging to observe the contents of the queues. + */ + void printQs() const; + + /** + * Burst-align an address. + * + * @param addr The potentially unaligned address + * @param is_dram Does this packet access DRAM? + * + * @return An address aligned to a memory burst + */ + Addr burstAlign(Addr addr, bool is_dram) const; + + /** + * The controller's main read and write queues, + * with support for QoS reordering + */ + std::vector readQueue; + std::vector writeQueue; + + /** + * To avoid iterating over the write queue to check for + * overlapping transactions, maintain a set of burst addresses + * that are currently queued. Since we merge writes to the same + * location we never have more than one address to the same burst + * address. + */ + std::unordered_set isInWriteQueue; + + /** + * Response queue where read packets wait after we're done working + * with them, but it's not time to send the response yet. The + * responses are stored separately mostly to keep the code clean + * and help with events scheduling. 
For all logical purposes such + * as sizing the read queue, this and the main read queue need to + * be added together. + */ + std::deque respQueue; + + /** + * Holds count of commands issued in burst window starting at + * defined Tick. This is used to ensure that the command bandwidth + * does not exceed the allowable media constraints. + */ + std::unordered_multiset burstTicks; + + /** + * Create pointer to interface of the actual dram media when connected + */ + DRAMInterface *const dram; + + /** + * Create pointer to interface of the actual nvm media when connected + */ + NVMInterface *const nvm; + + /** + * The following are basic design parameters of the memory + * controller, and are initialized based on parameter values. + * The rowsPerBank is determined based on the capacity, number of + * ranks and banks, the burst size, and the row buffer size. + */ + const uint32_t readBufferSize; + const uint32_t writeBufferSize; + const uint32_t writeHighThreshold; + const uint32_t writeLowThreshold; + const uint32_t minWritesPerSwitch; + uint32_t writesThisTime; + uint32_t readsThisTime; + + /** + * Memory controller configuration initialized based on parameter + * values. + */ + Enums::MemSched memSchedPolicy; + + /** + * Pipeline latency of the controller frontend. The frontend + * contribution is added to writes (that complete when they are in + * the write buffer) and reads that are serviced the write buffer. + */ + const Tick frontendLatency; + + /** + * Pipeline latency of the backend and PHY. Along with the + * frontend contribution, this latency is added to reads serviced + * by the memory. + */ + const Tick backendLatency; + + /** + * Length of a command window, used to check + * command bandwidth + */ + const Tick commandWindow; + + /** + * Till when must we wait before issuing next RD/WR burst? + */ + Tick nextBurstAt; + + Tick prevArrival; + + /** + * The soonest you have to start thinking about the next request + * is the longest access time that can occur before + * nextBurstAt. Assuming you need to precharge, open a new row, + * and access, it is tRP + tRCD + tCL. 
+ */ + Tick nextReqTime; + + struct CtrlStats : public Stats::Group + { + CtrlStats(MemCtrl &ctrl); + + void regStats() override; + + MemCtrl &ctrl; + + // All statistics that the model needs to capture + Stats::Scalar readReqs; + Stats::Scalar writeReqs; + Stats::Scalar readBursts; + Stats::Scalar writeBursts; + Stats::Scalar servicedByWrQ; + Stats::Scalar mergedWrBursts; + Stats::Scalar neitherReadNorWriteReqs; + // Average queue lengths + Stats::Average avgRdQLen; + Stats::Average avgWrQLen; + + Stats::Scalar numRdRetry; + Stats::Scalar numWrRetry; + Stats::Vector readPktSize; + Stats::Vector writePktSize; + Stats::Vector rdQLenPdf; + Stats::Vector wrQLenPdf; + Stats::Histogram rdPerTurnAround; + Stats::Histogram wrPerTurnAround; + + Stats::Scalar bytesReadWrQ; + Stats::Scalar bytesReadSys; + Stats::Scalar bytesWrittenSys; + // Average bandwidth + Stats::Formula avgRdBWSys; + Stats::Formula avgWrBWSys; + + Stats::Scalar totGap; + Stats::Formula avgGap; + + // per-requestor bytes read and written to memory + Stats::Vector requestorReadBytes; + Stats::Vector requestorWriteBytes; + + // per-requestor bytes read and written to memory rate + Stats::Formula requestorReadRate; + Stats::Formula requestorWriteRate; + + // per-requestor read and write serviced memory accesses + Stats::Vector requestorReadAccesses; + Stats::Vector requestorWriteAccesses; + + // per-requestor read and write total memory access latency + Stats::Vector requestorReadTotalLat; + Stats::Vector requestorWriteTotalLat; + + // per-requestor raed and write average memory access latency + Stats::Formula requestorReadAvgLat; + Stats::Formula requestorWriteAvgLat; + }; + + CtrlStats stats; + + /** + * Upstream caches need this packet until true is returned, so + * hold it for deletion until a subsequent call + */ + std::unique_ptr pendingDelete; + + /** + * Select either the read or write queue + * + * @param is_read The current burst is a read, select read queue + * @return a reference to the appropriate queue + */ + std::vector &selQueue(bool is_read) + { + return (is_read ? readQueue : writeQueue); + }; + + /** + * Remove commands that have already issued from burstTicks + */ + void pruneBurstTick(); + +public: + MemCtrl(const MemCtrlParams &p); + + /** + * Ensure that all interfaced have drained commands + * + * @return bool flag, set once drain complete + */ + bool allIntfDrained() const; + + DrainState drain() override; + + /** + * Check for command bus contention for single cycle command. + * If there is contention, shift command to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. + * + * @param cmd_tick Initial tick of command, to be verified + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) override; + + /** + * Check for command bus contention for multi-cycle (2 currently) + * command. If there is contention, shift command(s) to next burst. + * Check verifies that the commands issued per burst is less + * than a defined max number, maxCommandsPerWindow. + * Therefore, contention per cycle is not verified and instead + * is done based on a burst window. 
+ * + * @param cmd_tick Initial tick of command, to be verified + * @param max_multi_cmd_split Maximum delay between commands + * @param max_cmds_per_burst Number of commands that can issue + * in a burst window + * @return tick for command issue without contention + */ + Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split = 0) override; + + /** + * Is there a respondEvent scheduled? + * + * @return true if event is scheduled + */ + bool respondEventScheduled() const override + { return respondEvent.scheduled(); } + + /** + * Is there a read/write burst Event scheduled? + * + * @return true if event is scheduled + */ + bool requestEventScheduled() const override + { return nextReqEvent.scheduled(); } + + /** + * restart the controller + * This can be used by interfaces to restart the + * scheduler after maintainence commands complete + * + * @param Tick to schedule next event + */ + void restartScheduler(Tick tick) override { schedule(nextReqEvent, tick); } + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a read state + */ + bool inReadBusState(bool next_state) const override; + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a write state + */ + bool inWriteBusState(bool next_state) const override; + + Port &getPort(const std::string &if_name, + PortID idx = InvalidPortID) override; + + virtual void init() override; + virtual void startup() override; + virtual void drainResume() override; + +protected: + Tick recvAtomic(PacketPtr pkt); + Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); + void recvFunctional(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt); }; #endif //__MEM_CTRL_HH__ diff --git a/src/mem/mem_interface.cc b/src/mem/mem_interface.cc index d81d34c59d..1a1ccc231f 100644 --- a/src/mem/mem_interface.cc +++ b/src/mem/mem_interface.cc @@ -73,9 +73,13 @@ MemInterface::MemInterface(const MemInterfaceParams &_p) {} void -MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window) +MemInterface::setCtrl(QoS::MemCtrl* _ctrl, unsigned int command_window) { - ctrl = _ctrl; + if (dynamic_cast(_ctrl) != nullptr) { + ctrl = dynamic_cast(_ctrl); + } else { + ctrl = dynamic_cast(_ctrl); + } maxCommandsPerWindow = command_window / tCK; } @@ -713,6 +717,266 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, return std::make_pair(cmd_at, cmd_at + burst_gap); } +std::pair +DRAMInterface::doBurstAccess(MemPacket* dcc_pkt, Tick next_burst_at) + //,const std::vector& queue) +{ + DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n", + dcc_pkt->addr, dcc_pkt->rank, dcc_pkt->bank, dcc_pkt->row); + + // get the rank + Rank& rank_ref = *ranks[dcc_pkt->rank]; + assert(rank_ref.inRefIdleState()); + + // are we in or transitioning to a low-power state and have not scheduled + // a power-up event? 
+ // if so, wake up from power down to issue RD/WR burst + if (rank_ref.inLowPowerState) { + assert(rank_ref.pwrState != PWR_SREF); + rank_ref.scheduleWakeUpEvent(tXP); + } + + // get the bank + Bank& bank_ref = rank_ref.banks[dcc_pkt->bank]; + + // for the state we need to track if it is a row hit or not + bool row_hit = true; + + // Determine the access latency and update the bank state + if (bank_ref.openRow == dcc_pkt->row) { + // nothing to do + } else { + row_hit = false; + + // If there is a page open, precharge it. + if (bank_ref.openRow != Bank::NO_ROW) { + prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt, + curTick())); + } + + // next we need to account for the delay in activating the page + Tick act_tick = std::max(bank_ref.actAllowedAt, curTick()); + + // Record the activation and deal with all the global timing + // constraints caused be a new activation (tRRD and tXAW) + activateBank(rank_ref, bank_ref, act_tick, dcc_pkt->row); + } + + // respect any constraints on the command (e.g. tRCD or tCCD) + const Tick col_allowed_at = dcc_pkt->isRead() ? + bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; + + // we need to wait until the bus is available before we can issue + // the command; need to ensure minimum bus delay requirement is met + Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()}); + + // verify that we have command bandwidth to issue the burst + // if not, shift to next burst window + if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay)) + cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); + else + cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow); + + // if we are interleaving bursts, ensure that + // 1) we don't double interleave on next burst issue + // 2) we are at an interleave boundary; if not, shift to next boundary + Tick burst_gap = tBURST_MIN; + if (burstInterleave) { + if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) { + // already interleaving, push next command to end of full burst + burst_gap = tBURST; + } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) { + // not at an interleave boundary after bandwidth check + // Shift command to tBURST boundary to avoid data contention + // Command will remain in the same burst window given that + // tBURST is less than tBURST_MAX + cmd_at = rank_ref.lastBurstTick + tBURST; + } + } + DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at); + + // update the packet ready time + dcc_pkt->readyTime = cmd_at + tCL + tBURST; + + rank_ref.lastBurstTick = cmd_at; + + // update the time for the next read/write burst for each + // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here) + Tick dly_to_rd_cmd; + Tick dly_to_wr_cmd; + for (int j = 0; j < ranksPerChannel; j++) { + for (int i = 0; i < banksPerRank; i++) { + if (dcc_pkt->rank == j) { + if (bankGroupArch && + (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) { + // bank group architecture requires longer delays between + // RD/WR burst commands to the same bank group. + // tCCD_L is default requirement for same BG timing + // tCCD_L_WR is required for write-to-write + // Need to also take bus turnaround delays into account + dly_to_rd_cmd = dcc_pkt->isRead() ? + tCCD_L : std::max(tCCD_L, wrToRdDlySameBG); + dly_to_wr_cmd = dcc_pkt->isRead() ? + std::max(tCCD_L, rdToWrDlySameBG) : + tCCD_L_WR; + } else { + // tBURST is default requirement for diff BG timing + // Need to also take bus turnaround delays into account + dly_to_rd_cmd = dcc_pkt->isRead() ? 
burst_gap : + writeToReadDelay(); + dly_to_wr_cmd = dcc_pkt->isRead() ? readToWriteDelay() : + burst_gap; + } + } else { + // different rank is by default in a different bank group and + // doesn't require longer tCCD or additional RTW, WTR delays + // Need to account for rank-to-rank switching + dly_to_wr_cmd = rankToRankDelay(); + dly_to_rd_cmd = rankToRankDelay(); + } + ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd, + ranks[j]->banks[i].rdAllowedAt); + ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd, + ranks[j]->banks[i].wrAllowedAt); + } + } + + // Save rank of current access + activeRank = dcc_pkt->rank; + + // If this is a write, we also need to respect the write recovery + // time before a precharge, in the case of a read, respect the + // read to precharge constraint + bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt, + dcc_pkt->isRead() ? cmd_at + tRTP : + dcc_pkt->readyTime + tWR); + + // increment the bytes accessed and the accesses per row + bank_ref.bytesAccessed += burstSize; + ++bank_ref.rowAccesses; + + // if we reached the max, then issue with an auto-precharge + bool auto_precharge = pageMgmt == Enums::close || + bank_ref.rowAccesses == maxAccessesPerRow; + + // if we did not hit the limit, we might still want to + // auto-precharge + /*if (!auto_precharge && + (pageMgmt == Enums::open_adaptive || + pageMgmt == Enums::close_adaptive)) { + // a twist on the open and close page policies: + // 1) open_adaptive page policy does not blindly keep the + // page open, but close it if there are no row hits, and there + // are bank conflicts in the queue + // 2) close_adaptive page policy does not blindly close the + // page, but closes it only if there are no row hits in the queue. + // In this case, only force an auto precharge when there + // are no same page hits in the queue + bool got_more_hits = false; + bool got_bank_conflict = false; + + for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) { + auto p = queue[i].begin(); + // keep on looking until we find a hit or reach the end of the + // queue + // 1) if a hit is found, then both open and close adaptive + // policies keep the page open + // 2) if no hit is found, got_bank_conflict is set to true if a + // bank conflict request is waiting in the queue + // 3) make sure we are not considering the packet that we are + // currently dealing with + while (!got_more_hits && p != queue[i].end()) { + if (dcc_pkt != (*p)) { + bool same_rank_bank = (dcc_pkt->rank == (*p)->rank) && + (dcc_pkt->bank == (*p)->bank); + + bool same_row = dcc_pkt->row == (*p)->row; + got_more_hits |= same_rank_bank && same_row; + got_bank_conflict |= same_rank_bank && !same_row; + } + ++p; + } + + if (got_more_hits) + break; + } + + // auto pre-charge when either + // 1) open_adaptive policy, we have not got any more hits, and + // have a bank conflict + // 2) close_adaptive policy and we have not got any more hits + auto_precharge = !got_more_hits && + (got_bank_conflict || pageMgmt == Enums::close_adaptive); + }*/ + + // DRAMPower trace command to be written + std::string mem_cmd = dcc_pkt->isRead() ? "RD" : "WR"; + + // MemCommand required for DRAMPower library + MemCommand::cmds command = (mem_cmd == "RD") ? 
MemCommand::RD : + MemCommand::WR; + + rank_ref.cmdList.push_back(Command(command, dcc_pkt->bank, cmd_at)); + + DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) - + timeStampOffset, mem_cmd, dcc_pkt->bank, dcc_pkt->rank); + + // if this access should use auto-precharge, then we are + // closing the row after the read/write burst + if (auto_precharge) { + // if auto-precharge push a PRE command at the correct tick to the + // list used by DRAMPower library to calculate power + prechargeBank(rank_ref, bank_ref, std::max(curTick(), + bank_ref.preAllowedAt), true); + + DPRINTF(DRAM, "Auto-precharged bank: %d\n", dcc_pkt->bankId); + } + + // Update the stats and schedule the next request + if (dcc_pkt->isRead()) { + // Every respQueue which will generate an event, increment count + ++rank_ref.outstandingEvents; + + stats.readBursts++; + if (row_hit) + stats.readRowHits++; + stats.bytesRead += burstSize; + stats.perBankRdBursts[dcc_pkt->bankId]++; + + // Update latency stats + stats.totMemAccLat += dcc_pkt->readyTime - dcc_pkt->entryTime; + stats.totQLat += cmd_at - dcc_pkt->entryTime; + stats.totBusLat += tBURST; + } else { + // Schedule write done event to decrement event count + // after the readyTime has been reached + // Only schedule latest write event to minimize events + // required; only need to ensure that final event scheduled covers + // the time that writes are outstanding and bus is active + // to holdoff power-down entry events + if (!rank_ref.writeDoneEvent.scheduled()) { + schedule(rank_ref.writeDoneEvent, dcc_pkt->readyTime); + // New event, increment count + ++rank_ref.outstandingEvents; + + } else if (rank_ref.writeDoneEvent.when() < dcc_pkt->readyTime) { + reschedule(rank_ref.writeDoneEvent, dcc_pkt->readyTime); + } + // will remove write from queue when returned to parent function + // decrement count for DRAM rank + --rank_ref.writeEntries; + + stats.writeBursts++; + if (row_hit) + stats.writeRowHits++; + stats.bytesWritten += burstSize; + stats.perBankWrBursts[dcc_pkt->bankId]++; + + } + // Update bus state to reflect when previous command was issued + return std::make_pair(cmd_at, cmd_at + burst_gap); +} + void DRAMInterface::addRankToRankDelay(Tick cmd_at) { @@ -754,6 +1018,8 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p) enableDRAMPowerdown(_p.enable_dram_powerdown), lastStatsResetTick(0), stats(*this) + //rescheduleRead_udcc(false), + //rescheduleWrite_udcc(false) { DPRINTF(DRAM, "Setting up DRAM Interface\n"); @@ -1677,6 +1943,20 @@ DRAMInterface::Rank::processPowerEvent() " rank %d\n", rank); dram.ctrl->restartScheduler(curTick()); } + + // if (dram.rescheduleRead_udcc) { + // DPRINTF(DRAM, "Scheduling next DRAM read after refreshing" + // " rank %d\n", rank); + // dram.ctrl->restartDramReadScheduler(curTick()); + // dram.rescheduleRead_udcc = false; + // } + + // if (dram.rescheduleWrite_udcc) { + // DPRINTF(DRAM, "Scheduling next DRAM write after refreshing" + // " rank %d\n", rank); + // dram.ctrl->restartDramWriteScheduler(curTick()); + // dram.rescheduleWrite_udcc = false; + // } } if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) { @@ -1767,6 +2047,14 @@ DRAMInterface::Rank::processPowerEvent() } +Tick +DRAMInterface::Rank::getRefreshEventSchdTick() +{ + assert(refreshEvent.scheduled()); + return(refreshEvent.when()); +} + + void DRAMInterface::Rank::updatePowerStats() { @@ -2113,6 +2401,62 @@ NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const return std::make_pair(selected_pkt_it, selected_col_at); 
} +std::pair +NVMInterface::chooseNextFRFCFSDCache(MemPacketQueue& queue, Tick min_col_at) +{ + // remember if we found a hit, but one that cannit issue seamlessly + bool found_prepped_pkt = false; + + auto selected_pkt_it = queue.end(); + Tick selected_col_at = MaxTick; + + for (auto i = queue.begin(); i != queue.end() ; ++i) { + MemPacket* pkt = *i; + + // select optimal NVM packet in Q + if (!pkt->isDram()) { + const Bank& bank = ranks[pkt->rank]->banks[pkt->bank]; + const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt : + bank.wrAllowedAt; + + // check if rank is not doing a refresh and thus is available, + // if not, jump to the next packet + if (burstReadyDCache(pkt)) { + DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__, + pkt->bank, pkt->rank); + + // no additional rank-to-rank or media delays + if (col_allowed_at <= min_col_at) { + // FCFS within entries that can issue without + // additional delay, such as same rank accesses + // or media delay requirements + selected_pkt_it = i; + selected_col_at = col_allowed_at; + // no need to look through the remaining queue entries + DPRINTF(NVM, "%s Seamless buffer hit\n", __func__); + break; + } else if (!found_prepped_pkt) { + // packet is to prepped region but cannnot issue + // seamlessly; remember this one and continue + selected_pkt_it = i; + selected_col_at = col_allowed_at; + DPRINTF(NVM, "%s Prepped packet found \n", __func__); + found_prepped_pkt = true; + } + } else { + DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__, + pkt->bank, pkt->rank); + } + } + } + + if (selected_pkt_it == queue.end()) { + DPRINTF(NVM, "%s no available NVM ranks found\n", __func__); + } + + return std::make_pair(selected_pkt_it, selected_col_at); +} + void NVMInterface::chooseRead(MemPacketQueue& queue) { @@ -2208,6 +2552,96 @@ NVMInterface::chooseRead(MemPacketQueue& queue) } } +void +NVMInterface::processReadPkt(MemPacket* pkt) +{ + Tick cmd_at = std::max(curTick(), nextReadAt); + + assert(numReadsToIssue > 0); + + numReadsToIssue--; + + assert(pkt->readyTime == MaxTick); + assert(!pkt->isDram()); + assert(pkt->isRead()); + + // get the bank + Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank]; + + // issueing a read, inc counter and verify we haven't overrun + numPendingReads++; + assert(numPendingReads <= maxPendingReads); + + // increment the bytes accessed and the accesses per row + bank_ref.bytesAccessed += burstSize; + + // Verify command bandiwth to issue + // Host can issue read immediately uith buffering closer + // to the NVM. 
The actual execution at the NVM may be delayed + // due to busy resources + if (twoCycleRdWr) { + cmd_at = ctrl->verifyMultiCmd(cmd_at, + maxCommandsPerWindow, tCK); + } else { + cmd_at = ctrl->verifySingleCmd(cmd_at, + maxCommandsPerWindow); + } + + // Update delay to next read + // Ensures single read command issued per cycle + nextReadAt = cmd_at + tCK; + + // If accessing a new location in this bank, update timing + // and stats + if (bank_ref.openRow != pkt->row) { + // update the open bank, re-using row field + bank_ref.openRow = pkt->row; + + // sample the bytes accessed to a buffer in this bank + // here when we are re-buffering the data + stats.bytesPerBank.sample(bank_ref.bytesAccessed); + // start counting anew + bank_ref.bytesAccessed = 0; + + // holdoff next command to this bank until the read completes + // and the data has been successfully buffered + // can pipeline accesses to the same bank, sending them + // across the interface B2B, but will incur full access + // delay between data ready responses to different buffers + // in a bank + bank_ref.actAllowedAt = std::max(cmd_at, + bank_ref.actAllowedAt) + tREAD; + } + // update per packet readyTime to holdoff burst read operation + // overloading readyTime, which will be updated again when the + // burst is issued + pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt); + DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. " + "Data ready at %d\n", + bank_ref.bank, cmd_at, pkt->readyTime); + + // Insert into read ready queue. It will be handled after + // the media delay has been met + if (readReadyQueue.empty()) { + assert(!readReadyEvent.scheduled()); + schedule(readReadyEvent, pkt->readyTime); + } else if (readReadyEvent.when() > pkt->readyTime) { + // move it sooner in time, to the first read with data + reschedule(readReadyEvent, pkt->readyTime); + } else { + assert(readReadyEvent.scheduled()); + } + readReadyQueue.push_back(pkt->readyTime); + +} + +Tick +NVMInterface::nextReadReadyEventTick() +{ + assert(readReadyEvent.scheduled()); + return readReadyEvent.when(); +} + void NVMInterface::processReadReadyEvent() { @@ -2262,6 +2696,15 @@ NVMInterface::burstReady(MemPacket* pkt) const { return (read_rdy || write_rdy); } +bool +NVMInterface::burstReadyDCache(MemPacket* pkt) { + bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(false)) && + (pkt->readyTime <= curTick()) && (numReadDataReady > 0); + bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(false) && + !writeRespQueueFull(); + return (read_rdy || write_rdy); +} + std::pair NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at) { diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh index d1bf671e88..9c61812ab7 100644 --- a/src/mem/mem_interface.hh +++ b/src/mem/mem_interface.hh @@ -52,14 +52,20 @@ #include #include +#include "base/compiler.hh" #include "base/statistics.hh" #include "enums/AddrMap.hh" #include "enums/PageManage.hh" #include "mem/abstract_mem.hh" -#include "mem/drampower.hh" + + +#include "mem/dcache_ctrl.hh" #include "mem/mem_ctrl.hh" -#include "params/DRAMInterface.hh" +#include "mem/drampower.hh" +#include "mem/qos/mem_ctrl.hh" + #include "params/MemInterface.hh" +#include "params/DRAMInterface.hh" #include "params/NVMInterface.hh" #include "sim/eventq.hh" @@ -107,7 +113,7 @@ class MemInterface : public AbstractMemory /** * A pointer to the parent MemCtrl instance */ - MemCtrl* ctrl; + QoS::MemCtrl* ctrl; /** * Number of commands that can issue in the defined controller @@ -179,7 +185,7 @@ class MemInterface : 
public AbstractMemory * @param command_window size of command window used to * check command bandwidth */ - void setCtrl(MemCtrl* _ctrl, unsigned int command_window); + void setCtrl(QoS::MemCtrl* _ctrl, unsigned int command_window); /** * Get an address in a dense range which starts from 0. The input @@ -697,6 +703,8 @@ class DRAMInterface : public MemInterface void processWakeUpEvent(); EventFunctionWrapper wakeUpEvent; + Tick getRefreshEventSchdTick(); + protected: RankStats stats; }; @@ -882,6 +890,7 @@ class DRAMInterface : public MemInterface } public: + /** * Initialize the DRAM interface and verify parameters */ @@ -926,6 +935,8 @@ class DRAMInterface : public MemInterface */ Tick commandOffset() const override { return (tRP + tRCD); } + Tick getTBurst() { return tBURST; } + /* * Function to calulate unloaded, closed bank access latency */ @@ -960,6 +971,22 @@ class DRAMInterface : public MemInterface doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, const std::vector& queue); + + /** + * Specifically for dcache_ctrlr only. + * Actually do the burst - figure out the latency it + * will take to service the req based on bank state, channel state etc + * and then update those states to account for this request. Based + * on this, update the packet's "readyTime" and move it to the + * response q from where it will eventually go back to the outside + * world. + * + * @param dcc_pkt The packet created from the outside world pkt + * @param next_burst_at Minimum bus timing requirement from controller + */ + std::pair + doBurstAccess(MemPacket* dcc_pkt, Tick next_burst_at); + /** * Check if a burst operation can be issued to the DRAM * @@ -973,6 +1000,12 @@ class DRAMInterface : public MemInterface return ranks[pkt->rank]->inRefIdleState(); } + Tick + getRankRefEventSchdTick(MemPacket* pkt) + { + return ranks[pkt->rank]->getRefreshEventSchdTick(); + } + /** * This function checks if ranks are actively refreshing and * therefore busy. The function also checks if ranks are in @@ -1009,6 +1042,11 @@ class DRAMInterface : public MemInterface */ void checkRefreshState(uint8_t rank); + //bool rescheduleRead_udcc; + + //bool rescheduleWrite_udcc; + + DRAMInterface(const DRAMInterfaceParams &_p); }; @@ -1123,13 +1161,6 @@ class NVMInterface : public MemInterface std::deque readReadyQueue; - /** - * Check if the write response queue is empty - * - * @param Return true if empty - */ - bool writeRespQueueEmpty() const { return writeRespQueue.empty(); } - /** * Till when must we wait before issuing next read command? */ @@ -1173,6 +1204,8 @@ class NVMInterface : public MemInterface */ Tick commandOffset() const override { return tBURST; } + Tick getTBurst() { return tBURST; } + /** * Check if a burst operation can be issued to the NVM * @@ -1183,6 +1216,8 @@ class NVMInterface : public MemInterface */ bool burstReady(MemPacket* pkt) const override; + bool burstReadyDCache(MemPacket* pkt); + /** * This function checks if ranks are busy. 
* This state is true when either: @@ -1207,6 +1242,9 @@ class NVMInterface : public MemInterface std::pair chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const override; + std::pair + chooseNextFRFCFSDCache(MemPacketQueue& queue, Tick min_col_at); + /** * Add rank to rank delay to bus timing to all NVM banks in alli ranks * when access to an alternate interface is issued @@ -1222,6 +1260,10 @@ class NVMInterface : public MemInterface */ void chooseRead(MemPacketQueue& queue); + void processReadPkt(MemPacket* pkt); + + Tick nextReadReadyEventTick(); + /* * Function to calulate unloaded access latency */ @@ -1238,6 +1280,34 @@ class NVMInterface : public MemInterface return writeRespQueue.size() == maxPendingWrites; } + /** + * Check if the write response queue is empty + * + * @param Return true if empty + */ + bool writeRespQueueEmpty() const { return writeRespQueue.empty(); } + + + uint32_t + getMaxPendingWrites() + { + return maxPendingWrites; + } + + + Tick + writeRespQueueFront() + { + return writeRespQueue.front(); + } + + unsigned + writeRespQueueSize() + { + return writeRespQueue.size(); + } + + bool readsWaitingToIssue() const { diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 4ad8d46b6c..35e15d6c3e 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -824,7 +824,8 @@ class Packet : public Printable */ Packet(const RequestPtr &_req, MemCmd _cmd) : cmd(_cmd), id((PacketId)_req.get()), req(_req), - data(nullptr), addr(0), _isSecure(false), size(0), + data(nullptr), + _isSecure(false), size(0), _qosValue(0), htmReturnReason(HtmCacheFailure::NO_FAIL), htmTransactionUid(0), diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh index 02954d20d5..491dab7525 100644 --- a/src/mem/qos/mem_ctrl.hh +++ b/src/mem/qos/mem_ctrl.hh @@ -268,6 +268,12 @@ class MemCtrl : public ClockedObject virtual ~MemCtrl(); + virtual Tick getNextBurstTick() + { + panic("QoS::MemCtrl getNextBurstTick should not be called \n"); + return curTick(); + }; + /** * Gets the current bus state * @@ -275,6 +281,55 @@ class MemCtrl : public ClockedObject */ BusState getBusState() const { return busState; } + /**some virtual functions + * that will be implemented in the mem + * controllers + */ + virtual Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split = 0) { + panic("QoS::MemCtrl verifyMultiCmd should not be called \n"); + return curTick(); + }; + + virtual Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) { + panic("QoS::MemCtrl verifySingleCmd should not be called \n"); + return curTick(); + }; + + virtual bool inReadBusState(bool next_state) const { + panic("QoS::MemCtrl inReadBusState should not be called \n"); + return false; + }; + + virtual bool inWriteBusState(bool next_state) const { + panic("QoS::MemCtrl inWriteBusState should not be called \n"); + return false; + }; + + virtual bool requestEventScheduled() const { + panic("QoS::MemCtrl requestEventScheduled wrongly called \n"); + return false; + }; + + virtual bool respondEventScheduled() const { + panic("QoS::MemCtrl respondEventScheduled wrongly called \n"); + return false; + }; + + virtual void restartScheduler(Tick tick) { + panic("QoS::MemCtrl restartScheduler should not be called \n"); + }; + + virtual void restartDramReadScheduler(Tick tick) { + panic("QoS::MemCtrl restartDramReadScheduler " + "should not be called \n"); + }; + + virtual void restartDramWriteScheduler(Tick tick) { + panic("QoS::MemCtrl restartDramWriteScheduler " + "should not be called \n"); + }; + /** * Gets 
the next bus state *
diff --git a/traffGen.py b/traffGen.py
new file mode 100644
index 0000000000..0ef30b3a4f
--- /dev/null
+++ b/traffGen.py
@@ -0,0 +1,129 @@
+from m5.objects import *
+import m5
+import argparse
+from m5.objects.DRAMInterface import *
+from m5.objects.NVMInterface import *
+
+
+args = argparse.ArgumentParser()
+
+# This script takes these arguments [device model for dram cache]
+# [dram cache size] [maximum orb size]
+# [traffic mode] [duration of simulation in ticks]
+# [max address] [request injection period in ticks] [rd percentage]
+# min address is 0, data limit is 0, block size is 64B.
+# crb_max_size is 32 by default.
+
+# sample cmd: gem5.opt traffGen.py DDR3_1600_8x8 16MiB
+# 32 linear 100000000 128MiB 1000 100
+# sample cmd: gem5.opt traffGen.py DDR4_2400_16x4 1GB
+# 32 random 100000000 2GB 1000 100
+
+args.add_argument(
+    "device",
+    type = str,
+    help = "Memory device to use as a dram cache"
+)
+
+args.add_argument(
+    "dram_cache_size",
+    type = str,
+    help = "Size of the DRAM cache"
+)
+
+args.add_argument(
+    "max_orb",
+    type = int,
+    help = "Maximum outstanding requests buffer (ORB) size"
+)
+
+args.add_argument(
+    "traffic_mode",
+    type = str,
+    help = "Traffic type to use"
+)
+
+args.add_argument(
+    "duration",
+    type = int,
+    help = "Duration of simulation"
+)
+
+args.add_argument(
+    "max_address",
+    type=str,
+    help="End address of the range to be accessed",
+)
+
+args.add_argument(
+    "inj_period",
+    type = int,
+    help = "Request injection period in ticks"
+)
+
+args.add_argument(
+    "rd_prct",
+    type=int,
+    help="Read Percentage",
+)
+
+options = args.parse_args()
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = 'timing'
+
+system.generator = PyTrafficGen()
+
+system.mem_ctrl = DcacheCtrl()
+system.mem_ctrl.dram = eval(options.device)(range=AddrRange('8GB'),
+                                            in_addr_map=False)
+system.mem_ctrl.nvm = NVM_2400_1x64(range=AddrRange('8GB'))
+
+#system.mem_ctrl.dram.tREFI = "200"
+system.mem_ctrl.dram_cache_size = options.dram_cache_size
+system.mem_ctrl.orb_max_size = options.max_orb
+system.mem_ctrl.crb_max_size = "32"
+
+system.mem_ranges = [AddrRange('8GB')]
+
+system.generator.port = system.mem_ctrl.port
+
+def createRandomTraffic(tgen):
+    yield tgen.createRandom(options.duration,        # duration
+                            0,                       # min_addr
+                            AddrRange(options.max_address).end, # max_addr
+                            64,                      # block_size
+                            options.inj_period,      # min_period
+                            options.inj_period,      # max_period
+                            options.rd_prct,         # rd_perc
+                            0)                       # data_limit
+    yield tgen.createExit(0)
+
+def createLinearTraffic(tgen):
+    yield tgen.createLinear(options.duration,        # duration
+                            0,                       # min_addr
+                            AddrRange(options.max_address).end, # max_addr
+                            64,                      # block_size
+                            options.inj_period,      # min_period
+                            options.inj_period,      # max_period
+                            options.rd_prct,         # rd_perc
+                            0)                       # data_limit
+    yield tgen.createExit(0)
+
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+
+if options.traffic_mode == 'linear':
+    system.generator.start(createLinearTraffic(system.generator))
+elif options.traffic_mode == 'random':
+    system.generator.start(createRandomTraffic(system.generator))
+else:
+    print('Wrong traffic type! Exiting!')
+    exit()
+
+exit_event = m5.simulate()
\ No newline at end of file
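
Editorial note: the new NVMInterface::chooseNextFRFCFSDCache added above applies a standard FR-FCFS rule to the NVM queue: take the first packet whose column command can issue seamlessly, otherwise remember the first packet that is ready but would incur extra delay. The following is a small, standalone Python sketch of that selection rule for readers who want the logic in isolation; the Pkt tuple, queue contents and tick values are invented for illustration and are not part of the patch.

from collections import namedtuple

# Standalone sketch of the FR-FCFS rule used by chooseNextFRFCFSDCache:
# prefer the first packet that can issue seamlessly
# (col_allowed_at <= min_col_at); otherwise fall back to the first
# "prepped" packet that is ready but delayed.
Pkt = namedtuple("Pkt", ["name", "ready", "col_allowed_at"])

def choose_next_frfcfs(queue, min_col_at):
    selected = None
    for pkt in queue:
        if not pkt.ready:
            continue           # rank busy (e.g. refreshing): skip this packet
        if pkt.col_allowed_at <= min_col_at:
            return pkt         # seamless hit: FCFS among zero-delay packets
        if selected is None:
            selected = pkt     # remember first prepped packet as a fallback
    return selected            # None if nothing can issue

queue = [Pkt("A", False, 90), Pkt("B", True, 120), Pkt("C", True, 100)]
print(choose_next_frfcfs(queue, min_col_at=100))   # picks C (seamless)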
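
Editorial note: several comments above (burstTicks, getBurstWindow, verifySingleCmd) describe the same command-bandwidth idea: issued commands are counted per fixed burst window, and a command that would exceed maxCommandsPerWindow slips to the next window. The sketch below is a rough, self-contained Python illustration of that check under those assumptions; it is not the gem5 implementation, and the helper name and tick values are made up.

from collections import Counter

# Rough sketch of the burst-window bandwidth check described for
# verifySingleCmd/getBurstWindow: count commands per window and shift a
# command to the next window when the current one is already full.
def verify_single_cmd(cmd_tick, window_size, max_cmds_per_window, burst_ticks):
    window = (cmd_tick // window_size) * window_size   # burst-window aligned tick
    while burst_ticks[window] >= max_cmds_per_window:
        window += window_size                          # window full: slip to next window
    burst_ticks[window] += 1                           # record the issued command
    return max(cmd_tick, window)                       # tick at which the command can issue

burst_ticks = Counter()
ticks = [verify_single_cmd(t, window_size=10, max_cmds_per_window=2,
                           burst_ticks=burst_ticks) for t in (3, 5, 7)]
print(ticks)   # third command slips to the next window: [3, 5, 10]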
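
Editorial note: the controller fields writeHighThreshold, writeLowThreshold, minWritesPerSwitch and writesThisTime shown earlier implement the usual write-drain hysteresis (stop reads once the write queue gets close to full, drain a minimum batch of writes, then turn the bus back around). The snippet below is only a rough illustration of that general policy with made-up thresholds; the controller's actual switching conditions are more detailed.

# Hedged sketch of write-drain hysteresis: switch the bus to writes when the
# write queue crosses the high threshold (or reads run dry above the low
# threshold), and switch back to reads only after a minimum batch of writes.
def next_bus_state(state, read_q, write_q, writes_this_time,
                   high, low, min_writes_per_switch):
    if state == "READ":
        if write_q >= high or (read_q == 0 and write_q >= low):
            return "WRITE"
        return "READ"
    # state == "WRITE": keep draining until the minimum batch is done and
    # the write queue has fallen back below the high-water mark
    if writes_this_time >= min_writes_per_switch and write_q < high and read_q > 0:
        return "READ"
    return "WRITE"

print(next_bus_state("READ", read_q=4, write_q=55, writes_this_time=0,
                     high=54, low=32, min_writes_per_switch=16))   # -> WRITE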