diff --git a/build_opts/RISCV_MESI_Three_Level b/build_opts/RISCV_MESI_Three_Level new file mode 100644 index 0000000000..348e8deec0 --- /dev/null +++ b/build_opts/RISCV_MESI_Three_Level @@ -0,0 +1,2 @@ +USE_RISCV_ISA = True +PROTOCOL = 'MESI_Three_Level' diff --git a/configs-drtrace/drtrace.py b/configs-drtrace/drtrace.py new file mode 100644 index 0000000000..aad1d2e6f5 --- /dev/null +++ b/configs-drtrace/drtrace.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2019 The Regents of the University of California. +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power, Ayaz Akram + + +import argparse +import time +import m5 +import m5.ticks +from m5.objects import * + +from system import * + +supported_protocols = ["MESI_Two_Level"] + +def parse_options(): + + parser = argparse.ArgumentParser(description='For use with gem5.') + + parser.add_argument("num_cpus", type=int, help="Number of CPU cores") + parser.add_argument("workload", help="The google trace workload to run") + + return parser.parse_args() + +if __name__ == "__m5_main__": + args = parse_options() + + # create the system we are going to simulate + system = MyRubySystem("MESI_Two_Level", args.num_cpus, args) + + # set up the root SimObject and start the simulation + root = Root(full_system = True, system = system) + + # instantiate all of the objects we've created above + m5.instantiate() + + exit_event = m5.simulate(1000*1000000) diff --git a/configs-drtrace/drtrace_new.py b/configs-drtrace/drtrace_new.py new file mode 100644 index 0000000000..feed6b006f --- /dev/null +++ b/configs-drtrace/drtrace_new.py @@ -0,0 +1,88 @@ +import m5 +import argparse +from m5.objects import * +from system import * + +""" +Usage: +------ + +``` +./build/X86/gem5.opt \ + drtrace.py \ + --path \ + --workload \ + --players \ + --dram +``` +""" + +parser = argparse.ArgumentParser( + description="A script to run google traces." 
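+    # NOTE: despite the usage string at the top of this file, every
+    # argument below is positional and must be supplied in declaration
+    # order. Illustrative invocation (the concrete values here are
+    # hypothetical, not taken from this patch):
+    #   ./build/X86/gem5.opt configs-drtrace/drtrace_new.py \
+    #       /path/to/traces charlie 8 CascadeLakeNoPartWrs 8 128MiB 8GiB 0 1000ns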
+) + +benchmark_choices = ["charlie", "delta", "merced", "whiskey"] + +parser.add_argument( + "path", + type=str, + help="Main directory containing the traces.", +) + +parser.add_argument( + "workload", + type=str, + help="Input the benchmark program to execute.", + choices=benchmark_choices, +) + +parser.add_argument( + "players", + type=int, + help="Input the number of players to use.", +) + +parser.add_argument( + "dcache_policy", + type=str, + help="The architecture of DRAM cache: " + "CascadeLakeNoPartWrs, Oracle, BearWriteOpt, Rambus", +) +parser.add_argument( + "assoc", + type=int, + help="THe associativity of the DRAM cache", +) +parser.add_argument( + "dcache_size", + type=str, + help="The size of DRAM cache", +) +parser.add_argument( + "main_mem_size", + type=str, + help="The size of main memory", +) +parser.add_argument( + "is_link", + type=int, + help="whether to use a link for backing store or not", +) +parser.add_argument( + "link_lat", + type=str, + help="latency of the link to backing store" +) + +args = parser.parse_args() + +#system = System() +system = MyRubySystem("MESI_Two_Level", args.players, args.assoc, args.dcache_size, args.main_mem_size, args.dcache_policy, args.is_link, args.link_lat, args) + +root = Root(full_system=True, system=system) + +m5.instantiate() + +print("Beginning simulation!") +exit_event = m5.simulate(100000000000) +print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}") diff --git a/configs-drtrace/system/MESI_Two_Level.py b/configs-drtrace/system/MESI_Two_Level.py new file mode 100755 index 0000000000..c8cd7d218f --- /dev/null +++ b/configs-drtrace/system/MESI_Two_Level.py @@ -0,0 +1,312 @@ +#Copyright (c) 2020 The Regents of the University of California. +#All Rights Reserved +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +""" This file creates a set of Ruby caches for the MESI TWO Level protocol +This protocol models two level cache hierarchy. The L1 cache is split into +instruction and data cache. + +This system support the memory size of up to 3GB. 
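+
+Typical use from a top-level configuration (a sketch based on
+ruby_system.py in this patch; `system`, `cpus`, and `mem_ctrls` are
+assumed to exist already):
+
+    system.caches = MESITwoLevelCache()
+    system.caches.setup(system, cpus, mem_ctrls)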
+ +""" + +import math + +from m5.defines import buildEnv +from m5.util import fatal, panic + +from m5.objects import * + +class MESITwoLevelCache(RubySystem): + + def __init__(self): + if buildEnv['PROTOCOL'] != 'MESI_Two_Level': + fatal("This system assumes MESI_Two_Level!") + + super(MESITwoLevelCache, self).__init__() + + self._numL2Caches = 8 + + def setup(self, system, cpus, mem_ctrls): + """Set up the Ruby cache subsystem. Note: This can't be done in the + constructor because many of these items require a pointer to the + ruby system (self). This causes infinite recursion in initialize() + if we do this in the __init__. + """ + # Ruby's global network. + self.network = MyNetwork(self) + + # MESI_Two_Level example uses 5 virtual networks + self.number_of_virtual_networks = 5 + self.network.number_of_virtual_networks = 5 + + # There is a single global list of all of the controllers to make it + # easier to connect everything to the global network. This can be + # customized depending on the topology/network requirements. + # L1 caches are private to a core, hence there are one L1 cache per CPU + # core. The number of L2 caches are dependent to the architecture. + self.controllers = \ + [L1Cache(system, self, cpu, self._numL2Caches) for cpu in cpus] + \ + [L2Cache(system, self, self._numL2Caches) for num in \ + range(self._numL2Caches)] + \ + [DirController(self, system.mem_ranges, mem_ctrls)] + + # Create one sequencer per CPU and dma controller. + # Sequencers for other controllers can be here here. + self.sequencers = [RubySequencer(version = i, + # Grab dcache from ctrl + dcache = self.controllers[i].L1Dcache, + clk_domain = self.controllers[i].clk_domain, + ) for i in range(len(cpus))] + + for i,c in enumerate(self.controllers[:len(cpus)]): + c.sequencer = self.sequencers[i] + + self.num_of_sequencers = len(self.sequencers) + + # Create the network and connect the controllers. + # NOTE: This is quite different if using Garnet! + self.network.connectControllers(self.controllers) + self.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. + self.sys_port_proxy = RubyPortProxy() + system.system_port = self.sys_port_proxy.in_ports + + # Connect the cpu's cache, interrupt, and TLB ports to Ruby + for i,cpu in enumerate(cpus): + cpu.port = self.sequencers[i].in_ports + +class L1Cache(L1Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, cpu, num_l2Caches): + """Creating L1 cache controller. Consist of both instruction + and data cache. The size of data cache is 512KB and + 8-way set associative. The instruction cache is 32KB, + 2-way set associative. 
+ """ + super(L1Cache, self).__init__() + + self.version = self.versionCount() + block_size_bits = int(math.log(system.cache_line_size, 2)) + l1i_size = '32kB' + l1i_assoc = '2' + l1d_size = '512kB' + l1d_assoc = '8' + # This is the cache memory object that stores the cache data and tags + self.L1Icache = RubyCache(size = l1i_size, + assoc = l1i_assoc, + start_index_bit = block_size_bits , + is_icache = True) + self.L1Dcache = RubyCache(size = l1d_size, + assoc = l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False) + self.l2_select_num_bits = int(math.log(num_l2Caches , 2)) + self.clk_domain = cpu.clk_domain + self.prefetcher = RubyPrefetcher() + self.send_evictions = self.sendEvicts(cpu) + self.transitions_per_cycle = 4 + self.enable_prefetch = False + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system): + bits = int(math.log(system.cache_line_size, 2)) + if 2**bits != system.cache_line_size.value: + panic("Cache line size not a power of 2!") + return bits + + def sendEvicts(self, cpu): + """True if the CPU model or ISA requires sending evictions from caches + to the CPU. Two scenarios warrant forwarding evictions to the CPU: + 1. The O3 model must keep the LSQ coherent with the caches + 2. The x86 mwait instruction is built on top of coherence + 3. The local exclusive monitor in ARM systems + """ + if type(cpu) is DerivO3CPU: + return True + return False + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.mandatoryQueue = MessageBuffer() + self.requestFromL1Cache = MessageBuffer() + self.requestFromL1Cache.out_port = ruby_system.network.in_port + self.responseFromL1Cache = MessageBuffer() + self.responseFromL1Cache.out_port = ruby_system.network.in_port + self.unblockFromL1Cache = MessageBuffer() + self.unblockFromL1Cache.out_port = ruby_system.network.in_port + + self.optionalQueue = MessageBuffer() + + self.requestToL1Cache = MessageBuffer() + self.requestToL1Cache.in_port = ruby_system.network.out_port + self.responseToL1Cache = MessageBuffer() + self.responseToL1Cache.in_port = ruby_system.network.out_port + +class L2Cache(L2Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, num_l2Caches): + + super(L2Cache, self).__init__() + + self.version = self.versionCount() + # This is the cache memory object that stores the cache data and tags + self.L2cache = RubyCache(size = '1 MB', + assoc = 16, + start_index_bit = self.getBlockSizeBits(system, + num_l2Caches)) + + self.transitions_per_cycle = '4' + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system, num_l2caches): + l2_bits = int(math.log(num_l2caches, 2)) + bits = int(math.log(system.cache_line_size, 2)) + l2_bits + return bits + + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. 
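+        Buffers named *FromL2Cache* carry outgoing traffic, so their
+        out_port is tied to the network's in_port; *ToL2Cache* buffers
+        carry incoming traffic and are wired the other way around.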
+ """ + self.DirRequestFromL2Cache = MessageBuffer() + self.DirRequestFromL2Cache.out_port = ruby_system.network.in_port + self.L1RequestFromL2Cache = MessageBuffer() + self.L1RequestFromL2Cache.out_port = ruby_system.network.in_port + self.responseFromL2Cache = MessageBuffer() + self.responseFromL2Cache.out_port = ruby_system.network.in_port + self.unblockToL2Cache = MessageBuffer() + self.unblockToL2Cache.in_port = ruby_system.network.out_port + self.L1RequestToL2Cache = MessageBuffer() + self.L1RequestToL2Cache.in_port = ruby_system.network.out_port + self.responseToL2Cache = MessageBuffer() + self.responseToL2Cache.in_port = ruby_system.network.out_port + + +class DirController(Directory_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system, ranges, mem_ctrls): + """ranges are the memory ranges assigned to this controller. + """ + if len(mem_ctrls) > 1: + panic("This cache system can only be connected to one mem ctrl") + super(DirController, self).__init__() + self.version = self.versionCount() + self.addr_ranges = ranges + self.ruby_system = ruby_system + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. + self.memory_out_port = mem_ctrls[0].port + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.requestToDir = MessageBuffer() + self.requestToDir.in_port = ruby_system.network.out_port + self.responseToDir = MessageBuffer() + self.responseToDir.in_port = ruby_system.network.out_port + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = ruby_system.network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + +class DMAController(DMA_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system): + super(DMAController, self).__init__() + self.version = self.versionCount() + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.mandatoryQueue = MessageBuffer() + self.responseFromDir = MessageBuffer(ordered = True) + self.responseFromDir.in_port = ruby_system.network.out_port + self.requestToDir = MessageBuffer() + self.requestToDir.out_port = ruby_system.network.in_port + + +class MyNetwork(SimpleNetwork): + """A simple point-to-point network. This doesn't not use garnet. + """ + + def __init__(self, ruby_system): + super(MyNetwork, self).__init__() + self.netifs = [] + self.ruby_system = ruby_system + + def connectControllers(self, controllers): + """Connect all of the controllers to routers and connec the routers + together in a point-to-point network. + """ + # Create one router/switch per controller in the system + self.routers = [Switch(router_id = i) for i in range(len(controllers))] + + # Make a link from each controller to the router. The link goes + # externally to the network. + self.ext_links = [SimpleExtLink(link_id=i, ext_node=c, + int_node=self.routers[i]) + for i, c in enumerate(controllers)] + + # Make an "internal" link (internal to the network) between every pair + # of routers. + link_count = 0 + self.int_links = [] + for ri in self.routers: + for rj in self.routers: + if ri == rj: continue # Don't connect a router to itself! 
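+                # With N routers this all-to-all loop creates N*(N-1)
+                # unidirectional internal links, one per ordered router pair.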
+ link_count += 1 + self.int_links.append(SimpleIntLink(link_id = link_count, + src_node = ri, + dst_node = rj)) diff --git a/configs-drtrace/system/MI_example_caches.py b/configs-drtrace/system/MI_example_caches.py new file mode 100755 index 0000000000..3c7a71d7b1 --- /dev/null +++ b/configs-drtrace/system/MI_example_caches.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2015 Jason Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Power + +""" This file creates a set of Ruby caches, the Ruby network, and a simple +point-to-point topology. +See Part 3 in the Learning gem5 book: learning.gem5.org/book/part3 +You can change simple_ruby to import from this file instead of from msi_caches +to use the MI_example protocol instead of MSI. + +IMPORTANT: If you modify this file, it's likely that the Learning gem5 book + also needs to be updated. For now, email Jason + +""" + +import math + +from m5.defines import buildEnv +from m5.util import fatal, panic + +from m5.objects import * + +class MIExampleSystem(RubySystem): + + def __init__(self): + if buildEnv['PROTOCOL'] != 'MI_example': + fatal("This system assumes MI_example!") + + super(MIExampleSystem, self).__init__() + + def setup(self, system, cpus, mem_ctrls, dma_ports, iobus): + """Set up the Ruby cache subsystem. Note: This can't be done in the + constructor because many of these items require a pointer to the + ruby system (self). This causes infinite recursion in initialize() + if we do this in the __init__. + """ + # Ruby's global network. + self.network = MyNetwork(self) + + # MI example uses 5 virtual networks + self.number_of_virtual_networks = 5 + self.network.number_of_virtual_networks = 5 + + # There is a single global list of all of the controllers to make it + # easier to connect everything to the global network. This can be + # customized depending on the topology/network requirements. + # Create one controller for each L1 cache (and the cache mem obj.) 
+ # Create a single directory controller (Really the memory cntrl) + self.controllers = \ + [L1Cache(system, self, cpu) for cpu in cpus] + \ + [DirController(self, system.mem_ranges, mem_ctrls)] + \ + [DMAController(self) for i in range(len(dma_ports))] + + # Create one sequencer per CPU. In many systems this is more + # complicated since you have to create sequencers for DMA controllers + # and other controllers, too. + self.sequencers = [RubySequencer(version = i, + # Grab dcache from ctrl + dcache = self.controllers[i].cacheMemory, + clk_domain = self.controllers[i].clk_domain, + pio_request_port = iobus.cpu_side_ports, + mem_request_port = iobus.cpu_side_ports, + pio_response_port = iobus.mem_side_ports + ) for i in range(len(cpus))] + \ + [DMASequencer(version = i, + in_ports = port) + for i,port in enumerate(dma_ports) + ] + + for i,c in enumerate(self.controllers[0:len(cpus)]): + c.sequencer = self.sequencers[i] + + for i,d in enumerate(self.controllers[-len(dma_ports):]): + i += len(cpus) + d.dma_sequencer = self.sequencers[i] + + self.num_of_sequencers = len(self.sequencers) + + # Create the network and connect the controllers. + # NOTE: This is quite different if using Garnet! + self.network.connectControllers(self.controllers) + self.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. + self.sys_port_proxy = RubyPortProxy() + system.system_port = self.sys_port_proxy.in_ports + self.sys_port_proxy.pio_request_port = iobus.cpu_side_ports + + # Connect the cpu's cache, interrupt, and TLB ports to Ruby + for i,cpu in enumerate(cpus): + cpu.icache_port = self.sequencers[i].in_ports + cpu.dcache_port = self.sequencers[i].in_ports + cpu.createInterruptController() + isa = buildEnv['TARGET_ISA'] + if isa == 'x86': + cpu.interrupts[0].pio = self.sequencers[i].interrupt_out_port + cpu.interrupts[0].int_requestor = self.sequencers[i].in_ports + cpu.interrupts[0].int_responder = self.sequencers[i].interrupt_out_port + if isa == 'x86' or isa == 'arm': + cpu.mmu.connectWalkerPorts( + self.sequencers[i].in_ports, self.sequencers[i].in_ports) + +class L1Cache(L1Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, cpu): + """CPUs are needed to grab the clock domain and system is needed for + the cache block size. + """ + super(L1Cache, self).__init__() + + self.version = self.versionCount() + # This is the cache memory object that stores the cache data and tags + self.cacheMemory = RubyCache(size = '16kB', + assoc = 8, + start_index_bit = self.getBlockSizeBits(system)) + self.clk_domain = cpu.clk_domain + self.send_evictions = self.sendEvicts(cpu) + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system): + bits = int(math.log(system.cache_line_size, 2)) + if 2**bits != system.cache_line_size.value: + panic("Cache line size not a power of 2!") + return bits + + def sendEvicts(self, cpu): + """True if the CPU model or ISA requires sending evictions from caches + to the CPU. Two scenarios warrant forwarding evictions to the CPU: + 1. The O3 model must keep the LSQ coherent with the caches + 2. The x86 mwait instruction is built on top of coherence + 3. 
The local exclusive monitor in ARM systems + """ + if type(cpu) is DerivO3CPU or \ + buildEnv['TARGET_ISA'] in ('x86', 'arm'): + return True + return False + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.mandatoryQueue = MessageBuffer() + self.requestFromCache = MessageBuffer(ordered = True) + self.requestFromCache.out_port = ruby_system.network.in_port + self.responseFromCache = MessageBuffer(ordered = True) + self.responseFromCache.out_port = ruby_system.network.in_port + self.forwardToCache = MessageBuffer(ordered = True) + self.forwardToCache.in_port = ruby_system.network.out_port + self.responseToCache = MessageBuffer(ordered = True) + self.responseToCache.in_port = ruby_system.network.out_port + +class DirController(Directory_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system, ranges, mem_ctrls): + """ranges are the memory ranges assigned to this controller. + """ + if len(mem_ctrls) > 1: + panic("This cache system can only be connected to one mem ctrl") + super(DirController, self).__init__() + self.version = self.versionCount() + self.addr_ranges = ranges + self.ruby_system = ruby_system + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. + self.memory_out_port = mem_ctrls[0].port + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.requestToDir = MessageBuffer(ordered = True) + self.requestToDir.in_port = ruby_system.network.out_port + self.dmaRequestToDir = MessageBuffer(ordered = True) + self.dmaRequestToDir.in_port = ruby_system.network.out_port + + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = ruby_system.network.in_port + self.dmaResponseFromDir = MessageBuffer(ordered = True) + self.dmaResponseFromDir.out_port = ruby_system.network.in_port + self.forwardFromDir = MessageBuffer() + self.forwardFromDir.out_port = ruby_system.network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + +class DMAController(DMA_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system): + super(DMAController, self).__init__() + self.version = self.versionCount() + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.mandatoryQueue = MessageBuffer() + self.requestToDir = MessageBuffer() + self.requestToDir.out_port = ruby_system.network.in_port + self.responseFromDir = MessageBuffer(ordered = True) + self.responseFromDir.in_port = ruby_system.network.out_port + + +class MyNetwork(SimpleNetwork): + """A simple point-to-point network. This doesn't not use garnet. + """ + + def __init__(self, ruby_system): + super(MyNetwork, self).__init__() + self.netifs = [] + self.ruby_system = ruby_system + + def connectControllers(self, controllers): + """Connect all of the controllers to routers and connec the routers + together in a point-to-point network. + """ + # Create one router/switch per controller in the system + self.routers = [Switch(router_id = i) for i in range(len(controllers))] + + # Make a link from each controller to the router. The link goes + # externally to the network. 
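+        # One SimpleExtLink per controller: controller i is attached to its
+        # dedicated router i created above.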
+ self.ext_links = [SimpleExtLink(link_id=i, ext_node=c, + int_node=self.routers[i]) + for i, c in enumerate(controllers)] + + # Make an "internal" link (internal to the network) between every pair + # of routers. + link_count = 0 + self.int_links = [] + for ri in self.routers: + for rj in self.routers: + if ri == rj: continue # Don't connect a router to itself! + link_count += 1 + self.int_links.append(SimpleIntLink(link_id = link_count, + src_node = ri, + dst_node = rj)) diff --git a/configs-drtrace/system/MOESI_CMP_directory.py b/configs-drtrace/system/MOESI_CMP_directory.py new file mode 100755 index 0000000000..33f9f47e74 --- /dev/null +++ b/configs-drtrace/system/MOESI_CMP_directory.py @@ -0,0 +1,350 @@ +#Copyright (c) 2020 The Regents of the University of California. +#All Rights Reserved +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +""" This file creates a set of Ruby caches for the MOESI CMP directory +protocol. +This protocol models two level cache hierarchy. The L1 cache is split into +instruction and data cache. + +This system support the memory size of up to 3GB. + +""" + +from __future__ import print_function +from __future__ import absolute_import + +import math + +from m5.defines import buildEnv +from m5.util import fatal, panic + +from m5.objects import * + +class MOESICMPDirCache(RubySystem): + + def __init__(self): + if buildEnv['PROTOCOL'] != 'MOESI_CMP_directory': + fatal("This system assumes MOESI_CMP_directory!") + + super(MOESICMPDirCache, self).__init__() + + self._numL2Caches = 8 + + def setup(self, system, cpus, mem_ctrls, dma_ports, iobus): + """Set up the Ruby cache subsystem. Note: This can't be done in the + constructor because many of these items require a pointer to the + ruby system (self). This causes infinite recursion in initialize() + if we do this in the __init__. + """ + # Ruby's global network. 
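+        # Every controller built below registers its message buffers with
+        # this network in its connectQueues() method.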
+ self.network = MyNetwork(self) + + # MOESI_CMP_directory example uses 3 virtual networks + self.number_of_virtual_networks = 3 + self.network.number_of_virtual_networks = 3 + + # There is a single global list of all of the controllers to make it + # easier to connect everything to the global network. This can be + # customized depending on the topology/network requirements. + # L1 caches are private to a core, hence there are one L1 cache per CPU + # core. The number of L2 caches are dependent to the architecture. + self.controllers = \ + [L1Cache(system, self, cpu, self._numL2Caches) for cpu in cpus] + \ + [L2Cache(system, self, self._numL2Caches) for num in \ + range(self._numL2Caches)] + [DirController(self, \ + system.mem_ranges, mem_ctrls)] + [DMAController(self) for i \ + in range(len(dma_ports))] + + # Create one sequencer per CPU and dma controller. + # Sequencers for other controllers can be here here. + self.sequencers = [RubySequencer(version = i, + # Grab dcache from ctrl + dcache = self.controllers[i].L1Dcache, + clk_domain = self.controllers[i].clk_domain, + pio_request_port = iobus.cpu_side_ports, + mem_request_port = iobus.cpu_side_ports, + pio_response_port = iobus.mem_side_ports + ) for i in range(len(cpus))] + \ + [DMASequencer(version = i, + in_ports = port) + for i,port in enumerate(dma_ports) + ] + + for i,c in enumerate(self.controllers[:len(cpus)]): + c.sequencer = self.sequencers[i] + + #Connecting the DMA sequencer to DMA controller + for i,d in enumerate(self.controllers[-len(dma_ports):]): + i += len(cpus) + d.dma_sequencer = self.sequencers[i] + + self.num_of_sequencers = len(self.sequencers) + + # Create the network and connect the controllers. + # NOTE: This is quite different if using Garnet! + self.network.connectControllers(self.controllers) + self.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. + self.sys_port_proxy = RubyPortProxy() + system.system_port = self.sys_port_proxy.in_ports + self.sys_port_proxy.pio_request_port = iobus.cpu_side_ports + + # Connect the cpu's cache, interrupt, and TLB ports to Ruby + for i,cpu in enumerate(cpus): + cpu.icache_port = self.sequencers[i].in_ports + cpu.dcache_port = self.sequencers[i].in_ports + cpu.createInterruptController() + isa = buildEnv['TARGET_ISA'] + if isa == 'x86': + cpu.interrupts[0].pio = self.sequencers[i].interrupt_out_port + cpu.interrupts[0].int_requestor = self.sequencers[i].in_ports + cpu.interrupts[0].int_responder = self.sequencers[i].interrupt_out_port + if isa == 'x86' or isa == 'arm': + cpu.mmu.connectWalkerPorts( + self.sequencers[i].in_ports, self.sequencers[i].in_ports) + +class L1Cache(L1Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, cpu, num_l2Caches): + """Creating L1 cache controller. Consist of both instruction + and data cache. The size of data cache is 512KB and + 8-way set associative. The instruction cache is 32KB, + 2-way set associative. 
+ """ + super(L1Cache, self).__init__() + + self.version = self.versionCount() + block_size_bits = int(math.log(system.cache_line_size, 2)) + l1i_size = '32kB' + l1i_assoc = '2' + l1d_size = '512kB' + l1d_assoc = '8' + # This is the cache memory object that stores the cache data and tags + self.L1Icache = RubyCache(size = l1i_size, + assoc = l1i_assoc, + start_index_bit = block_size_bits , + is_icache = True, + dataAccessLatency = 1, + tagAccessLatency = 1) + self.L1Dcache = RubyCache(size = l1d_size, + assoc = l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False, + dataAccessLatency = 1, + tagAccessLatency = 1) + self.clk_domain = cpu.clk_domain + self.prefetcher = RubyPrefetcher() + self.send_evictions = self.sendEvicts(cpu) + self.transitions_per_cycle = 4 + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system): + bits = int(math.log(system.cache_line_size, 2)) + if 2**bits != system.cache_line_size.value: + panic("Cache line size not a power of 2!") + return bits + + def sendEvicts(self, cpu): + """True if the CPU model or ISA requires sending evictions from caches + to the CPU. Two scenarios warrant forwarding evictions to the CPU: + 1. The O3 model must keep the LSQ coherent with the caches + 2. The x86 mwait instruction is built on top of coherence + 3. The local exclusive monitor in ARM systems + """ + if type(cpu) is DerivO3CPU or \ + buildEnv['TARGET_ISA'] in ('x86', 'arm'): + return True + return False + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.mandatoryQueue = MessageBuffer() + self.requestFromL1Cache = MessageBuffer() + self.requestFromL1Cache.out_port = ruby_system.network.in_port + self.responseFromL1Cache = MessageBuffer() + self.responseFromL1Cache.out_port = ruby_system.network.in_port + self.requestToL1Cache = MessageBuffer() + self.requestToL1Cache.in_port = ruby_system.network.out_port + self.responseToL1Cache = MessageBuffer() + self.responseToL1Cache.in_port = ruby_system.network.out_port + self.triggerQueue = MessageBuffer(ordered = True) + +class L2Cache(L2Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, num_l2Caches): + + super(L2Cache, self).__init__() + + self.version = self.versionCount() + # This is the cache memory object that stores the cache data and tags + self.L2cache = RubyCache(size = '1 MB', + assoc = 16, + start_index_bit = self.getL2StartIdx(system, + num_l2Caches), + dataAccessLatency = 20, + tagAccessLatency = 20) + + self.transitions_per_cycle = '4' + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getL2StartIdx(self, system, num_l2caches): + l2_bits = int(math.log(num_l2caches, 2)) + bits = int(math.log(system.cache_line_size, 2)) + l2_bits + return bits + + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. 
+ """ + self.GlobalRequestFromL2Cache = MessageBuffer() + self.GlobalRequestFromL2Cache.out_port = ruby_system.network.in_port + self.L1RequestFromL2Cache = MessageBuffer() + self.L1RequestFromL2Cache.out_port = ruby_system.network.in_port + self.responseFromL2Cache = MessageBuffer() + self.responseFromL2Cache.out_port = ruby_system.network.in_port + + self.GlobalRequestToL2Cache = MessageBuffer() + self.GlobalRequestToL2Cache.in_port = ruby_system.network.out_port + self.L1RequestToL2Cache = MessageBuffer() + self.L1RequestToL2Cache.in_port = ruby_system.network.out_port + self.responseToL2Cache = MessageBuffer() + self.responseToL2Cache.in_port = ruby_system.network.out_port + self.triggerQueue = MessageBuffer(ordered = True) + + + +class DirController(Directory_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system, ranges, mem_ctrls): + """ranges are the memory ranges assigned to this controller. + """ + if len(mem_ctrls) > 1: + panic("This cache system can only be connected to one mem ctrl") + super(DirController, self).__init__() + self.version = self.versionCount() + self.addr_ranges = ranges + self.ruby_system = ruby_system + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. + self.memory_out_port = mem_ctrls[0].port + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.requestToDir = MessageBuffer() + self.requestToDir.in_port = ruby_system.network.out_port + self.responseToDir = MessageBuffer() + self.responseToDir.in_port = ruby_system.network.out_port + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = ruby_system.network.in_port + self.forwardFromDir = MessageBuffer() + self.forwardFromDir.out_port = ruby_system.network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + self.triggerQueue = MessageBuffer(ordered = True) + +class DMAController(DMA_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system): + super(DMAController, self).__init__() + self.version = self.versionCount() + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.mandatoryQueue = MessageBuffer() + self.responseFromDir = MessageBuffer() + self.responseFromDir.in_port = ruby_system.network.out_port + self.reqToDir = MessageBuffer() + self.reqToDir.out_port = ruby_system.network.in_port + self.respToDir = MessageBuffer() + self.respToDir.out_port = ruby_system.network.in_port + self.triggerQueue = MessageBuffer(ordered = True) + + +class MyNetwork(SimpleNetwork): + """A simple point-to-point network. This doesn't not use garnet. + """ + + def __init__(self, ruby_system): + super(MyNetwork, self).__init__() + self.netifs = [] + self.ruby_system = ruby_system + + def connectControllers(self, controllers): + """Connect all of the controllers to routers and connec the routers + together in a point-to-point network. + """ + # Create one router/switch per controller in the system + self.routers = [Switch(router_id = i) for i in range(len(controllers))] + + # Make a link from each controller to the router. The link goes + # externally to the network. 
+ self.ext_links = [SimpleExtLink(link_id=i, ext_node=c, + int_node=self.routers[i]) + for i, c in enumerate(controllers)] + + # Make an "internal" link (internal to the network) between every pair + # of routers. + link_count = 0 + self.int_links = [] + for ri in self.routers: + for rj in self.routers: + if ri == rj: continue # Don't connect a router to itself! + link_count += 1 + self.int_links.append(SimpleIntLink(link_id = link_count, + src_node = ri, + dst_node = rj)) diff --git a/configs-drtrace/system/__init__.py b/configs-drtrace/system/__init__.py new file mode 100755 index 0000000000..cd2696fb02 --- /dev/null +++ b/configs-drtrace/system/__init__.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +from .ruby_system import MyRubySystem diff --git a/configs-drtrace/system/ruby_system.py b/configs-drtrace/system/ruby_system.py new file mode 100755 index 0000000000..ea339b924e --- /dev/null +++ b/configs-drtrace/system/ruby_system.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +import m5 +from m5.objects import * + + +class MyRubySystem(System): + + def __init__(self, mem_sys, num_cpus, assoc, dcache_size, main_mem_size, policy, is_link, link_lat, opts, restore=False): + super(MyRubySystem, self).__init__() + self._opts = opts + + # Set up the clock domain and the voltage domain + self.clk_domain = SrcClockDomain() + self.clk_domain.clock = '5GHz' + self.clk_domain.voltage_domain = VoltageDomain() + + self.mem_ranges = [AddrRange(Addr(main_mem_size))] + + # self.intrctrl = IntrControl() + self._createMemoryControllers(assoc, dcache_size, policy, is_link, link_lat) + + # Create the cache hierarchy for the system. + if mem_sys == 'MI_example': + from .MI_example_caches import MIExampleSystem + self.caches = MIExampleSystem() + elif mem_sys == 'MESI_Two_Level': + from .MESI_Two_Level import MESITwoLevelCache + self.caches = MESITwoLevelCache() + elif mem_sys == 'MOESI_CMP_directory': + from .MOESI_CMP_directory import MOESICMPDirCache + self.caches = MOESICMPDirCache() + + self.reader = DRTraceReader( + directory=f"/{opts.path}/{opts.workload}/", num_players=num_cpus + ) + + self.players = [ + DRTracePlayer( + reader=self.reader, + send_data=True, + compress_address_range=self.mem_ranges[0], + ) + for _ in range(num_cpus) + ] + + self.caches.setup(self, self.players, self.mem_ctrl) + + self.caches.access_backing_store = True + self.caches.phys_mem = SimpleMemory(range=self.mem_ranges[0], + in_addr_map=False) + + + def _createMemoryControllers(self, assoc, dcache_size, policy, is_link, link_lat): + + self.mem_ctrl = PolicyManager(range=self.mem_ranges[0], kvm_map=False) + self.mem_ctrl.static_frontend_latency = "10ns" + self.mem_ctrl.static_backend_latency = "10ns" + + self.mem_ctrl.loc_mem_policy = policy + + self.mem_ctrl.assoc = assoc + + # self.mem_ctrl.bypass_dcache = True + + # TDRAM cache + self.loc_mem_ctrl = MemCtrl() + self.loc_mem_ctrl.consider_oldest_write = True + self.loc_mem_ctrl.oldest_write_age_threshold = 5000000 + self.loc_mem_ctrl.dram = TDRAM_32(range=self.mem_ranges[0], in_addr_map=False, kvm_map=False) + + + self.mem_ctrl.loc_mem = self.loc_mem_ctrl.dram + self.loc_mem_ctrl.static_frontend_latency = "1ns" + self.loc_mem_ctrl.static_backend_latency = "1ns" + self.loc_mem_ctrl.static_frontend_latency_tc = "0ns" + self.loc_mem_ctrl.static_backend_latency_tc = "0ns" + + # main memory + self.far_mem_ctrl = MemCtrl() + self.far_mem_ctrl.dram = DDR4_2400_16x4(range=self.mem_ranges[0], in_addr_map=False, kvm_map=False) + self.far_mem_ctrl.static_frontend_latency = "1ns" + self.far_mem_ctrl.static_backend_latency = "1ns" + + self.loc_mem_ctrl.port = self.mem_ctrl.loc_req_port + + self.far_mem_ctrl.port = self.mem_ctrl.far_req_port + + 
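+        # Resulting topology: Ruby's directory sends requests to the
+        # PolicyManager (self.mem_ctrl), which steers them between the
+        # TDRAM-based DRAM cache (loc_mem_ctrl, via loc_req_port) and the
+        # DDR4 main memory (far_mem_ctrl, via far_req_port).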
self.mem_ctrl.orb_max_size = 128 + self.mem_ctrl.dram_cache_size = dcache_size + + self.loc_mem_ctrl.dram.read_buffer_size = 64 + self.loc_mem_ctrl.dram.write_buffer_size = 64 + + self.far_mem_ctrl.dram.read_buffer_size = 64 + self.far_mem_ctrl.dram.write_buffer_size = 64 + diff --git a/configs-npb-gapbs-chkpt-restore/checkpoint_both.py b/configs-npb-gapbs-chkpt-restore/checkpoint_both.py new file mode 100644 index 0000000000..41a305609b --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/checkpoint_both.py @@ -0,0 +1,285 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2019 The Regents of the University of California. +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power, Ayaz Akram + +""" Script to run and take checkpoints for both GAPBS and NPB +""" +import argparse +import time +import m5 +import m5.ticks +from m5.objects import * +from system import * +from m5.stats.gem5stats import get_simstat + +from info import ( + gapbs_benchmarks, + npb_benchmarks, + main_mem_size, +) + + +def writeBenchScript_GAPBS(dir, benchmark_name, size, synthetic): + """ + This method creates a script in dir which will be eventually + passed to the simulated system (to run a specific benchmark + at bootup). + """ + input_file_name = "{}/run_{}_{}".format(dir, benchmark_name, size) + if synthetic: + with open(input_file_name, "w") as f: + f.write("./{} -g {}\n".format(benchmark_name, size)) + elif synthetic == 0: + with open(input_file_name, "w") as f: + # The workloads that are copied to the disk image using Packer + # should be located in /home/gem5/. + # Since the command running the workload will be executed with + # pwd = /home/gem5/gapbs, the path to the copied workload is + # ../{workload-name} + f.write("./{} -sf ../{}".format(benchmark_name, size)) + + return input_file_name + + +def writeBenchScript_NPB(dir, bench): + """ + This method creates a script in dir which will be eventually + passed to the simulated system (to run a specific benchmark + at bootup). 
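+
+    For example (illustrative), writeBenchScript_NPB(outdir, "bt.C.x")
+    produces a file named "run_bt.C.x" containing:
+
+        /home/gem5/NPB3.3-OMP/bin/bt.C.x
+        sleep 5
+        m5 exit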
+ """ + file_name = "{}/run_{}".format(dir, bench) + bench_file = open(file_name, "w+") + bench_file.write("/home/gem5/NPB3.3-OMP/bin/{} \n".format(bench)) + + # sleeping for sometime (5 seconds here) makes sure + # that the benchmark's output has been + # printed to the console + bench_file.write("sleep 5 \n") + bench_file.write("m5 exit \n") + bench_file.close() + return file_name + + +def do_warmup(system, single_channel, benchmark, size): + prevTotalColdMisses = 0 + prevTotalMemReqs = 0 + numOfCacheBlks = 0 + if single_channel: + numOfCacheBlks = float(system.mem_ctrl.dram_cache_size / system.mem_ctrl.block_size) + else: + numOfCacheBlks = float(num_cpus * system.mem_ctrl[0].dram_cache_size / system.mem_ctrl[0].block_size) + print("Number of total cache blocks: {}".format(numOfCacheBlks)) + + iteration_duration = 100_000_000_000 # 100 ms + num_of_iterations = 10 + if benchmark == "is" and size == "C": + num_of_iterations = 4 + + print("Doing {} iterations of {} ps for DRAM $ warmup".format(num_of_iterations, iteration_duration)) + + for interval_number in range(num_of_iterations): + print("Interval number: {}".format(interval_number)) + intervalColdMisses = 0 + intervalMemReqs = 0 + currentColdMisses = 0 + currentMemReqs = 0 + start_tick = m5.curTick() + exit_event = m5.simulate(iteration_duration) + end_tick = m5.curTick() + + if exit_event.getCause() != "simulate() limit reached": + if ( + exit_event.getCause() == "workend" + or exit_event.getCause() == "workbegin" + ): + print("ROI bounds, continuing to stats ...") + else: + print(f"Exiting because {exit_event.getCause()}") + exit(1) + + simstats = get_simstat([polMan for polMan in system.mem_ctrl], prepare_stats=True) + for i in range(num_cpus): + ctrl = simstats.__dict__[f"mem_ctrl{i}"] + currentColdMisses += ctrl.numColdMisses.value + currentMemReqs += (ctrl.readReqs.value + ctrl.writeReqs.value) + + print(exit_event.getCause()+" /// new iter nums: {}, {}, {}, {}".format(currentColdMisses, + prevTotalColdMisses, currentMemReqs, prevTotalMemReqs)) + + assert currentColdMisses >= prevTotalColdMisses, "Number of total cold misses is wrong!" + assert currentMemReqs >= prevTotalMemReqs, "Number of total memory requests is wrong!" + intervalColdMisses = currentColdMisses - prevTotalColdMisses + intervalMemReqs = currentMemReqs - prevTotalMemReqs + + if currentColdMisses >= (numOfCacheBlks*0.95): + print("95% of system's total DRAM cache is warmed up") + break + elif (float(currentColdMisses/currentMemReqs) <= 0.01): + print("Total cold misses is less than 1% of the total mem requests") + break + m5.stats.dump() + # m5.stats.reset() + prevTotalColdMisses = currentColdMisses + prevTotalMemReqs = currentMemReqs + print("tot warmup: {}, iter warmup: {}, iter len: {}".format(float(currentColdMisses/numOfCacheBlks), + float(intervalColdMisses/intervalMemReqs), end_tick - start_tick)) + print("----------------------------------------------------------------------------------\n") + + print("\n") + if interval_number == (num_of_iterations-1): + print("TIMEOUT!\n") + m5.stats.dump() + + +def parse_options(): + parser = argparse.ArgumentParser( + description="For use with gem5. This script " + "runs a GAPBS/NPB application and only works " + "with x86 ISA." 
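+        # Illustrative invocation (both arguments are positional; the
+        # benchmark name must appear in info.gapbs_benchmarks or
+        # info.npb_benchmarks):
+        #   ./build/X86/gem5.opt configs-npb-gapbs-chkpt-restore/checkpoint_both.py bt C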
+ ) + parser.add_argument( + "benchmark", type=str, help="The application to run" + ) + parser.add_argument( + "size", type=str, help="The problem size to run" + ) + return parser.parse_args() + + +if __name__ == "__m5_main__": + args = parse_options() + + kernel = "/home/babaie/projects/TDRAM-resubmission/fsTools/x86-linux-kernel-4.19.83" + disk = "" + if args.benchmark in gapbs_benchmarks: + disk = "/home/babaie/projects/TDRAM-resubmission/fsTools/x86-gapbs" + elif args.benchmark in npb_benchmarks: + disk = "/home/babaie/projects/TDRAM-resubmission/fsTools/x86-npb" + else: + print("Wrong benchmark choice!") + exit(1) + + # These are constant across tests, no need to put them in the args + synthetic = 1 + num_cpus = 8 + mem_sys = "MESI_Two_Level" + dcache_policy = "CascadeLakeNoPartWrs" + dcache_size = "1GiB" # size of each channel + assoc = 1 + single_channel = False + + mem_size = main_mem_size[args.benchmark + "-" + args.size] # size of total main memory + print("main memory size: ", mem_size) + + # create the system we are going to simulate + if single_channel: + system = RubySystem1Channel( + kernel, + disk, + mem_sys, + num_cpus, + assoc, + dcache_size, + mem_size, + dcache_policy, + 0, + 0, + 0, + args, + ) + else: + system = RubySystem8Channel( + kernel, + disk, + mem_sys, + num_cpus, + assoc, + dcache_size, + mem_size, + dcache_policy, + 0, + 0, + 0, + args, + ) + + system.m5ops_base = 0xFFFF0000 + + # Exit from guest on workbegin/workend + system.exit_on_work_items = True + + # Create and pass a script to the simulated system to run the reuired + # benchmark + if args.benchmark in gapbs_benchmarks: + system.readfile = writeBenchScript_GAPBS( + m5.options.outdir, + args.benchmark, + args.size, + synthetic + ) + elif args.benchmark in npb_benchmarks: + system.readfile = writeBenchScript_NPB( + m5.options.outdir, + args.benchmark+"."+args.size+".x" + ) + + # set up the root SimObject and start the simulation + root = Root(full_system=True, system=system) + + if system.getHostParallel(): + # Required for running kvm on multiple host cores. + # Uses gem5's parallel event queue feature + # Note: The simulator is quite picky about this number! 
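+        # sim_quantum is expressed in ticks; at gem5's default 1 ps tick
+        # resolution, int(1e9) ticks is the 1 ms quantum between
+        # synchronizations of the parallel (KVM) event queues.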
+ root.sim_quantum = int(1e9) # 1 ms + + # instantiate all of the objects we've created above + m5.instantiate() + + print("Running the simulation") + exit_event = m5.simulate() + + if exit_event.getCause() == "workbegin": + # Reached the start of ROI + # start of ROI is marked by an + # m5_work_begin() call + print("Done booting Linux and reached to ROI") + m5.stats.reset() + print("Reset stats at the start of ROI") + # switching CPU to timing + system.switchCpus(system.cpu, system.atomicNoncachingCpu) + print("Switched CPU from KVM to atomicNoncachingCpu!") + else: + print(exit_event.getCause()) + print("Unexpected termination of simulation !") + exit(1) + + print("Start to run intervals!") + do_warmup(system,single_channel, args.benchmark, args.size) + print("Finished warmup iterations") + system.switchCpus(system.atomicNoncachingCpu, system.o3Cpu) + print("switched from atomicNoncachingCpu to O3") + m5.checkpoint(m5.options.outdir + "/cpt") \ No newline at end of file diff --git a/configs-npb-gapbs-chkpt-restore/info.py b/configs-npb-gapbs-chkpt-restore/info.py new file mode 100644 index 0000000000..8104951103 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/info.py @@ -0,0 +1,405 @@ +main_mem_size = { + "bfs-22": ("2GiB"), + "bc-22": ("2GiB"), + "cc-22": ("2GiB"), + "pr-22": ("2GiB"), + "sssp-22": ("5GiB"), + "tc-22": ("2GiB"), + "bfs-25": ("10GiB"), + "bc-25": ("10GiB"), + "cc-25": ("10GiB"), + "pr-25": ("10GiB"), + "sssp-25": ("18GiB"), + "tc-25": ("10GiB"), + "bt-C": ("1GiB"), + "cg-C": ("1GiB"), + "ft-C": ("5GiB"), + "is-C": ("1GiB"), + "lu-C": ("1GiB"), + "mg-C": ("4GiB"), + "sp-C": ("1GiB"), + "ua-C": ("1GiB"), + "bt-D": ("11GiB"), + "cg-D": ("17GiB"), + "ft-D": ("85GiB"), + "is-D": ("34GiB"), + "lu-D": ("9GiB"), + "mg-D": ("27GiB"), + "sp-D": ("12GiB"), + "ua-D": ("8GiB"), +} + +text_info = { + # exe size start + "bt.A.x": (0x00018402, 0x0000000000400E50), + "bt.B.x": (0x000183E2, 0x0000000000400E50), + "bt.C.x": (0x00018342, 0x0000000000400E50), + "bt.D.x": (0x00018562, 0x0000000000400E50), + "cg.A.x": (0x00005D22, 0x0000000000400F60), + "cg.B.x": (0x00005DC2, 0x0000000000400F60), + "cg.C.x": (0x00005E32, 0x0000000000400F60), + "cg.D.x": (0x00005AC2, 0x0000000000400F60), + "ep.A.x": (0x00001E52, 0x0000000000400DB0), + "ep.B.x": (0x00001E52, 0x0000000000400DB0), + "ep.C.x": (0x00001E52, 0x0000000000400DB0), + "ep.D.x": (0x00001E52, 0x0000000000400DB0), + "ft.A.x": (0x00005202, 0x0000000000400F60), + "ft.B.x": (0x00005752, 0x0000000000400F60), + "ft.C.x": (0x00005762, 0x0000000000400F60), + "ft.D.x": (0x00005772, 0x0000000000400F60), + "is.A.x": (0x000020B2, 0x0000000000400BE0), + "is.B.x": (0x000020C2, 0x0000000000400BE0), + "is.C.x": (0x000020B2, 0x0000000000400BE0), + "is.D.x": (0x00001EB2, 0x0000000000400BE0), + "lu.A.x": (0x00016A82, 0x0000000000400F50), + "lu.B.x": (0x00016A52, 0x0000000000400F50), + "lu.C.x": (0x000169C2, 0x0000000000400F50), + "lu.D.x": (0x00016DD2, 0x0000000000400F50), + "mg.A.x": (0x0000B4A2, 0x00000000004010F0), + "mg.B.x": (0x0000B4A2, 0x00000000004010F0), + "mg.C.x": (0x0000B5E2, 0x00000000004010F0), + "mg.D.x": (0x0000B772, 0x00000000004010F0), + "sp.A.x": (0x00014162, 0x0000000000400EB0), + "sp.B.x": (0x00014162, 0x0000000000400EB0), + "sp.C.x": (0x00014052, 0x0000000000400EB0), + "sp.D.x": (0x000141B2, 0x0000000000400EB0), + "ua.A.x": (0x000274E2, 0x00000000004010C0), + "ua.B.x": (0x00027612, 0x00000000004010C0), + "ua.C.x": (0x00027552, 0x00000000004010C0), + "ua.D.x": (0x000274C2, 0x00000000004010C0), + "bc-22": (0x0000EFD2, 
0x00000000004029F0), + "bfs-22": (0x0000DCF2, 0x00000000004028A0), + "cc-22": (0x0000E4C2, 0x0000000000402BE0), + "cc_sv-22": (0x0000DF12, 0x0000000000402970), + "pr-22": (0x0000E022, 0x0000000000402A10), + "sssp-22": (0x0000E692, 0x00000000004029C0), + "tc-22": (0x0000DEE2, 0x0000000000402890), + "bc-25": (0x0000EFD2, 0x00000000004029F0), + "bfs-25": (0x0000DCF2, 0x00000000004028A0), + "cc-25": (0x0000E4C2, 0x0000000000402BE0), + "cc_sv-25": (0x0000DF12, 0x0000000000402970), + "pr-25": (0x0000E022, 0x0000000000402A10), + "sssp-25": (0x0000E692, 0x00000000004029C0), + "tc-25": (0x0000DEE2, 0x0000000000402890), +} +interval_info_3hr_8ch = { + "bt.C.x": (0x40b1d8,2957134), + "cg.C.x": (0x4019d8,60153824), + "ft.C.x": (0x400d70,11454498), + "is.C.x": (0x4017c9,52612048), + "lu.C.x": (0x402980,8470596), + "mg.C.x": (0x401b08,9190341), + "sp.C.x": (0x40c210,2945981), + "ua.C.x": (0x400f30,4947911), + "bc-22": (0x404e08,4540370), + "bfs-22": (0x4045a0,120090), + "cc-22": (0x404688,7166714), + "pr-22": (0x4036c0,39736389), + "sssp-22": (0x405390,2180804), + "tc-22": (0x40ca60,21336889), + "bc-25": (0x404e1a,4400725), + "bfs-25": (0x4038e0,13721118), + "cc-25": (0x4037b0,32296930), + "pr-25": (0x4036a0,1313177), + "sssp-25": (0x405441,17780676), + "tc-25": (0x40ca88,11859294), + "bt.D.x": (0x408148,2990727), + "cg.D.x": (0x4019d8,38877453), + "ft.D.x": (0x400d70,5825365), + "is.D.x": (0x401661,49311978), + "lu.D.x": (0x402da0,1468532), + "mg.D.x": (0x401920,4260635), + "sp.D.x": (0x409000,8073528), + "ua.D.x": (0x4039c4,1197504), +} + +interval_info_1hr = { + # exe pc count + "bc-22": (0x404E08, 5409997), + "bfs-22": (0x403790, 3930710), + "bt.C.x": (0x4080E0, 1270955), + "cc-22": (0x4037B0, 8388093), + "cg.C.x": (0x4019D8, 29870850), + "pr-22": (0x4036C0, 25174574), + "ft.C.x": (0x400D70, 6760163), + "ua.C.x": (0x406B00, 344413), + "mg.C.x": (0x401B08, 4467087), + "sp.C.x": (0x409170, 1569121), + "lu.C.x": (0x402980, 5146555), + "is.C.x": (0x4017C9, 48480186), + "tc-22": (0x4052E0, 240202), + "sssp-22": (0x405441, 12651169), + "bc-25": (0x404E1A, 2192896), + "bfs-25": (0x4038E0, 11170933), + "bt.D.x": (0x407FD0, 3729824), + "cc-25": (0x404688, 6506055), + "cg.D.x": (0x4019D8, 17675668), + "pr-25": (0x4036C0, 19663604), + "ft.D.x": (0x400D70, 6498319), + "ua.D.x": (0x400F30, 2709903), + "mg.D.x": (0x401920, 3670463), + "sp.D.x": (0x409000, 3786010), + "lu.D.x": (0x402600, 116), + "is.D.x": (0x401661, 42645519), + "tc-25": (0x4030A0, 5800667), + "sssp-25": (0x405418, 979358), +} + +interval_info_3hr = { + # exe pc count + "bc-22": (0x404E08, 14968517), + "bfs-22": (0x403790, 12277309), + "bt.C.x": (0x408600, 1906919), + "cc-22": (0x404238, 5701575), + "cg.C.x": (0x4019D8, 73121983), + "pr-22": (0x4036C0, 69152771), + "ft.C.x": (0x400D70, 16530458), + "ua.C.x": (0x41D080, 4205282), + "mg.C.x": (0x401920, 12053283), + "sp.C.x": (0x409668, 2192349), + "lu.C.x": (0x402980, 9952905), + "is.C.x": (0x401955, 12922496), + "tc-22": (0x4034E0, 1507255), + "sssp-22": (0x405441, 33740179), + "bc-25": (0x404E08, 6310746), + "bfs-25": (0x4045D0, 2021755), + "bt.D.x": (0x407FD0, 10661006), + "cc-25": (0x4037B0, 31963857), + "cg.D.x": (0x4019D8, 45636549), + "pr-25": (0x4036C0, 51691344), + "ft.D.x": (0x400D70, 13065409), + "ua.D.x": (0x400F30, 8415248), + "mg.D.x": (0x401920, 11871798), + "sp.D.x": (0x409000, 9962530), + "lu.D.x": (0x4027F8, 32448), + "is.D.x": (0x401661, 119913839), + "tc-25": (0x4030A0, 30335985), + "sssp-25": (0x405441, 19973164), +} + +interval_info_6hr = { + # exe pc count + "bc-22": 
(0x404E08, 29440776), + "bfs-22": (0x4045D0, 3029875), + "bt.C.x": (0x409A20, 1173559), + "cc-22": (0x4037B0, 33552375), + "cg.C.x": (0x4019D8, 148363776), + "pr-22": (0x4036C0, 138691628), + "ft.C.x": (0x400D70, 30067439), + "ua.C.x": (0x405757, 134017), + "mg.C.x": (0x401920, 23222866), + "sp.C.x": (0x40AA60, 1691001), + "lu.C.x": (0x402980, 9952905), + "is.C.x": (0x401955, 79966814), + "tc-22": (0x405800, 516587), + "sssp-22": (0x405441, 67113550), + "bc-25": (0x404E08, 12151937), + "bfs-25": (0x403790, 8317180), + "bt.D.x": (0x407FD0, 21901834), + "cc-25": (0x404238, 32589977), + "cg.D.x": (0x4019D8, 91326969), + "pr-25": (0x4036C0, 99790518), + "ft.D.x": (0x400D70, 26209008), + "ua.D.x": (0x400F30, 13977417), + "mg.D.x": (0x401B08, 24048507), + "sp.D.x": (0x409000, 19860707), + "lu.D.x": (0x4027F8, 100054), + "is.D.x": (0x401661, 241880887), + "tc-25": (0x40CA70, 19083641), + "sssp-25": (0x405441, 42573632), +} + +interval_info_12hr = { + # exe pc count + "bc-22": (0x4036F0, 48778942), + "bfs-22": (0x403790, 54892278), + "bt.C.x": (0x40BF58, 611768), + "cc-22": (0x404688, 39592660), + "cg.C.x": (0x4019D8, 294906202), + "pr-22": (0x4036C0, 271266245), + "ft.C.x": (0x400D70, 56313323), + "ua.C.x": (0x41DCA0, 21222925), + "mg.C.x": (0x401B08, 45327484), + "sp.C.x": (0x40CFA0, 1219582), + "lu.C.x": (0x405C00, 72382), + "is.C.x": (0x401AF0, 129738785), + "tc-22": (0x4054A0, 87026806), + "sssp-22": (0x405441, 89183250), + "bc-25": (0x404E08, 25995768), + "bfs-25": (0x4038E0, 36114591), + "bt.D.x": (0x407FD0, 44658580), + "cc-25": (0x404688, 31320744), + "cg.D.x": (0x4019D8, 19366202), + "pr-25": (0x4036C0, 204816690), + "ft.D.x": (0x401C10, 56461566), + "ua.D.x": (0x4044C4, 6852508), + "mg.D.x": (0x401B08, 47676346), + "sp.D.x": (0x409000, 39454655), + "lu.D.x": (0x4029A0, 10268832), + "is.D.x": (0x401661, 481770516), + "tc-25": (0x40CA70, 19083641), + "sssp-25": (0x405441, 89681018), +} + +interval_info_24hr = { + # exe pc count + "bt.C.x": (0x40D230, 2377023), + "cg.C.x": (0x4019D8, 578428198), + "ft.C.x": (0x405830, 58382196), + "is.C.x": (0x401AF0, 184476965), + "lu.C.x": (0x40D4C0, 1146276), + "mg.C.x": (0x4012F8, 121010179), + "sp.C.x": (0x40EEE8, 3428040), + "ua.C.x": (0x41DCA0, 39733523), + "bc-22": (0x404E1A, 11556233), + "bfs-22": (0x401028, 65), + "cc-22": (0x404238, 39015034), + "pr-22": (0x4036C0, 530256860), + "tc-22": (0x405390, 7008077), + "sssp-22": (0x4054A0, 212570793), + "bc-25": (0x404E08, 44535390), + "bfs-25": (0x403988, 87740083), + "bt.D.x": (0x407FD0, 53208177), + "cc-25": (0x4037B0, 133906775), + "cg.D.x": (0x4019D8, 351587199), + "pr-25": (0x4036C0, 401728224), + "ft.D.x": (0x400D70, 110793818), + "ua.D.x": (0x4039C4, 12695182), ### + "mg.D.x": (0x401B08, 75633571), + "sp.D.x": (0x409000, 47034804), + "lu.D.x": (0x4029A0, 53146691), + "is.D.x": (0x401661, 858226422), + "tc-25": (0x40CA70, 19083641), + "sssp-25": (0x405441, 169473207), +} + +benchmark_choices_npb = [ + "bt.A.x", + "cg.A.x", + "ep.A.x", + "ft.A.x", + "is.A.x", + "lu.A.x", + "mg.A.x", + "sp.A.x", + "ua.A.x", + "bt.B.x", + "cg.B.x", + "ep.B.x", + "ft.B.x", + "is.B.x", + "lu.B.x", + "mg.B.x", + "sp.B.x", + "ua.B.x", + "bt.C.x", + "cg.C.x", + "ep.C.x", + "ft.C.x", + "is.C.x", + "lu.C.x", + "mg.C.x", + "sp.C.x", + "ua.C.x", + "bt.D.x", + "cg.D.x", + "ep.D.x", + "ft.D.x", + "is.D.x", + "lu.D.x", + "mg.D.x", + "sp.D.x", + "ua.D.x", + "bt.F.x", + "cg.F.x", + "ep.F.x", + "ft.F.x", + "is.F.x", + "lu.F.x", + "mg.F.x", + "sp.F.x", + "ua.F.x", +] +benchmark_choices_gapbs = [ + "bfs-22", + "bc-22", + "cc-22", + 
"pr-22", + "sssp-22", + "tc-22", + "bfs-25", + "bc-25", + "cc-25", + "pr-25", + "sssp-25", + "tc-25", +] + +npb_benchmarks = [ + "bt", + "cg", + "ep", + "ft", + "is", + "lu", + "mg", + "sp", + "ua", +] +gapbs_benchmarks = [ + "bfs", + "bc", + "cc", + "pr", + "sssp", + "tc", +] +interval_info_1hr_512MiB = { + # exe pc count + "bc-22": (0x404E08, 4355635), + "bfs-22": (0x403790, 3210973), + "bt.C.x": (0x408600, 623227), + "cc-22": (0x404688, 2218838), + "cg.C.x": (0x4019D8, 8334402), + "pr-22": (0x4036C0, 6426778), + "ft.C.x": (0x405830, 11202023), + "ua.C.x": (0x421ff6, 182749), + "mg.C.x": (0x401920, 1886013), + "sp.C.x": (0x409668, 445619), + "lu.C.x": (0x404160, 457680), + "is.C.x": (0x401955, 12277189), + "tc-22": (0x4052E0, 1059969), + "sssp-22": (0x405441, 4457679), +} + +interval_info_1GBdramCache_3hr = { + # exe pc count + "bt.C.x": (0x40f3d8,244911), + "cg.C.x": (0x4019d8,42463422), + "ft.C.x": (0x401c00,7146042), + "is.C.x": (0x401af0,46965216), + "lu.C.x": (0x40abf8,764707), + "mg.C.x": (0x401b08,6680641), + "sp.C.x": (0x40e2e0,441148), + "ua.C.x": (0x41dca0,1351162), + "bc-22": (0x4036f0,1315303), + "bfs-22": (0x403790,6915678), + "cc-22": (0x4037b0,8303408), + "pr-22": (0x4036c0,35167103), + "tc-22": (0x405640,760), + "sssp-22": (0x405390,2908597), + "bc-25": (0x404e1a,1578848), + "bfs-25": (0x403790,5365971), + "bt.D.x": (0x407fd0,4048773), + "cc-25": (0x404688,5396243), + "cg.D.x": (0x4019d8,13523512), + "pr-25": (0x4036c0,15770394), + "ft.D.x": (0x401c10,4648334), + "ua.D.x": (0x403f30,31180), + "mg.D.x": (0x401920,4263169), + "sp.D.x": (0x409000,3544598), + "lu.D.x": (0x4027f8,27621), + "is.D.x": (0x401661,31545953), + "tc-25": (0x4030a0,15958999), + "sssp-25": (0x405441,7679886), +} \ No newline at end of file diff --git a/configs-npb-gapbs-chkpt-restore/restore_both.py b/configs-npb-gapbs-chkpt-restore/restore_both.py new file mode 100755 index 0000000000..e614871b75 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/restore_both.py @@ -0,0 +1,305 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2019 The Regents of the University of California. +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power, Ayaz Akram + +""" Script to restore both GAPBS and NPB with a checkpoint +""" +import argparse +import time +import m5 +import m5.ticks +from m5.objects import * + +from system import * + +from info import ( + text_info, + interval_info_3hr_8ch, + gapbs_benchmarks, + npb_benchmarks, + main_mem_size, +) + +def writeBenchScript_GAPBS(dir, benchmark_name, size, synthetic): + """ + This method creates a script in dir which will be eventually + passed to the simulated system (to run a specific benchmark + at bootup). + """ + input_file_name = "{}/run_{}_{}".format(dir, benchmark_name, size) + if synthetic: + with open(input_file_name, "w") as f: + f.write("./{} -g {}\n".format(benchmark_name, size)) + elif synthetic == 0: + with open(input_file_name, "w") as f: + # The workloads that are copied to the disk image using Packer + # should be located in /home/gem5/. + # Since the command running the workload will be executed with + # pwd = /home/gem5/gapbs, the path to the copied workload is + # ../{workload-name} + f.write("./{} -sf ../{}".format(benchmark_name, size)) + + return input_file_name + +def writeBenchScript_NPB(dir, bench): + """ + This method creates a script in dir which will be eventually + passed to the simulated system (to run a specific benchmark + at bootup). + """ + file_name = "{}/run_{}".format(dir, bench) + bench_file = open(file_name, "w+") + bench_file.write("/home/gem5/NPB3.3-OMP/bin/{} \n".format(bench)) + + # sleeping for sometime (5 seconds here) makes sure + # that the benchmark's output has been + # printed to the console + bench_file.write("sleep 5 \n") + bench_file.write("m5 exit \n") + bench_file.close() + return file_name + + +def parse_options(): + parser = argparse.ArgumentParser( + description="Restores a checkpoint for NPB and GAPBS" + ) + parser.add_argument( + "benchmark", type=str, help="The application to run" + ) + parser.add_argument( + "size", type=str, help="The problem size to run" + ) + parser.add_argument( + "dcache_policy", + type=str, + help="The architecture of DRAM cache: " + "CascadeLakeNoPartWrs, Oracle, BearWriteOpt, Rambus", + ) + parser.add_argument( + "assoc", + type=int, + help="THe associativity of the DRAM cache", + ) + parser.add_argument( + "is_link", + type=int, + help="whether to use a link for backing store or not", + ) + parser.add_argument( + "link_lat", type=str, help="latency of the link to backing store" + ) + parser.add_argument( + "bypass", + type=int, + help="bypass DRAM cache", + ) + parser.add_argument("--do_analysis", action="store_true", default=False) + return parser.parse_args() + + +def do_analysis(): + print( + "**************** Doing analysis! Simulating " + "100 intervals of 10ms each! 
********************\n" + ) + start = time.time() + + for interval_number in range(100): + print(f"Working on interval number: {interval_number}") + exit_event = m5.simulate(10_000_000_000) # 10 ms + m5.stats.dump() + + print( + f"Done with interval {interval_number} at {(time.time() - start)/60:0.2f}" + ) + mostRecentPc = lpmanager.getMostRecentPc() + print(f"Exit because {exit_event.getCause()}, before for") + for pc, tick in mostRecentPc: + count = lpmanager.getPcCount(pc) + print("in for loop") + print(f"{hex(pc)},{count[0]},{count[1]}") + if exit_event.getCause() != "simulate() limit reached": + if ( + exit_event.getCause() == "workend" + or exit_event.getCause() == "workbegin" + ): + print(f"Exit because {exit_event.getCause()}, continuing...") + else: + print(f"Exiting because {exit_event.getCause()}") + break + + +def run(): + print("Simulating 100 intervals of 10ms each! \n") + + for interval_number in range(100): + print("Interval number: {}".format(interval_number)) + exit_event = m5.simulate(10_000_000_000) # 10 ms + # m5.stats.dump() + + if exit_event.getCause() != "simulate() limit reached": + if ( + exit_event.getCause() == "workend" + or exit_event.getCause() == "workbegin" + ): + print("Workload finished, continuing...") + else: + print(f"Exiting because {exit_event.getCause()}") + break + + +if __name__ == "__m5_main__": + args = parse_options() + + kernel = "/home/babaie/projects/TDRAM-resubmission/fsTools/x86-linux-kernel-4.19.83" + disk = "" + suite = "" + if args.benchmark in gapbs_benchmarks: + disk = "/home/babaie/projects/TDRAM-resubmission/fsTools/x86-gapbs" + suite = "gapbs" + elif args.benchmark in npb_benchmarks: + disk = "/home/babaie/projects/TDRAM-resubmission/fsTools/x86-npb" + suite = "npb" + else: + print("wrong benchmark choice!") + exit(1) + + synthetic = 1 + num_cpus = 8 + mem_sys = "MESI_Two_Level" + dcache_size = "1GiB" # size of each channel + mem_size = main_mem_size[args.benchmark + "-" + args.size] # size of total main memory + single_channel = False + + checkpoint_dir = "/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/" + \ + suite + "/" + args.size + "/" + args.benchmark + "/" + "cpt" + print("Checkpoint dir: ", checkpoint_dir) + + if single_channel: + system = RubySystem1Channel( + kernel, + disk, + mem_sys, + num_cpus, + args.assoc, + dcache_size, + mem_size, + args.dcache_policy, + args.is_link, + args.link_lat, + args.bypass, + args, + restore=True, + ) + else: + system = RubySystem8Channel( + kernel, + disk, + mem_sys, + num_cpus, + args.assoc, + dcache_size, + mem_size, + args.dcache_policy, + args.is_link, + args.link_lat, + args.bypass, + args, + restore=True, + ) + + app = "" + if args.benchmark in gapbs_benchmarks: + app = args.benchmark + "-" + args.size + elif args.benchmark in npb_benchmarks: + app = args.benchmark+"."+args.size+".x" + + if args.do_analysis: + lpmanager = O3LooppointAnalysisManager() + for core in system.o3Cpu: + lplistener = O3LooppointAnalysis() + lplistener.ptmanager = lpmanager + lplistener.validAddrRangeStart = text_info[app][1] + lplistener.validAddrRangeSize = text_info[app][0] + core.probeListener = lplistener + else: + pc, count = interval_info_3hr_8ch[app] + system.global_tracker = PcCountTrackerManager( + targets=[PcCountPair(pc, count)] + ) + + for core in system.o3Cpu: + core.core_tracker = PcCountTracker( + targets=[PcCountPair(pc, count)], + core=core, + ptmanager=system.global_tracker, + ) + + system.m5ops_base = 0xFFFF0000 + + # Exit from guest on workbegin/workend + 
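+    # With exit_on_work_items set, m5_work_begin()/m5_work_end() annotations
+    # in the guest workload cause m5.simulate() to return with the exit
+    # causes "workbegin"/"workend" that run()/do_analysis() check for.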
system.exit_on_work_items = True + + # Create and pass a script to the simulated system to run the reuired + # benchmark + if args.benchmark in gapbs_benchmarks: + system.readfile = writeBenchScript_GAPBS( + m5.options.outdir, + args.benchmark, + args.size, + synthetic + ) + elif args.benchmark in npb_benchmarks: + system.readfile = writeBenchScript_NPB( + m5.options.outdir, + app + ) + + # set up the root SimObject and start the simulation + root = Root(full_system=True, system=system) + + if system.getHostParallel(): + # Required for running kvm on multiple host cores. + # Uses gem5's parallel event queue feature + # Note: The simulator is quite picky about this number! + root.sim_quantum = int(1e9) # 1 ms + + # needed for long running jobs + m5.disableAllListeners() + + # instantiate all of the objects we've created above + m5.instantiate(checkpoint_dir) + + print("Read the checkpoint. Now, running the simulation\n") + + if args.do_analysis: + do_analysis() + else: + run() + + print("End of simulation\n") diff --git a/configs-npb-gapbs-chkpt-restore/system/MESI_Two_Level.py b/configs-npb-gapbs-chkpt-restore/system/MESI_Two_Level.py new file mode 100755 index 0000000000..8307e74634 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/MESI_Two_Level.py @@ -0,0 +1,332 @@ +#Copyright (c) 2020 The Regents of the University of California. +#All Rights Reserved +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +""" This file creates a set of Ruby caches for the MESI TWO Level protocol +This protocol models two level cache hierarchy. The L1 cache is split into +instruction and data cache. + +This system support the memory size of up to 3GB. 
+ +""" + +import math + +from m5.defines import buildEnv +from m5.util import fatal, panic + +from m5.objects import * + +class MESITwoLevelCache(RubySystem): + + def __init__(self): + if buildEnv['PROTOCOL'] != 'MESI_Two_Level': + fatal("This system assumes MESI_Two_Level!") + + super(MESITwoLevelCache, self).__init__() + + self._numL2Caches = 8 + + def setup(self, system, cpus, mem_ctrls, mem_ranges, dma_ports, iobus): + """Set up the Ruby cache subsystem. Note: This can't be done in the + constructor because many of these items require a pointer to the + ruby system (self). This causes infinite recursion in initialize() + if we do this in the __init__. + """ + # Ruby's global network. + self.network = MyNetwork(self) + + # MESI_Two_Level example uses 5 virtual networks + self.number_of_virtual_networks = 5 + self.network.number_of_virtual_networks = 5 + + # There is a single global list of all of the controllers to make it + # easier to connect everything to the global network. This can be + # customized depending on the topology/network requirements. + # L1 caches are private to a core, hence there are one L1 cache per CPU + # core. The number of L2 caches are dependent to the architecture. + self.controllers = \ + [L1Cache(system, self, cpu, self._numL2Caches) for cpu in cpus] + \ + [L2Cache(system, self, self._numL2Caches) for num in \ + range(self._numL2Caches)] + \ + [DirController(self, rng, mem_ctrl) for rng,mem_ctrl in zip(mem_ranges,mem_ctrls)] + \ + [DMAController(self) for i in range(len(dma_ports))] + + # Create one sequencer per CPU and dma controller. + # Sequencers for other controllers can be here here. + self.sequencers = [RubySequencer(version = i, + # Grab dcache from ctrl + dcache = self.controllers[i].L1Dcache, + clk_domain = self.controllers[i].clk_domain, + pio_request_port = iobus.cpu_side_ports, + mem_request_port = iobus.cpu_side_ports, + pio_response_port = iobus.mem_side_ports + ) for i in range(len(cpus))] + \ + [DMASequencer(version = i, + in_ports = port) + for i,port in enumerate(dma_ports) + ] + + for i,c in enumerate(self.controllers[:len(cpus)]): + c.sequencer = self.sequencers[i] + + #Connecting the DMA sequencer to DMA controller + for i,d in enumerate(self.controllers[-len(dma_ports):]): + i += len(cpus) + d.dma_sequencer = self.sequencers[i] + + self.num_of_sequencers = len(self.sequencers) + + # Create the network and connect the controllers. + # NOTE: This is quite different if using Garnet! + self.network.connectControllers(self.controllers) + self.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. 
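+        # Functional-only accesses (e.g. loading the kernel and bootloader
+        # images at startup) enter Ruby through this proxy port instead of
+        # the timing-mode sequencer ports.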
+ self.sys_port_proxy = RubyPortProxy() + system.system_port = self.sys_port_proxy.in_ports + self.sys_port_proxy.pio_request_port = iobus.cpu_side_ports + + # Connect the cpu's cache, interrupt, and TLB ports to Ruby + for i,cpu in enumerate(cpus): + cpu.icache_port = self.sequencers[i].in_ports + cpu.dcache_port = self.sequencers[i].in_ports + cpu.createInterruptController() + cpu.interrupts[0].pio = self.sequencers[i].interrupt_out_port + cpu.interrupts[0].int_requestor = self.sequencers[i].in_ports + cpu.interrupts[0].int_responder = self.sequencers[i].interrupt_out_port + cpu.mmu.connectWalkerPorts( + self.sequencers[i].in_ports, self.sequencers[i].in_ports) +class L1Cache(L1Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, cpu, num_l2Caches): + """Creating L1 cache controller. Consist of both instruction + and data cache. The size of data cache is 512KB and + 8-way set associative. The instruction cache is 32KB, + 2-way set associative. + """ + super(L1Cache, self).__init__() + + self.version = self.versionCount() + block_size_bits = int(math.log(system.cache_line_size, 2)) + l1i_size = '32kB' + l1i_assoc = '2' + l1d_size = '512kB' + l1d_assoc = '8' + # This is the cache memory object that stores the cache data and tags + self.L1Icache = RubyCache(size = l1i_size, + assoc = l1i_assoc, + start_index_bit = block_size_bits , + is_icache = True) + self.L1Dcache = RubyCache(size = l1d_size, + assoc = l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False) + self.l2_select_num_bits = int(math.log(num_l2Caches , 2)) + self.clk_domain = cpu.clk_domain + self.prefetcher = RubyPrefetcher() + self.send_evictions = self.sendEvicts(cpu) + self.transitions_per_cycle = 4 + self.enable_prefetch = False + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system): + bits = int(math.log(system.cache_line_size, 2)) + if 2**bits != system.cache_line_size.value: + panic("Cache line size not a power of 2!") + return bits + + def sendEvicts(self, cpu): + """True if the CPU model or ISA requires sending evictions from caches + to the CPU. Two scenarios warrant forwarding evictions to the CPU: + 1. The O3 model must keep the LSQ coherent with the caches + 2. The x86 mwait instruction is built on top of coherence + 3. The local exclusive monitor in ARM systems + """ + if type(cpu) is X86O3CPU: + return True + return False + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. 
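+        The naming follows the protocol convention: *FromL1Cache buffers are
+        pushed into the network (network.in_port), *ToL1Cache buffers are
+        filled by the network (network.out_port), and the mandatoryQueue
+        receives demand requests from this controller's sequencer.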
+ """ + self.mandatoryQueue = MessageBuffer() + self.requestFromL1Cache = MessageBuffer() + self.requestFromL1Cache.out_port = ruby_system.network.in_port + self.responseFromL1Cache = MessageBuffer() + self.responseFromL1Cache.out_port = ruby_system.network.in_port + self.unblockFromL1Cache = MessageBuffer() + self.unblockFromL1Cache.out_port = ruby_system.network.in_port + + self.optionalQueue = MessageBuffer() + + self.requestToL1Cache = MessageBuffer() + self.requestToL1Cache.in_port = ruby_system.network.out_port + self.responseToL1Cache = MessageBuffer() + self.responseToL1Cache.in_port = ruby_system.network.out_port + +class L2Cache(L2Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, num_l2Caches): + + super(L2Cache, self).__init__() + + self.version = self.versionCount() + # This is the cache memory object that stores the cache data and tags + self.L2cache = RubyCache(size = '1 MB', + assoc = 16, + start_index_bit = self.getBlockSizeBits(system, + num_l2Caches)) + + self.transitions_per_cycle = '4' + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system, num_l2caches): + l2_bits = int(math.log(num_l2caches, 2)) + bits = int(math.log(system.cache_line_size, 2)) + l2_bits + return bits + + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.DirRequestFromL2Cache = MessageBuffer() + self.DirRequestFromL2Cache.out_port = ruby_system.network.in_port + self.L1RequestFromL2Cache = MessageBuffer() + self.L1RequestFromL2Cache.out_port = ruby_system.network.in_port + self.responseFromL2Cache = MessageBuffer() + self.responseFromL2Cache.out_port = ruby_system.network.in_port + self.unblockToL2Cache = MessageBuffer() + self.unblockToL2Cache.in_port = ruby_system.network.out_port + self.L1RequestToL2Cache = MessageBuffer() + self.L1RequestToL2Cache.in_port = ruby_system.network.out_port + self.responseToL2Cache = MessageBuffer() + self.responseToL2Cache.in_port = ruby_system.network.out_port + + +class DirController(Directory_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system, ranges, mem_ctrls): + """ranges are the memory ranges assigned to this controller. + """ + if len(mem_ctrls) > 1: + panic("This cache system can only be connected to one mem ctrl") + super(DirController, self).__init__() + self.version = self.versionCount() + self.addr_ranges = ranges + self.ruby_system = ruby_system + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. 
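+        # The directory's off-chip reads and writes are issued through
+        # memory_out_port to the one memory controller backing this
+        # directory's address range.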
+ self.memory_out_port = mem_ctrls[0].port + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.requestToDir = MessageBuffer() + self.requestToDir.in_port = ruby_system.network.out_port + self.responseToDir = MessageBuffer() + self.responseToDir.in_port = ruby_system.network.out_port + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = ruby_system.network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + +class DMAController(DMA_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system): + super(DMAController, self).__init__() + self.version = self.versionCount() + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.mandatoryQueue = MessageBuffer() + self.responseFromDir = MessageBuffer(ordered = True) + self.responseFromDir.in_port = ruby_system.network.out_port + self.requestToDir = MessageBuffer() + self.requestToDir.out_port = ruby_system.network.in_port + + +class MyNetwork(SimpleNetwork): + """A simple point-to-point network. This doesn't not use garnet. + """ + + def __init__(self, ruby_system): + super(MyNetwork, self).__init__() + self.netifs = [] + self.ruby_system = ruby_system + + def connectControllers(self, controllers): + """Connect all of the controllers to routers and connec the routers + together in a point-to-point network. + """ + # Create one router/switch per controller in the system + self.routers = [Switch(router_id = i) for i in range(len(controllers))] + + # Make a link from each controller to the router. The link goes + # externally to the network. + self.ext_links = [SimpleExtLink(link_id=i, ext_node=c, + int_node=self.routers[i]) + for i, c in enumerate(controllers)] + + # Make an "internal" link (internal to the network) between every pair + # of routers. + link_count = 0 + self.int_links = [] + for ri in self.routers: + for rj in self.routers: + if ri == rj: continue # Don't connect a router to itself! + link_count += 1 + self.int_links.append(SimpleIntLink(link_id = link_count, + src_node = ri, + dst_node = rj)) diff --git a/configs-npb-gapbs-chkpt-restore/system/MI_example_caches.py b/configs-npb-gapbs-chkpt-restore/system/MI_example_caches.py new file mode 100755 index 0000000000..3c7a71d7b1 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/MI_example_caches.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2015 Jason Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Power + +""" This file creates a set of Ruby caches, the Ruby network, and a simple +point-to-point topology. +See Part 3 in the Learning gem5 book: learning.gem5.org/book/part3 +You can change simple_ruby to import from this file instead of from msi_caches +to use the MI_example protocol instead of MSI. + +IMPORTANT: If you modify this file, it's likely that the Learning gem5 book + also needs to be updated. For now, email Jason + +""" + +import math + +from m5.defines import buildEnv +from m5.util import fatal, panic + +from m5.objects import * + +class MIExampleSystem(RubySystem): + + def __init__(self): + if buildEnv['PROTOCOL'] != 'MI_example': + fatal("This system assumes MI_example!") + + super(MIExampleSystem, self).__init__() + + def setup(self, system, cpus, mem_ctrls, dma_ports, iobus): + """Set up the Ruby cache subsystem. Note: This can't be done in the + constructor because many of these items require a pointer to the + ruby system (self). This causes infinite recursion in initialize() + if we do this in the __init__. + """ + # Ruby's global network. + self.network = MyNetwork(self) + + # MI example uses 5 virtual networks + self.number_of_virtual_networks = 5 + self.network.number_of_virtual_networks = 5 + + # There is a single global list of all of the controllers to make it + # easier to connect everything to the global network. This can be + # customized depending on the topology/network requirements. + # Create one controller for each L1 cache (and the cache mem obj.) + # Create a single directory controller (Really the memory cntrl) + self.controllers = \ + [L1Cache(system, self, cpu) for cpu in cpus] + \ + [DirController(self, system.mem_ranges, mem_ctrls)] + \ + [DMAController(self) for i in range(len(dma_ports))] + + # Create one sequencer per CPU. In many systems this is more + # complicated since you have to create sequencers for DMA controllers + # and other controllers, too. + self.sequencers = [RubySequencer(version = i, + # Grab dcache from ctrl + dcache = self.controllers[i].cacheMemory, + clk_domain = self.controllers[i].clk_domain, + pio_request_port = iobus.cpu_side_ports, + mem_request_port = iobus.cpu_side_ports, + pio_response_port = iobus.mem_side_ports + ) for i in range(len(cpus))] + \ + [DMASequencer(version = i, + in_ports = port) + for i,port in enumerate(dma_ports) + ] + + for i,c in enumerate(self.controllers[0:len(cpus)]): + c.sequencer = self.sequencers[i] + + for i,d in enumerate(self.controllers[-len(dma_ports):]): + i += len(cpus) + d.dma_sequencer = self.sequencers[i] + + self.num_of_sequencers = len(self.sequencers) + + # Create the network and connect the controllers. + # NOTE: This is quite different if using Garnet! 
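+        # connectControllers() (see MyNetwork at the bottom of this file)
+        # builds one router per controller and fully connects the routers
+        # point-to-point; setup_buffers() then sizes the buffers on the links.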
+ self.network.connectControllers(self.controllers) + self.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. + self.sys_port_proxy = RubyPortProxy() + system.system_port = self.sys_port_proxy.in_ports + self.sys_port_proxy.pio_request_port = iobus.cpu_side_ports + + # Connect the cpu's cache, interrupt, and TLB ports to Ruby + for i,cpu in enumerate(cpus): + cpu.icache_port = self.sequencers[i].in_ports + cpu.dcache_port = self.sequencers[i].in_ports + cpu.createInterruptController() + isa = buildEnv['TARGET_ISA'] + if isa == 'x86': + cpu.interrupts[0].pio = self.sequencers[i].interrupt_out_port + cpu.interrupts[0].int_requestor = self.sequencers[i].in_ports + cpu.interrupts[0].int_responder = self.sequencers[i].interrupt_out_port + if isa == 'x86' or isa == 'arm': + cpu.mmu.connectWalkerPorts( + self.sequencers[i].in_ports, self.sequencers[i].in_ports) + +class L1Cache(L1Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, cpu): + """CPUs are needed to grab the clock domain and system is needed for + the cache block size. + """ + super(L1Cache, self).__init__() + + self.version = self.versionCount() + # This is the cache memory object that stores the cache data and tags + self.cacheMemory = RubyCache(size = '16kB', + assoc = 8, + start_index_bit = self.getBlockSizeBits(system)) + self.clk_domain = cpu.clk_domain + self.send_evictions = self.sendEvicts(cpu) + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system): + bits = int(math.log(system.cache_line_size, 2)) + if 2**bits != system.cache_line_size.value: + panic("Cache line size not a power of 2!") + return bits + + def sendEvicts(self, cpu): + """True if the CPU model or ISA requires sending evictions from caches + to the CPU. Two scenarios warrant forwarding evictions to the CPU: + 1. The O3 model must keep the LSQ coherent with the caches + 2. The x86 mwait instruction is built on top of coherence + 3. The local exclusive monitor in ARM systems + """ + if type(cpu) is DerivO3CPU or \ + buildEnv['TARGET_ISA'] in ('x86', 'arm'): + return True + return False + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.mandatoryQueue = MessageBuffer() + self.requestFromCache = MessageBuffer(ordered = True) + self.requestFromCache.out_port = ruby_system.network.in_port + self.responseFromCache = MessageBuffer(ordered = True) + self.responseFromCache.out_port = ruby_system.network.in_port + self.forwardToCache = MessageBuffer(ordered = True) + self.forwardToCache.in_port = ruby_system.network.out_port + self.responseToCache = MessageBuffer(ordered = True) + self.responseToCache.in_port = ruby_system.network.out_port + +class DirController(Directory_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system, ranges, mem_ctrls): + """ranges are the memory ranges assigned to this controller. 
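+        MI_example uses a single directory covering all of system.mem_ranges,
+        so exactly one memory controller is expected here.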
+ """ + if len(mem_ctrls) > 1: + panic("This cache system can only be connected to one mem ctrl") + super(DirController, self).__init__() + self.version = self.versionCount() + self.addr_ranges = ranges + self.ruby_system = ruby_system + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. + self.memory_out_port = mem_ctrls[0].port + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.requestToDir = MessageBuffer(ordered = True) + self.requestToDir.in_port = ruby_system.network.out_port + self.dmaRequestToDir = MessageBuffer(ordered = True) + self.dmaRequestToDir.in_port = ruby_system.network.out_port + + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = ruby_system.network.in_port + self.dmaResponseFromDir = MessageBuffer(ordered = True) + self.dmaResponseFromDir.out_port = ruby_system.network.in_port + self.forwardFromDir = MessageBuffer() + self.forwardFromDir.out_port = ruby_system.network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + +class DMAController(DMA_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system): + super(DMAController, self).__init__() + self.version = self.versionCount() + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.mandatoryQueue = MessageBuffer() + self.requestToDir = MessageBuffer() + self.requestToDir.out_port = ruby_system.network.in_port + self.responseFromDir = MessageBuffer(ordered = True) + self.responseFromDir.in_port = ruby_system.network.out_port + + +class MyNetwork(SimpleNetwork): + """A simple point-to-point network. This doesn't not use garnet. + """ + + def __init__(self, ruby_system): + super(MyNetwork, self).__init__() + self.netifs = [] + self.ruby_system = ruby_system + + def connectControllers(self, controllers): + """Connect all of the controllers to routers and connec the routers + together in a point-to-point network. + """ + # Create one router/switch per controller in the system + self.routers = [Switch(router_id = i) for i in range(len(controllers))] + + # Make a link from each controller to the router. The link goes + # externally to the network. + self.ext_links = [SimpleExtLink(link_id=i, ext_node=c, + int_node=self.routers[i]) + for i, c in enumerate(controllers)] + + # Make an "internal" link (internal to the network) between every pair + # of routers. + link_count = 0 + self.int_links = [] + for ri in self.routers: + for rj in self.routers: + if ri == rj: continue # Don't connect a router to itself! + link_count += 1 + self.int_links.append(SimpleIntLink(link_id = link_count, + src_node = ri, + dst_node = rj)) diff --git a/configs-npb-gapbs-chkpt-restore/system/MOESI_CMP_directory.py b/configs-npb-gapbs-chkpt-restore/system/MOESI_CMP_directory.py new file mode 100755 index 0000000000..33f9f47e74 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/MOESI_CMP_directory.py @@ -0,0 +1,350 @@ +#Copyright (c) 2020 The Regents of the University of California. 
+#All Rights Reserved +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +""" This file creates a set of Ruby caches for the MOESI CMP directory +protocol. +This protocol models two level cache hierarchy. The L1 cache is split into +instruction and data cache. + +This system support the memory size of up to 3GB. + +""" + +from __future__ import print_function +from __future__ import absolute_import + +import math + +from m5.defines import buildEnv +from m5.util import fatal, panic + +from m5.objects import * + +class MOESICMPDirCache(RubySystem): + + def __init__(self): + if buildEnv['PROTOCOL'] != 'MOESI_CMP_directory': + fatal("This system assumes MOESI_CMP_directory!") + + super(MOESICMPDirCache, self).__init__() + + self._numL2Caches = 8 + + def setup(self, system, cpus, mem_ctrls, dma_ports, iobus): + """Set up the Ruby cache subsystem. Note: This can't be done in the + constructor because many of these items require a pointer to the + ruby system (self). This causes infinite recursion in initialize() + if we do this in the __init__. + """ + # Ruby's global network. + self.network = MyNetwork(self) + + # MOESI_CMP_directory example uses 3 virtual networks + self.number_of_virtual_networks = 3 + self.network.number_of_virtual_networks = 3 + + # There is a single global list of all of the controllers to make it + # easier to connect everything to the global network. This can be + # customized depending on the topology/network requirements. + # L1 caches are private to a core, hence there are one L1 cache per CPU + # core. The number of L2 caches are dependent to the architecture. + self.controllers = \ + [L1Cache(system, self, cpu, self._numL2Caches) for cpu in cpus] + \ + [L2Cache(system, self, self._numL2Caches) for num in \ + range(self._numL2Caches)] + [DirController(self, \ + system.mem_ranges, mem_ctrls)] + [DMAController(self) for i \ + in range(len(dma_ports))] + + # Create one sequencer per CPU and dma controller. + # Sequencers for other controllers can be here here. 
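+        # One RubySequencer per CPU (index-matched to the L1 controllers
+        # created above) plus one DMASequencer per DMA port; each 'version'
+        # must be unique within its sequencer type.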
+ self.sequencers = [RubySequencer(version = i, + # Grab dcache from ctrl + dcache = self.controllers[i].L1Dcache, + clk_domain = self.controllers[i].clk_domain, + pio_request_port = iobus.cpu_side_ports, + mem_request_port = iobus.cpu_side_ports, + pio_response_port = iobus.mem_side_ports + ) for i in range(len(cpus))] + \ + [DMASequencer(version = i, + in_ports = port) + for i,port in enumerate(dma_ports) + ] + + for i,c in enumerate(self.controllers[:len(cpus)]): + c.sequencer = self.sequencers[i] + + #Connecting the DMA sequencer to DMA controller + for i,d in enumerate(self.controllers[-len(dma_ports):]): + i += len(cpus) + d.dma_sequencer = self.sequencers[i] + + self.num_of_sequencers = len(self.sequencers) + + # Create the network and connect the controllers. + # NOTE: This is quite different if using Garnet! + self.network.connectControllers(self.controllers) + self.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. + self.sys_port_proxy = RubyPortProxy() + system.system_port = self.sys_port_proxy.in_ports + self.sys_port_proxy.pio_request_port = iobus.cpu_side_ports + + # Connect the cpu's cache, interrupt, and TLB ports to Ruby + for i,cpu in enumerate(cpus): + cpu.icache_port = self.sequencers[i].in_ports + cpu.dcache_port = self.sequencers[i].in_ports + cpu.createInterruptController() + isa = buildEnv['TARGET_ISA'] + if isa == 'x86': + cpu.interrupts[0].pio = self.sequencers[i].interrupt_out_port + cpu.interrupts[0].int_requestor = self.sequencers[i].in_ports + cpu.interrupts[0].int_responder = self.sequencers[i].interrupt_out_port + if isa == 'x86' or isa == 'arm': + cpu.mmu.connectWalkerPorts( + self.sequencers[i].in_ports, self.sequencers[i].in_ports) + +class L1Cache(L1Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, cpu, num_l2Caches): + """Creating L1 cache controller. Consist of both instruction + and data cache. The size of data cache is 512KB and + 8-way set associative. The instruction cache is 32KB, + 2-way set associative. + """ + super(L1Cache, self).__init__() + + self.version = self.versionCount() + block_size_bits = int(math.log(system.cache_line_size, 2)) + l1i_size = '32kB' + l1i_assoc = '2' + l1d_size = '512kB' + l1d_assoc = '8' + # This is the cache memory object that stores the cache data and tags + self.L1Icache = RubyCache(size = l1i_size, + assoc = l1i_assoc, + start_index_bit = block_size_bits , + is_icache = True, + dataAccessLatency = 1, + tagAccessLatency = 1) + self.L1Dcache = RubyCache(size = l1d_size, + assoc = l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False, + dataAccessLatency = 1, + tagAccessLatency = 1) + self.clk_domain = cpu.clk_domain + self.prefetcher = RubyPrefetcher() + self.send_evictions = self.sendEvicts(cpu) + self.transitions_per_cycle = 4 + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getBlockSizeBits(self, system): + bits = int(math.log(system.cache_line_size, 2)) + if 2**bits != system.cache_line_size.value: + panic("Cache line size not a power of 2!") + return bits + + def sendEvicts(self, cpu): + """True if the CPU model or ISA requires sending evictions from caches + to the CPU. Two scenarios warrant forwarding evictions to the CPU: + 1. The O3 model must keep the LSQ coherent with the caches + 2. 
The x86 mwait instruction is built on top of coherence + 3. The local exclusive monitor in ARM systems + """ + if type(cpu) is DerivO3CPU or \ + buildEnv['TARGET_ISA'] in ('x86', 'arm'): + return True + return False + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.mandatoryQueue = MessageBuffer() + self.requestFromL1Cache = MessageBuffer() + self.requestFromL1Cache.out_port = ruby_system.network.in_port + self.responseFromL1Cache = MessageBuffer() + self.responseFromL1Cache.out_port = ruby_system.network.in_port + self.requestToL1Cache = MessageBuffer() + self.requestToL1Cache.in_port = ruby_system.network.out_port + self.responseToL1Cache = MessageBuffer() + self.responseToL1Cache.in_port = ruby_system.network.out_port + self.triggerQueue = MessageBuffer(ordered = True) + +class L2Cache(L2Cache_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, system, ruby_system, num_l2Caches): + + super(L2Cache, self).__init__() + + self.version = self.versionCount() + # This is the cache memory object that stores the cache data and tags + self.L2cache = RubyCache(size = '1 MB', + assoc = 16, + start_index_bit = self.getL2StartIdx(system, + num_l2Caches), + dataAccessLatency = 20, + tagAccessLatency = 20) + + self.transitions_per_cycle = '4' + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def getL2StartIdx(self, system, num_l2caches): + l2_bits = int(math.log(num_l2caches, 2)) + bits = int(math.log(system.cache_line_size, 2)) + l2_bits + return bits + + + def connectQueues(self, ruby_system): + """Connect all of the queues for this controller. + """ + self.GlobalRequestFromL2Cache = MessageBuffer() + self.GlobalRequestFromL2Cache.out_port = ruby_system.network.in_port + self.L1RequestFromL2Cache = MessageBuffer() + self.L1RequestFromL2Cache.out_port = ruby_system.network.in_port + self.responseFromL2Cache = MessageBuffer() + self.responseFromL2Cache.out_port = ruby_system.network.in_port + + self.GlobalRequestToL2Cache = MessageBuffer() + self.GlobalRequestToL2Cache.in_port = ruby_system.network.out_port + self.L1RequestToL2Cache = MessageBuffer() + self.L1RequestToL2Cache.in_port = ruby_system.network.out_port + self.responseToL2Cache = MessageBuffer() + self.responseToL2Cache.in_port = ruby_system.network.out_port + self.triggerQueue = MessageBuffer(ordered = True) + + + +class DirController(Directory_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system, ranges, mem_ctrls): + """ranges are the memory ranges assigned to this controller. + """ + if len(mem_ctrls) > 1: + panic("This cache system can only be connected to one mem ctrl") + super(DirController, self).__init__() + self.version = self.versionCount() + self.addr_ranges = ranges + self.ruby_system = ruby_system + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. 
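+        # The actual DRAM traffic is expected to travel over this port via the
+        # requestToMemory/responseFromMemory buffers created in connectQueues().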
+ self.memory_out_port = mem_ctrls[0].port + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.requestToDir = MessageBuffer() + self.requestToDir.in_port = ruby_system.network.out_port + self.responseToDir = MessageBuffer() + self.responseToDir.in_port = ruby_system.network.out_port + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = ruby_system.network.in_port + self.forwardFromDir = MessageBuffer() + self.forwardFromDir.out_port = ruby_system.network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + self.triggerQueue = MessageBuffer(ordered = True) + +class DMAController(DMA_Controller): + + _version = 0 + @classmethod + def versionCount(cls): + cls._version += 1 # Use count for this particular type + return cls._version - 1 + + def __init__(self, ruby_system): + super(DMAController, self).__init__() + self.version = self.versionCount() + self.ruby_system = ruby_system + self.connectQueues(ruby_system) + + def connectQueues(self, ruby_system): + self.mandatoryQueue = MessageBuffer() + self.responseFromDir = MessageBuffer() + self.responseFromDir.in_port = ruby_system.network.out_port + self.reqToDir = MessageBuffer() + self.reqToDir.out_port = ruby_system.network.in_port + self.respToDir = MessageBuffer() + self.respToDir.out_port = ruby_system.network.in_port + self.triggerQueue = MessageBuffer(ordered = True) + + +class MyNetwork(SimpleNetwork): + """A simple point-to-point network. This doesn't not use garnet. + """ + + def __init__(self, ruby_system): + super(MyNetwork, self).__init__() + self.netifs = [] + self.ruby_system = ruby_system + + def connectControllers(self, controllers): + """Connect all of the controllers to routers and connec the routers + together in a point-to-point network. + """ + # Create one router/switch per controller in the system + self.routers = [Switch(router_id = i) for i in range(len(controllers))] + + # Make a link from each controller to the router. The link goes + # externally to the network. + self.ext_links = [SimpleExtLink(link_id=i, ext_node=c, + int_node=self.routers[i]) + for i, c in enumerate(controllers)] + + # Make an "internal" link (internal to the network) between every pair + # of routers. + link_count = 0 + self.int_links = [] + for ri in self.routers: + for rj in self.routers: + if ri == rj: continue # Don't connect a router to itself! + link_count += 1 + self.int_links.append(SimpleIntLink(link_id = link_count, + src_node = ri, + dst_node = rj)) diff --git a/configs-npb-gapbs-chkpt-restore/system/__init__.py b/configs-npb-gapbs-chkpt-restore/system/__init__.py new file mode 100755 index 0000000000..f5c653b06d --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/__init__.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +from .system import MySystem +from .ruby_system_1channel import RubySystem1Channel +from .ruby_system_8channel import RubySystem8Channel diff --git a/configs-npb-gapbs-chkpt-restore/system/caches.py b/configs-npb-gapbs-chkpt-restore/system/caches.py new file mode 100755 index 0000000000..9e44211111 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/caches.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Jason Lowe-Power + +""" Caches with options for a simple gem5 configuration script + +This file contains L1 I/D and L2 caches to be used in the simple +gem5 configuration script. +""" + +from m5.objects import Cache, L2XBar, StridePrefetcher + +# Some specific options for caches +# For all options see src/mem/cache/BaseCache.py + +class PrefetchCache(Cache): + + def __init__(self, options): + super(PrefetchCache, self).__init__() + if not options or options.no_prefetchers: + return + self.prefetcher = StridePrefetcher() + +class L1Cache(PrefetchCache): + """Simple L1 Cache with default values""" + + assoc = 8 + tag_latency = 1 + data_latency = 1 + response_latency = 1 + mshrs = 16 + tgts_per_mshr = 20 + writeback_clean = True + + def __init__(self, options=None): + super(L1Cache, self).__init__(options) + pass + + def connectBus(self, bus): + """Connect this cache to a memory-side bus""" + self.mem_side = bus.cpu_side_ports + + def connectCPU(self, cpu): + """Connect this cache's port to a CPU-side port + This must be defined in a subclass""" + raise NotImplementedError + +class L1ICache(L1Cache): + """Simple L1 instruction cache with default values""" + + def __init__(self, opts=None): + super(L1ICache, self).__init__(opts) + if not opts or not opts.l1i_size: + return + self.size = opts.l1i_size + + def connectCPU(self, cpu): + """Connect this cache's port to a CPU icache port""" + self.cpu_side = cpu.icache_port + +class L1DCache(L1Cache): + """Simple L1 data cache with default values""" + + def __init__(self, opts=None): + super(L1DCache, self).__init__(opts) + if not opts or not opts.l1d_size: + return + self.size = opts.l1d_size + + def connectCPU(self, cpu): + """Connect this cache's port to a CPU dcache port""" + self.cpu_side = cpu.dcache_port + +class MMUCache(Cache): + # Default parameters + size = '8kB' + assoc = 4 + tag_latency = 1 + data_latency = 1 + response_latency = 1 + mshrs = 20 + tgts_per_mshr = 12 + writeback_clean = True + + def __init__(self): + super(MMUCache, self).__init__() + + def connectCPU(self, cpu): + """Connect the CPU itb and dtb to the cache + Note: This creates a new crossbar + """ + self.mmubus = L2XBar() + self.cpu_side = self.mmubus.mem_side_ports + cpu.mmu.connectWalkerPorts( + self.mmubus.cpu_side_ports, self.mmubus.cpu_side_ports) + + def connectBus(self, bus): + """Connect this cache to a memory-side bus""" + self.mem_side = bus.cpu_side_ports + +class L2Cache(PrefetchCache): + """Simple L2 Cache with default values""" + + # Default parameters + assoc = 16 + tag_latency = 10 + data_latency = 10 + response_latency = 1 + mshrs = 20 + tgts_per_mshr = 12 + writeback_clean = True + + def __init__(self, opts=None): + super(L2Cache, self).__init__(opts) + if not opts or not opts.l2_size: + return + self.size = opts.l2_size + + def connectCPUSideBus(self, bus): + self.cpu_side = bus.mem_side_ports + + def connectMemSideBus(self, bus): + self.mem_side = bus.cpu_side_ports + +class L3Cache(Cache): + """Simple L3 Cache bank with default values + This assumes that the L3 is made up of multiple banks. This cannot + be used as a standalone L3 cache. 
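+    system.py instantiates one such bank and places it between the shared
+    L3 crossbar and the memory bus.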
+ """ + + # Default parameters + assoc = 32 + tag_latency = 40 + data_latency = 40 + response_latency = 10 + mshrs = 256 + tgts_per_mshr = 12 + clusivity = 'mostly_excl' + + def __init__(self, opts): + super(L3Cache, self).__init__() + self.size = (opts.l3_size) + + def connectCPUSideBus(self, bus): + self.cpu_side = bus.mem_side_ports + + def connectMemSideBus(self, bus): + self.mem_side = bus.cpu_side_ports diff --git a/configs-npb-gapbs-chkpt-restore/system/fs_tools.py b/configs-npb-gapbs-chkpt-restore/system/fs_tools.py new file mode 100755 index 0000000000..5e5e2df6e4 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/fs_tools.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +from m5.objects import IdeDisk, CowDiskImage, RawDiskImage + +class CowDisk(IdeDisk): + + def __init__(self, filename): + super(CowDisk, self).__init__() + self.driveID = 'device0' + self.image = CowDiskImage(child=RawDiskImage(read_only=True), + read_only=False) + self.image.child.image_file = filename diff --git a/configs-npb-gapbs-chkpt-restore/system/ruby_system_1channel.py b/configs-npb-gapbs-chkpt-restore/system/ruby_system_1channel.py new file mode 100644 index 0000000000..da61e0dff1 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/ruby_system_1channel.py @@ -0,0 +1,369 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +import m5 +from m5.objects import * +from .fs_tools import * + + +class RubySystem1Channel(System): + def __init__( + self, + kernel, + disk, + mem_sys, + num_cpus, + assoc, + dcache_size, + mem_size, + mem_size_per_channel, + policy, + is_link, + link_lat, + bypass, + opts, + restore=False, + ): + super(RubySystem1Channel, self).__init__() + self._opts = opts + + # Use parallel if using KVM. Don't use parallel is restoring cpt + self._host_parallel = not restore + self._restore = restore + + # Set up the clock domain and the voltage domain + self.clk_domain = SrcClockDomain() + self.clk_domain.clock = "5GHz" + self.clk_domain.voltage_domain = VoltageDomain() + + self.mem_ranges = [ + AddrRange(Addr("128MiB")), # kernel data + AddrRange(0xC0000000, size=0x100000), # For I/0 + AddrRange( + 0x100000000, size=mem_size + ), + ] + + self.initFS(num_cpus) + + # Replace these paths with the path to your disk images. + # The first disk is the root disk. The second could be used for swap + # or anything else. + self.setDiskImages(disk, disk) + + # Change this path to point to the kernel you want to use + self.workload.object_file = kernel + # Options specified on the kernel command line + boot_options = [ + "earlyprintk=ttyS0", + "console=ttyS0", + "lpj=7999923", + "root=/dev/hda1", + ] + + self.workload.command_line = " ".join(boot_options) + + # Create the CPUs for our system. + self.createCPU(num_cpus) + + # self.intrctrl = IntrControl() + self._createMemoryControllers( + assoc, dcache_size, policy, is_link, link_lat, bypass + ) + + # Create the cache hierarchy for the system. 
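+        # The protocol module chosen by mem_sys is imported lazily below.
+        # Its setup() receives the CPU list, the kernel-range DDR3 controller
+        # and the data-range PolicyManager together with their address
+        # ranges, the IDE DMA port and the IO bus, and builds the Ruby
+        # controllers and interconnect on top of them.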
+ if mem_sys == "MI_example": + from .MI_example_caches import MIExampleSystem + + self.caches = MIExampleSystem() + elif mem_sys == "MESI_Two_Level": + from .MESI_Two_Level import MESITwoLevelCache + + self.caches = MESITwoLevelCache() + elif mem_sys == "MOESI_CMP_directory": + from .MOESI_CMP_directory import MOESICMPDirCache + + self.caches = MOESICMPDirCache() + if self._restore: + cpus = self.o3Cpu + else: + cpus = self.cpu + self.caches.setup( + self, + cpus, + [self.kernel_mem_ctrl, self.mem_ctrl], + [self.mem_ranges[0], self.mem_ranges[2]], + [self.pc.south_bridge.ide.dma, self.iobus.mem_side_ports], + self.iobus, + ) + + self.caches.access_backing_store = True + self.caches.phys_mem = [ + SimpleMemory(range=self.mem_ranges[0], in_addr_map=False), + SimpleMemory(range=self.mem_ranges[2], in_addr_map=False), + ] + + if self._host_parallel: + # To get the KVM CPUs to run on different host CPUs + # Specify a different event queue for each CPU + for i, cpu in enumerate(self.cpu): + for obj in cpu.descendants(): + obj.eventq_index = 0 + + # the number of eventqs are set based + # on experiments with few benchmarks + + cpu.eventq_index = i + 1 + + def getHostParallel(self): + return self._host_parallel + + def totalInsts(self): + return sum([cpu.totalInsts() for cpu in self.cpu]) + + def createCPUThreads(self, cpu): + for c in cpu: + c.createThreads() + + def createCPU(self, num_cpus): + + if not self._restore: + # Note KVM needs a VM and atomic_noncaching + self.cpu = [X86KvmCPU(cpu_id=i) for i in range(num_cpus)] + self.kvm_vm = KvmVM() + self.mem_mode = "atomic_noncaching" + self.createCPUThreads(self.cpu) + + self.atomicNoncachingCpu = [ + X86NonCachingSimpleCPU(cpu_id=i, switched_out=True) + for i in range(num_cpus) + ] + self.createCPUThreads(self.atomicNoncachingCpu) + + self.atomicCpu = [ + X86AtomicSimpleCPU(cpu_id=i, switched_out=True) + for i in range(num_cpus) + ] + self.createCPUThreads(self.atomicCpu) + + self.timingCpu = [ + X86TimingSimpleCPU(cpu_id=i, switched_out=True) + for i in range(num_cpus) + ] + self.createCPUThreads(self.timingCpu) + + self.o3Cpu = [ + X86O3CPU(cpu_id=i, switched_out=True) for i in range(num_cpus) + ] + self.createCPUThreads(self.o3Cpu) + else: + self.o3Cpu = [X86O3CPU(cpu_id=i) for i in range(num_cpus)] + self.mem_mode = "timing" + self.createCPUThreads(self.o3Cpu) + + def switchCpus(self, old, new): + assert new[0].switchedOut() + m5.switchCpus(self, list(zip(old, new))) + + def setDiskImages(self, img_path_1, img_path_2): + disk0 = CowDisk(img_path_1) + disk2 = CowDisk(img_path_2) + self.pc.south_bridge.ide.disks = [disk0, disk2] + + def _createKernelMemoryController(self, cls): + return MemCtrl(dram=cls(range=self.mem_ranges[0], kvm_map=False)) + + def _createMemoryControllers( + self, assoc, dcache_size, policy, is_link, link_lat, bypass + ): + self.kernel_mem_ctrl = self._createKernelMemoryController( + DDR3_1600_8x8 + ) + + self.mem_ctrl = PolicyManager(range=self.mem_ranges[2], kvm_map=False) + self.mem_ctrl.static_frontend_latency = "10ns" + self.mem_ctrl.static_backend_latency = "10ns" + + self.mem_ctrl.loc_mem_policy = policy + + self.mem_ctrl.assoc = assoc + + if bypass == 0: + self.mem_ctrl.bypass_dcache = False + elif bypass == 1: + self.mem_ctrl.bypass_dcache = True + + # TDRAM cache + self.loc_mem_ctrl = MemCtrl() + self.loc_mem_ctrl.consider_oldest_write = True + self.loc_mem_ctrl.oldest_write_age_threshold = 2500000 + self.loc_mem_ctrl.dram = TDRAM( + range=self.mem_ranges[2], in_addr_map=False, kvm_map=False + ) + + 
self.mem_ctrl.loc_mem = self.loc_mem_ctrl.dram + self.loc_mem_ctrl.static_frontend_latency = "1ns" + self.loc_mem_ctrl.static_backend_latency = "1ns" + self.loc_mem_ctrl.static_frontend_latency_tc = "0ns" + self.loc_mem_ctrl.static_backend_latency_tc = "0ns" + + # main memory + self.far_mem_ctrl = MemCtrl() + self.far_mem_ctrl.dram = DDR4_2400_16x4( + range=self.mem_ranges[2], in_addr_map=False, kvm_map=False + ) + self.far_mem_ctrl.static_frontend_latency = "1ns" + self.far_mem_ctrl.static_backend_latency = "1ns" + + self.loc_mem_ctrl.port = self.mem_ctrl.loc_req_port + + # far backing store + if is_link == 1: + self.membusPolManFarMem = L2XBar(width=64) + self.membusPolManFarMem.cpu_side_ports = self.mem_ctrl.far_req_port + self.membusPolManFarMem.mem_side_ports = self.far_mem_ctrl.port + self.membusPolManFarMem.frontend_latency = link_lat + self.membusPolManFarMem.response_latency = link_lat + else: + self.far_mem_ctrl.port = self.mem_ctrl.far_req_port + + self.mem_ctrl.orb_max_size = 128 + self.mem_ctrl.dram_cache_size = dcache_size + + self.loc_mem_ctrl.dram.read_buffer_size = 64 + self.loc_mem_ctrl.dram.write_buffer_size = 64 + + self.far_mem_ctrl.dram.read_buffer_size = 64 + self.far_mem_ctrl.dram.write_buffer_size = 64 + + def initFS(self, cpus): + self.pc = Pc() + + self.workload = X86FsLinux() + + # North Bridge + self.iobus = IOXBar() + + # connect the io bus + # Note: pass in a reference to where Ruby will connect to in the future + # so the port isn't connected twice. + self.pc.attachIO(self.iobus, [self.pc.south_bridge.ide.dma]) + + ############################################### + + # Add in a Bios information structure. + self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(cpus): + bp = X86IntelMPProcessor( + local_apic_id=i, + local_apic_version=0x14, + enable=True, + bootstrap=(i == 0), + ) + base_entries.append(bp) + io_apic = X86IntelMPIOAPIC( + id=cpus, version=0x11, enable=True, address=0xFEC00000 + ) + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ") + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ") + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy( + bus_id=1, subtractive_decode=True, parent_bus=0 + ) + ext_entries.append(connect_busses) + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=0, + source_bus_irq=0 + (4 << 2), + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=16, + ) + base_entries.append(pci_dev4_inta) + + def assignISAInt(irq, apicPin): + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="ExtInt", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=0, + ) + base_entries.append(assign_8259_to_apic) + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=apicPin, + ) + base_entries.append(assign_to_apic) + + assignISAInt(0, 2) + assignISAInt(1, 1) + for i in range(3, 15): + assignISAInt(i, i) + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = [ + # Mark the first megabyte 
of memory as reserved + X86E820Entry(addr=0, size="639kB", range_type=1), + X86E820Entry(addr=0x9FC00, size="385kB", range_type=2), + # Mark the rest of physical memory as available + X86E820Entry( + addr=0x100000, + size="%dB" % (self.mem_ranges[0].size() - 0x100000), + range_type=1, + ), + X86E820Entry( + addr=0x100000000, + size="%dB" % (self.mem_ranges[2].size()), + range_type=1, + ), + ] + + # Reserve the last 16kB of the 32-bit address space for m5ops + entries.append( + X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) + ) + + self.workload.e820_table.entries = entries diff --git a/configs-npb-gapbs-chkpt-restore/system/ruby_system_8channel.py b/configs-npb-gapbs-chkpt-restore/system/ruby_system_8channel.py new file mode 100644 index 0000000000..fb28e1ec20 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/ruby_system_8channel.py @@ -0,0 +1,425 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Jason Lowe-Power +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +import m5 +from m5.objects import * +from .fs_tools import * +from .MESI_Two_Level import MESITwoLevelCache + + +class RubySystem8Channel(System): + def __init__( + self, + kernel, + disk, + mem_sys, + num_cpus, + assoc, + dcache_size, # size of 1 channel + main_mem_size, + policy, + is_link, + link_lat, + bypass, + opts, + restore=False, + ): + super(RubySystem8Channel, self).__init__() + self._opts = opts + + # Use parallel if using KVM. 
Don't use parallel is restoring cpt + self._host_parallel = not restore + self._restore = restore + + # Set up the clock domain and the voltage domain + self.clk_domain = SrcClockDomain() + self.clk_domain.clock = "5GHz" + self.clk_domain.voltage_domain = VoltageDomain() + self._main_mem_size = main_mem_size + + self._data_ranges = [ + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=0, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=1, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=2, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=3, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=4, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=5, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=6, + ), + AddrRange( + start=0x100000000, + size=main_mem_size, + masks=[1 << 6, 1 << 7, 1 << 8], + intlvMatch=7, + ), + ] + + self.mem_ranges = [ + AddrRange(Addr("128MiB")), # kernel data + AddrRange(0xC0000000, size=0x100000), # For I/0 + ] + self._data_ranges + + self._total_data_range=[AddrRange( + 0x100000000, size=main_mem_size)] + + self.initFS(num_cpus) + + # Replace these paths with the path to your disk images. + # The first disk is the root disk. The second could be used for swap + # or anything else. + self.setDiskImages(disk, disk) + + # Change this path to point to the kernel you want to use + self.workload.object_file = kernel + # Options specified on the kernel command line + boot_options = [ + "earlyprintk=ttyS0", + "console=ttyS0", + "lpj=7999923", + "root=/dev/hda1", + ] + + self.workload.command_line = " ".join(boot_options) + + # Create the CPUs for our system. + self.createCPU(num_cpus) + + # self.intrctrl = IntrControl() + self._createMemoryControllers( + assoc, + dcache_size, + policy, + is_link, + link_lat, + bypass, + ) + + if self._restore: + cpus = self.o3Cpu + else: + cpus = self.cpu + + # Create the cache hierarchy for the system. 
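+        # The 8-channel variant instantiates the MESI_Two_Level cache system
+        # directly and hands setup() the kernel-range controller plus the
+        # eight per-channel PolicyManagers with their interleaved data
+        # ranges. With masks [1 << 6, 1 << 7, 1 << 8] the channel is selected
+        # by address bits 8:6, so consecutive 64 B blocks rotate across the
+        # channels (e.g. bits 8:6 == 0b011 fall in the intlvMatch == 3 range).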
+ self.caches = MESITwoLevelCache() + self.caches.setup( + self, + cpus, + [self.kernel_mem_ctrl] + self.mem_ctrl, + [self.mem_ranges[0]] + self._data_ranges, + [self.pc.south_bridge.ide.dma, self.iobus.mem_side_ports], + self.iobus, + ) + + self.caches.access_backing_store = True + self.caches.phys_mem = [ + SimpleMemory(range=self.mem_ranges[0], in_addr_map=False), + SimpleMemory(range=AddrRange(0x100000000, size=main_mem_size), in_addr_map=False), + ] + + if self._host_parallel: + # To get the KVM CPUs to run on different host CPUs + # Specify a different event queue for each CPU + for i, cpu in enumerate(self.cpu): + for obj in cpu.descendants(): + obj.eventq_index = 0 + + # the number of eventqs are set based + # on experiments with few benchmarks + + cpu.eventq_index = i + 1 + + def getHostParallel(self): + return self._host_parallel + + def totalInsts(self): + return sum([cpu.totalInsts() for cpu in self.cpu]) + + def createCPUThreads(self, cpu): + for c in cpu: + c.createThreads() + + def createCPU(self, num_cpus): + + if not self._restore: + # Note KVM needs a VM and atomic_noncaching + self.cpu = [X86KvmCPU(cpu_id=i) for i in range(num_cpus)] + self.kvm_vm = KvmVM() + self.mem_mode = "atomic_noncaching" + self.createCPUThreads(self.cpu) + + self.atomicNoncachingCpu = [ + X86NonCachingSimpleCPU(cpu_id=i, switched_out=True) + for i in range(num_cpus) + ] + self.createCPUThreads(self.atomicNoncachingCpu) + + self.atomicCpu = [ + X86AtomicSimpleCPU(cpu_id=i, switched_out=True) + for i in range(num_cpus) + ] + self.createCPUThreads(self.atomicCpu) + + self.timingCpu = [ + X86TimingSimpleCPU(cpu_id=i, switched_out=True) + for i in range(num_cpus) + ] + self.createCPUThreads(self.timingCpu) + + self.o3Cpu = [ + X86O3CPU(cpu_id=i, switched_out=True) for i in range(num_cpus) + ] + self.createCPUThreads(self.o3Cpu) + else: + self.o3Cpu = [X86O3CPU(cpu_id=i) for i in range(num_cpus)] + self.mem_mode = "timing" + self.createCPUThreads(self.o3Cpu) + + def switchCpus(self, old, new): + assert new[0].switchedOut() + m5.switchCpus(self, list(zip(old, new))) + + def setDiskImages(self, img_path_1, img_path_2): + disk0 = CowDisk(img_path_1) + disk2 = CowDisk(img_path_2) + self.pc.south_bridge.ide.disks = [disk0, disk2] + + def _createKernelMemoryController(self, cls): + return MemCtrl(dram=cls(range=self.mem_ranges[0], kvm_map=False)) + + def _createMemoryControllers( + self, + assoc, + dcache_size, + policy, + is_link, + link_lat, + bypass, + ): + self.kernel_mem_ctrl = self._createKernelMemoryController( + DDR3_1600_8x8 + ) + + self.mem_ctrl = [ + PolicyManager(range=r, kvm_map=False, channel_index=str(i)) for i, r in enumerate(self.mem_ranges[2:]) + ] + self.loc_mem_ctrl = [MemCtrl() for i in range(8)] + + self.membusPolManFarMem = L2XBar(width=64) + self.membusPolManFarMem.frontend_latency = link_lat + self.membusPolManFarMem.response_latency = link_lat + + for i in range(0, 8): + self.mem_ctrl[i].static_frontend_latency = "10ns" + self.mem_ctrl[i].static_backend_latency = "10ns" + self.mem_ctrl[i].loc_mem_policy = policy + self.mem_ctrl[i].assoc = assoc + self.mem_ctrl[i].orb_max_size = 128 + self.mem_ctrl[i].dram_cache_size = dcache_size + if bypass == 0: + self.mem_ctrl[i].bypass_dcache = False + elif bypass == 1: + self.mem_ctrl[i].bypass_dcache = True + self.membusPolManFarMem.cpu_side_ports = self.mem_ctrl[ + i + ].far_req_port + + # TDRAM cache + for i in range(8): + self.loc_mem_ctrl[i].consider_oldest_write = True + self.loc_mem_ctrl[i].oldest_write_age_threshold = 2500000 + 
self.loc_mem_ctrl[i].dram = TDRAM( + range=self._data_ranges[i], in_addr_map=False, kvm_map=False, null = True + ) + self.loc_mem_ctrl[i].dram.device_size = dcache_size + self.mem_ctrl[i].loc_mem = self.loc_mem_ctrl[i].dram + self.loc_mem_ctrl[i].static_frontend_latency = "1ns" + self.loc_mem_ctrl[i].static_backend_latency = "1ns" + self.loc_mem_ctrl[i].static_frontend_latency_tc = "0ns" + self.loc_mem_ctrl[i].static_backend_latency_tc = "0ns" + self.loc_mem_ctrl[i].dram.read_buffer_size = 64 + self.loc_mem_ctrl[i].dram.write_buffer_size = 64 + self.loc_mem_ctrl[i].port = self.mem_ctrl[i].loc_req_port + + # main memory + self.far_mem_ctrl = MemCtrl() + self.far_mem_ctrl.dram = DDR5_4400_4x8( + range=AddrRange( + start=0x100000000, + size=self._main_mem_size, + # masks=[1 << 6], + # intlvMatch=i, + ), + in_addr_map=False, kvm_map=False, null = True + ) + self.far_mem_ctrl.dram.device_size = self._main_mem_size + self.far_mem_ctrl.static_frontend_latency = "1ns" + self.far_mem_ctrl.static_backend_latency = "1ns" + self.far_mem_ctrl.dram.read_buffer_size = 64 + self.far_mem_ctrl.dram.write_buffer_size = 64 + self.membusPolManFarMem.mem_side_ports = self.far_mem_ctrl.port + + def initFS(self, cpus): + self.pc = Pc() + + self.workload = X86FsLinux() + + # North Bridge + self.iobus = IOXBar() + + # connect the io bus + # Note: pass in a reference to where Ruby will connect to in the future + # so the port isn't connected twice. + self.pc.attachIO(self.iobus, [self.pc.south_bridge.ide.dma]) + + ############################################### + + # Add in a Bios information structure. + self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(cpus): + bp = X86IntelMPProcessor( + local_apic_id=i, + local_apic_version=0x14, + enable=True, + bootstrap=(i == 0), + ) + base_entries.append(bp) + io_apic = X86IntelMPIOAPIC( + id=cpus, version=0x11, enable=True, address=0xFEC00000 + ) + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ") + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ") + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy( + bus_id=1, subtractive_decode=True, parent_bus=0 + ) + ext_entries.append(connect_busses) + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=0, + source_bus_irq=0 + (4 << 2), + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=16, + ) + base_entries.append(pci_dev4_inta) + + def assignISAInt(irq, apicPin): + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="ExtInt", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=0, + ) + base_entries.append(assign_8259_to_apic) + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=apicPin, + ) + base_entries.append(assign_to_apic) + + assignISAInt(0, 2) + assignISAInt(1, 1) + for i in range(3, 15): + assignISAInt(i, i) + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = [ + # Mark the first megabyte of memory as reserved + X86E820Entry(addr=0, size="639kB", 
range_type=1), + X86E820Entry(addr=0x9FC00, size="385kB", range_type=2), + # Mark the rest of physical memory as available + X86E820Entry( + addr=0x100000, + size="%dB" % (self.mem_ranges[0].size() - 0x100000), + range_type=1, + ), + X86E820Entry( + addr=0x100000000, + size="%dB" % (self._total_data_range[0].size()), + range_type=1, + ), + ] + + # Reserve the last 16kB of the 32-bit address space for m5ops + entries.append( + X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) + ) + + self.workload.e820_table.entries = entries diff --git a/configs-npb-gapbs-chkpt-restore/system/system.py b/configs-npb-gapbs-chkpt-restore/system/system.py new file mode 100755 index 0000000000..6365b39d63 --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/system.py @@ -0,0 +1,414 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2018 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Jason Lowe-Power + +import m5 +from m5.objects import * +from .fs_tools import * +from .caches import * + + +class MySystem(System): + + def __init__(self, kernel, disk, num_cpus, opts, no_kvm=False): + super(MySystem, self).__init__() + self._opts = opts + self._no_kvm = no_kvm + + self._host_parallel = not self._opts.no_host_parallel + + # Set up the clock domain and the voltage domain + self.clk_domain = SrcClockDomain() + self.clk_domain.clock = '2.3GHz' + self.clk_domain.voltage_domain = VoltageDomain() + + #mem_size = '32GB' + #self.mem_ranges = [AddrRange('100MB'), # For kernel + # AddrRange(0xC0000000, size=0x100000), # For I/0 + # AddrRange(Addr('4GB'), size = mem_size) # All data + # ] + + + self.mem_ranges = [AddrRange(Addr('2GiB')), # All data + AddrRange(0xC0000000, size=0x100000), # For I/0 + ] + + # Create the main memory bus + # This connects to main memory + self.membus = SystemXBar(width = 64) # 64-byte width + self.membus.badaddr_responder = BadAddr() + self.membus.default = Self.badaddr_responder.pio + + # Set up the system port for functional access from the simulator + self.system_port = self.membus.cpu_side_ports + + self.initFS(self.membus, num_cpus) + + + # Replace these paths with the path to your disk images. + # The first disk is the root disk. The second could be used for swap + # or anything else. + + self.setDiskImages(disk, disk) + + if opts.second_disk: + self.setDiskImages(disk, opts.second_disk) + else: + self.setDiskImages(disk, disk) + + # Change this path to point to the kernel you want to use + self.workload.object_file = kernel + # Options specified on the kernel command line + boot_options = ['earlyprintk=ttyS0', 'console=ttyS0', 'lpj=7999923', + 'root=/dev/hda1'] + + self.workload.command_line = ' '.join(boot_options) + + # Create the CPUs for our system. + self.createCPU(num_cpus) + + # Create the cache heirarchy for the system. 
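+        # createCacheHierarchy() gives each core private L1I/L1D caches and a
+        # small MMU (page-table walker) cache behind a per-core crossbar, a
+        # private L2, and joins the cores at a shared L3 crossbar (with a
+        # snoop filter) in front of a single L3 bank and the memory bus.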
+ self.createCacheHierarchy() + + # Set up the interrupt controllers for the system (x86 specific) + self.setupInterrupts() + + # self.intrctrl = IntrControl() + + self.createMemoryControllersDDR4() + + if self._host_parallel: + # To get the KVM CPUs to run on different host CPUs + # Specify a different event queue for each CPU + for i,cpu in enumerate(self.cpu): + for obj in cpu.descendants(): + obj.eventq_index = 0 + + # the number of eventqs are set based + # on experiments with few benchmarks + + if len(self.cpu) > 16: + cpu.eventq_index = (i/4) + 1 + else: + cpu.eventq_index = (i/2) + 1 + def getHostParallel(self): + return self._host_parallel + + def totalInsts(self): + return sum([cpu.totalInsts() for cpu in self.cpu]) + + def createCPUThreads(self, cpu): + for c in cpu: + c.createThreads() + + def createCPU(self, num_cpus): + if self._no_kvm: + self.cpu = [AtomicSimpleCPU(cpu_id = i, switched_out = False) + for i in range(num_cpus)] + self.createCPUThreads(self.cpu) + self.mem_mode = 'timing' + + else: + # Note KVM needs a VM and atomic_noncaching + self.cpu = [X86KvmCPU(cpu_id = i) + for i in range(num_cpus)] + self.createCPUThreads(self.cpu) + self.kvm_vm = KvmVM() + self.mem_mode = 'atomic_noncaching' + + self.atomicCpu = [AtomicSimpleCPU(cpu_id = i, + switched_out = True) + for i in range(num_cpus)] + self.createCPUThreads(self.atomicCpu) + + self.timingCpu = [TimingSimpleCPU(cpu_id = i, + switched_out = True) + for i in range(num_cpus)] + + self.createCPUThreads(self.timingCpu) + + def switchCpus(self, old, new): + assert(new[0].switchedOut()) + m5.switchCpus(self, list(zip(old, new))) + + def setDiskImages(self, img_path_1, img_path_2): + disk0 = CowDisk(img_path_1) + disk2 = CowDisk(img_path_2) + self.pc.south_bridge.ide.disks = [disk0, disk2] + + def createCacheHierarchy(self): + # Create an L3 cache (with crossbar) + self.l3bus = L2XBar(width = 64, + snoop_filter = SnoopFilter(max_capacity='32MB')) + + for cpu in self.cpu: + # Create a memory bus, a coherent crossbar, in this case + cpu.l2bus = L2XBar() + + # Create an L1 instruction and data cache + cpu.icache = L1ICache(self._opts) + cpu.dcache = L1DCache(self._opts) + cpu.mmucache = MMUCache() + + # Connect the instruction and data caches to the CPU + cpu.icache.connectCPU(cpu) + cpu.dcache.connectCPU(cpu) + cpu.mmucache.connectCPU(cpu) + + # Hook the CPU ports up to the l2bus + cpu.icache.connectBus(cpu.l2bus) + cpu.dcache.connectBus(cpu.l2bus) + cpu.mmucache.connectBus(cpu.l2bus) + + # Create an L2 cache and connect it to the l2bus + cpu.l2cache = L2Cache(self._opts) + cpu.l2cache.connectCPUSideBus(cpu.l2bus) + + # Connect the L2 cache to the L3 bus + cpu.l2cache.connectMemSideBus(self.l3bus) + + self.l3cache = L3Cache(self._opts) + self.l3cache.connectCPUSideBus(self.l3bus) + + # Connect the L3 cache to the membus + self.l3cache.connectMemSideBus(self.membus) + + def setupInterrupts(self): + for cpu in self.cpu: + # create the interrupt controller CPU and connect to the membus + cpu.createInterruptController() + + # For x86 only, connect interrupts to the memory + # Note: these are directly connected to the memory bus and + # not cached + cpu.interrupts[0].pio = self.membus.mem_side_ports + cpu.interrupts[0].int_requestor = self.membus.cpu_side_ports + cpu.interrupts[0].int_responder = self.membus.mem_side_ports + + # Memory latency: Using the smaller number from [3]: 96ns + def createMemoryControllersDDR4(self): + self._createMemoryControllers(1, DDR4_2400_16x4) + + def _createMemoryControllers(self, num, cls): + + 
self.mem_ctrl = PolicyManager(range=self.mem_ranges[0]) + # FOR DDR4 + # self.mem_ctrl.tRP = '14.16ns' + # self.mem_ctrl.tRCD_RD = '14.16ns' + # self.mem_ctrl.tRL = '14.16ns' + + # self.loc_mem_ctrl = HBMCtrl() + # self.loc_mem_ctrl.dram = HBM_2000_4H_1x64(range=AddrRange(start = '0', end = '1GiB', masks = [1 << 6], intlvMatch = 0), in_addr_map=False, kvm_map=False, null=True) + # self.loc_mem_ctrl.dram_2 = HBM_2000_4H_1x64(range=AddrRange(start = '0', end = '1GiB', masks = [1 << 6], intlvMatch = 1), in_addr_map=False, kvm_map=False, null=True) + + self.loc_mem_ctrl = MemCtrl() + self.loc_mem_ctrl.dram = DDR4_2400_16x4(range=self.mem_ranges[0], in_addr_map=False, kvm_map=False) + + self.far_mem_ctrl = MemCtrl() + self.far_mem_ctrl.dram = DDR4_2400_16x4(range=self.mem_ranges[0], in_addr_map=False, kvm_map=False) + + self.loc_mem_ctrl.port = self.mem_ctrl.loc_req_port + self.far_mem_ctrl.port = self.mem_ctrl.far_req_port + + self.mem_ctrl.dram_cache_size = "128MiB" + + # self.mem_ctrl = MemCtrl() + # self.mem_ctrl.dram = DDR4_2400_16x4(range=self.mem_ranges[0]) + + def _createKernelMemoryController(self, cls): + return MemCtrl(dram = cls(range = self.mem_ranges[0]), + port = self.membus.mem_side_ports) + + def _getInterleaveRanges(self, rng, num, intlv_low_bit, xor_low_bit): + from math import log + bits = int(log(num, 2)) + if 2**bits != num: + m5.fatal("Non-power of two number of memory controllers") + + intlv_bits = bits + ranges = [ + AddrRange(start=rng.start, + end=rng.end, + intlvHighBit = intlv_low_bit + intlv_bits - 1, + xorHighBit = xor_low_bit + intlv_bits - 1, + intlvBits = intlv_bits, + intlvMatch = i) + for i in range(num) + ] + + return ranges + + def initFS(self, membus, cpus): + self.pc = Pc() + self.workload = X86FsLinux() + + # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xc000000000000000 + interrupts_address_space_base = 0xa000000000000000 + APIC_range_size = 1 << 12 + + # North Bridge + self.iobus = IOXBar() + self.bridge = Bridge(delay='50ns') + self.bridge.mem_side_port = self.iobus.cpu_side_ports + self.bridge.cpu_side_port = membus.mem_side_ports + # Allow the bridge to pass through: + # 1) kernel configured PCI device memory map address: address range + # [0xC0000000, 0xFFFF0000). (The upper 64kB are reserved for m5ops.) + # 2) the bridge to pass through the IO APIC (two pages, already + # contained in 1), + # 3) everything in the IO address range up to the local APIC, and + # 4) then the entire PCI address space and beyond. + self.bridge.ranges = \ + [ + AddrRange(0xC0000000, 0xFFFF0000), + AddrRange(IO_address_space_base, + interrupts_address_space_base - 1), + AddrRange(pci_config_address_space_base, + Addr.max) + ] + + # Create a bridge from the IO bus to the memory bus to allow access + # to the local APIC (two pages) + self.apicbridge = Bridge(delay='50ns') + self.apicbridge.cpu_side_port = self.iobus.mem_side_ports + self.apicbridge.mem_side_port = membus.cpu_side_ports + self.apicbridge.ranges = [AddrRange(interrupts_address_space_base, + interrupts_address_space_base + + cpus * APIC_range_size + - 1)] + + # connect the io bus + self.pc.attachIO(self.iobus) + + # Add a tiny cache to the IO bus. 
+ # This cache is required for the classic memory model for coherence + self.iocache = Cache(assoc=8, + tag_latency = 50, + data_latency = 50, + response_latency = 50, + mshrs = 20, + size = '1kB', + tgts_per_mshr = 12, + addr_ranges = self.mem_ranges) + self.iocache.cpu_side = self.iobus.mem_side_ports + self.iocache.mem_side = self.membus.cpu_side_ports + + ############################################### + + # Add in a Bios information structure. + self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(cpus): + bp = X86IntelMPProcessor( + local_apic_id = i, + local_apic_version = 0x14, + enable = True, + bootstrap = (i ==0)) + base_entries.append(bp) + io_apic = X86IntelMPIOAPIC( + id = cpus, + version = 0x11, + enable = True, + address = 0xfec00000) + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id = 0, bus_type='PCI ') + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id = 1, bus_type='ISA ') + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy(bus_id=1, + subtractive_decode=True, parent_bus=0) + ext_entries.append(connect_busses) + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type = 'INT', + polarity = 'ConformPolarity', + trigger = 'ConformTrigger', + source_bus_id = 0, + source_bus_irq = 0 + (4 << 2), + dest_io_apic_id = io_apic.id, + dest_io_apic_intin = 16) + base_entries.append(pci_dev4_inta) + def assignISAInt(irq, apicPin): + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type = 'ExtInt', + polarity = 'ConformPolarity', + trigger = 'ConformTrigger', + source_bus_id = 1, + source_bus_irq = irq, + dest_io_apic_id = io_apic.id, + dest_io_apic_intin = 0) + base_entries.append(assign_8259_to_apic) + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type = 'INT', + polarity = 'ConformPolarity', + trigger = 'ConformTrigger', + source_bus_id = 1, + source_bus_irq = irq, + dest_io_apic_id = io_apic.id, + dest_io_apic_intin = apicPin) + base_entries.append(assign_to_apic) + assignISAInt(0, 2) + assignISAInt(1, 1) + for i in range(3, 15): + assignISAInt(i, i) + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = \ + [ + # Mark the first megabyte of memory as reserved + X86E820Entry(addr = 0, size = '639kB', range_type = 1), + X86E820Entry(addr = 0x9fc00, size = '385kB', range_type = 2), + # Mark the rest of physical memory as available + X86E820Entry(addr = 0x100000, + size = '%dB' % (self.mem_ranges[0].size() - 0x100000), + range_type = 1), + ] + # Mark [mem_size, 3GB) as reserved if memory less than 3GB, which + # force IO devices to be mapped to [0xC0000000, 0xFFFF0000). Requests + # to this specific range can pass though bridge to iobus. + #entries.append(X86E820Entry(addr = self.mem_ranges[0].size(), + # size='%dB' % (0xC0000000 - self.mem_ranges[0].size()), + # range_type=2)) + + # Reserve the last 16kB of the 32-bit address space for m5ops + entries.append(X86E820Entry(addr = 0xFFFF0000, size = '64kB', + range_type=2)) + + # Add the rest of memory. 
This is where all the actual data is + #entries.append(X86E820Entry(addr = self.mem_ranges[-1].start, + # size='%dB' % (self.mem_ranges[-1].size()), + # range_type=1)) + + self.workload.e820_table.entries = entries + diff --git a/configs-npb-gapbs-chkpt-restore/system/system_back.py b/configs-npb-gapbs-chkpt-restore/system/system_back.py new file mode 100755 index 0000000000..8a645b918f --- /dev/null +++ b/configs-npb-gapbs-chkpt-restore/system/system_back.py @@ -0,0 +1,397 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2018 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Jason Lowe-Power + +import m5 +from m5.objects import * +from .fs_tools import * +from .caches import * + + +class MySystem(System): + + def __init__(self, kernel, disk, num_cpus, opts, no_kvm=False): + super(MySystem, self).__init__() + self._opts = opts + self._no_kvm = no_kvm + + self._host_parallel = not self._opts.no_host_parallel + + # Set up the clock domain and the voltage domain + self.clk_domain = SrcClockDomain() + self.clk_domain.clock = '2.3GHz' + self.clk_domain.voltage_domain = VoltageDomain() + + mem_size = '32GB' + self.mem_ranges = [AddrRange('100MB'), # For kernel + AddrRange(0xC0000000, size=0x100000), # For I/0 + AddrRange(Addr('4GB'), size = mem_size) # All data + ] + + + self.mem_ranges = [AddrRange(Addr('3GB')), # All data + AddrRange(0xC0000000, size=0x100000), # For I/0 + ] + + # Create the main memory bus + # This connects to main memory + self.membus = SystemXBar(width = 64) # 64-byte width + self.membus.badaddr_responder = BadAddr() + self.membus.default = Self.badaddr_responder.pio + + # Set up the system port for functional access from the simulator + self.system_port = self.membus.cpu_side_ports + + self.initFS(self.membus, num_cpus) + + + # Replace these paths with the path to your disk images. + # The first disk is the root disk. The second could be used for swap + # or anything else. 
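+        # CowDisk (see fs_tools.py) layers a copy-on-write image on top of a
+        # read-only raw image, so writes made during simulation never modify
+        # the underlying disk image file.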
+ + self.setDiskImages(disk, disk) + + if opts.second_disk: + self.setDiskImages(disk, opts.second_disk) + else: + self.setDiskImages(disk, disk) + + # Change this path to point to the kernel you want to use + self.workload.object_file = kernel + # Options specified on the kernel command line + boot_options = ['earlyprintk=ttyS0', 'console=ttyS0', 'lpj=7999923', + 'root=/dev/hda1'] + + self.workload.command_line = ' '.join(boot_options) + + # Create the CPUs for our system. + self.createCPU(num_cpus) + + # Create the cache heirarchy for the system. + self.createCacheHierarchy() + + # Set up the interrupt controllers for the system (x86 specific) + self.setupInterrupts() + + self.createMemoryControllersDDR4() + + if self._host_parallel: + # To get the KVM CPUs to run on different host CPUs + # Specify a different event queue for each CPU + for i,cpu in enumerate(self.cpu): + for obj in cpu.descendants(): + obj.eventq_index = 0 + + # the number of eventqs are set based + # on experiments with few benchmarks + + if len(self.cpu) > 16: + cpu.eventq_index = (i/4) + 1 + else: + cpu.eventq_index = (i/2) + 1 + def getHostParallel(self): + return self._host_parallel + + def totalInsts(self): + return sum([cpu.totalInsts() for cpu in self.cpu]) + + def createCPUThreads(self, cpu): + for c in cpu: + c.createThreads() + + def createCPU(self, num_cpus): + if self._no_kvm: + self.cpu = [AtomicSimpleCPU(cpu_id = i, switched_out = False) + for i in range(num_cpus)] + self.createCPUThreads(self.cpu) + self.mem_mode = 'timing' + + else: + # Note KVM needs a VM and atomic_noncaching + self.cpu = [X86KvmCPU(cpu_id = i) + for i in range(num_cpus)] + self.createCPUThreads(self.cpu) + self.kvm_vm = KvmVM() + self.mem_mode = 'atomic_noncaching' + + self.atomicCpu = [AtomicSimpleCPU(cpu_id = i, + switched_out = True) + for i in range(num_cpus)] + self.createCPUThreads(self.atomicCpu) + + self.timingCpu = [TimingSimpleCPU(cpu_id = i, + switched_out = True) + for i in range(num_cpus)] + + self.createCPUThreads(self.timingCpu) + + def switchCpus(self, old, new): + assert(new[0].switchedOut()) + m5.switchCpus(self, list(zip(old, new))) + + def setDiskImages(self, img_path_1, img_path_2): + disk0 = CowDisk(img_path_1) + disk2 = CowDisk(img_path_2) + self.pc.south_bridge.ide.disks = [disk0, disk2] + + def createCacheHierarchy(self): + # Create an L3 cache (with crossbar) + self.l3bus = L2XBar(width = 64, + snoop_filter = SnoopFilter(max_capacity='32MB')) + + for cpu in self.cpu: + # Create a memory bus, a coherent crossbar, in this case + cpu.l2bus = L2XBar() + + # Create an L1 instruction and data cache + cpu.icache = L1ICache(self._opts) + cpu.dcache = L1DCache(self._opts) + cpu.mmucache = MMUCache() + + # Connect the instruction and data caches to the CPU + cpu.icache.connectCPU(cpu) + cpu.dcache.connectCPU(cpu) + cpu.mmucache.connectCPU(cpu) + + # Hook the CPU ports up to the l2bus + cpu.icache.connectBus(cpu.l2bus) + cpu.dcache.connectBus(cpu.l2bus) + cpu.mmucache.connectBus(cpu.l2bus) + + # Create an L2 cache and connect it to the l2bus + cpu.l2cache = L2Cache(self._opts) + cpu.l2cache.connectCPUSideBus(cpu.l2bus) + + # Connect the L2 cache to the L3 bus + cpu.l2cache.connectMemSideBus(self.l3bus) + + self.l3cache = L3Cache(self._opts) + self.l3cache.connectCPUSideBus(self.l3bus) + + # Connect the L3 cache to the membus + self.l3cache.connectMemSideBus(self.membus) + + def setupInterrupts(self): + for cpu in self.cpu: + # create the interrupt controller CPU and connect to the membus + 
cpu.createInterruptController() + + # For x86 only, connect interrupts to the memory + # Note: these are directly connected to the memory bus and + # not cached + cpu.interrupts[0].pio = self.membus.mem_side_ports + cpu.interrupts[0].int_requestor = self.membus.cpu_side_ports + cpu.interrupts[0].int_responder = self.membus.mem_side_ports + + # Memory latency: Using the smaller number from [3]: 96ns + def createMemoryControllersDDR4(self): + self._createMemoryControllers(8, DDR4_2400_16x4) + + def _createMemoryControllers(self, num, cls): + kernel_controller = self._createKernelMemoryController(cls) + + ranges = self._getInterleaveRanges(self.mem_ranges[-1], num, 7, 20) + + self.mem_cntrls = [ + MemCtrl(dram = cls(range = ranges[i]), + port = self.membus.mem_side_ports) + for i in range(num) + ] + [kernel_controller] + + def _createKernelMemoryController(self, cls): + return MemCtrl(dram = cls(range = self.mem_ranges[0]), + port = self.membus.mem_side_ports) + + def _getInterleaveRanges(self, rng, num, intlv_low_bit, xor_low_bit): + from math import log + bits = int(log(num, 2)) + if 2**bits != num: + m5.fatal("Non-power of two number of memory controllers") + + intlv_bits = bits + ranges = [ + AddrRange(start=rng.start, + end=rng.end, + intlvHighBit = intlv_low_bit + intlv_bits - 1, + xorHighBit = xor_low_bit + intlv_bits - 1, + intlvBits = intlv_bits, + intlvMatch = i) + for i in range(num) + ] + + return ranges + + def initFS(self, membus, cpus): + self.pc = Pc() + self.workload = X86FsLinux() + + # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xc000000000000000 + interrupts_address_space_base = 0xa000000000000000 + APIC_range_size = 1 << 12; + + # North Bridge + self.iobus = IOXBar() + self.bridge = Bridge(delay='50ns') + self.bridge.mem_side_port = self.iobus.cpu_side_ports + self.bridge.cpu_side_port = membus.mem_side_ports + # Allow the bridge to pass through: + # 1) kernel configured PCI device memory map address: address range + # [0xC0000000, 0xFFFF0000). (The upper 64kB are reserved for m5ops.) + # 2) the bridge to pass through the IO APIC (two pages, already + # contained in 1), + # 3) everything in the IO address range up to the local APIC, and + # 4) then the entire PCI address space and beyond. + self.bridge.ranges = \ + [ + AddrRange(0xC0000000, 0xFFFF0000), + AddrRange(IO_address_space_base, + interrupts_address_space_base - 1), + AddrRange(pci_config_address_space_base, + Addr.max) + ] + + # Create a bridge from the IO bus to the memory bus to allow access + # to the local APIC (two pages) + self.apicbridge = Bridge(delay='50ns') + self.apicbridge.cpu_side_port = self.iobus.mem_side_ports + self.apicbridge.mem_side_port = membus.cpu_side_ports + self.apicbridge.ranges = [AddrRange(interrupts_address_space_base, + interrupts_address_space_base + + cpus * APIC_range_size + - 1)] + + # connect the io bus + self.pc.attachIO(self.iobus) + + # Add a tiny cache to the IO bus. + # This cache is required for the classic memory model for coherence + self.iocache = Cache(assoc=8, + tag_latency = 50, + data_latency = 50, + response_latency = 50, + mshrs = 20, + size = '1kB', + tgts_per_mshr = 12, + addr_ranges = self.mem_ranges) + self.iocache.cpu_side = self.iobus.mem_side_ports + self.iocache.mem_side = self.membus.cpu_side_ports + + ############################################### + + # Add in a Bios information structure. 
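+        # The SMBIOS and Intel MP tables built below describe the platform to
+        # the guest kernel: one local APIC per core, the IO APIC, the PCI and
+        # ISA buses, and the routing of legacy ISA interrupts to IO APIC pins.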
+ self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(cpus): + bp = X86IntelMPProcessor( + local_apic_id = i, + local_apic_version = 0x14, + enable = True, + bootstrap = (i ==0)) + base_entries.append(bp) + io_apic = X86IntelMPIOAPIC( + id = cpus, + version = 0x11, + enable = True, + address = 0xfec00000) + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id = 0, bus_type='PCI ') + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id = 1, bus_type='ISA ') + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy(bus_id=1, + subtractive_decode=True, parent_bus=0) + ext_entries.append(connect_busses) + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type = 'INT', + polarity = 'ConformPolarity', + trigger = 'ConformTrigger', + source_bus_id = 0, + source_bus_irq = 0 + (4 << 2), + dest_io_apic_id = io_apic.id, + dest_io_apic_intin = 16) + base_entries.append(pci_dev4_inta) + def assignISAInt(irq, apicPin): + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type = 'ExtInt', + polarity = 'ConformPolarity', + trigger = 'ConformTrigger', + source_bus_id = 1, + source_bus_irq = irq, + dest_io_apic_id = io_apic.id, + dest_io_apic_intin = 0) + base_entries.append(assign_8259_to_apic) + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type = 'INT', + polarity = 'ConformPolarity', + trigger = 'ConformTrigger', + source_bus_id = 1, + source_bus_irq = irq, + dest_io_apic_id = io_apic.id, + dest_io_apic_intin = apicPin) + base_entries.append(assign_to_apic) + assignISAInt(0, 2) + assignISAInt(1, 1) + for i in range(3, 15): + assignISAInt(i, i) + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = \ + [ + # Mark the first megabyte of memory as reserved + X86E820Entry(addr = 0, size = '639kB', range_type = 1), + X86E820Entry(addr = 0x9fc00, size = '385kB', range_type = 2), + # Mark the rest of physical memory as available + X86E820Entry(addr = 0x100000, + size = '%dB' % (self.mem_ranges[0].size() - 0x100000), + range_type = 1), + ] + # Mark [mem_size, 3GB) as reserved if memory less than 3GB, which + # force IO devices to be mapped to [0xC0000000, 0xFFFF0000). Requests + # to this specific range can pass though bridge to iobus. + entries.append(X86E820Entry(addr = self.mem_ranges[0].size(), + size='%dB' % (0xC0000000 - self.mem_ranges[0].size()), + range_type=2)) + + # Reserve the last 16kB of the 32-bit address space for m5ops + entries.append(X86E820Entry(addr = 0xFFFF0000, size = '64kB', + range_type=2)) + + # Add the rest of memory. 
This is where all the actual data is + entries.append(X86E820Entry(addr = self.mem_ranges[-1].start, + size='%dB' % (self.mem_ranges[-1].size()), + range_type=1)) + + self.workload.e820_table.entries = entries + diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py index 234153a57f..08c7801960 100644 --- a/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py +++ b/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py @@ -53,6 +53,36 @@ from gem5.components.cachehierarchies.classic.no_cache import NoCache from gem5.components.processors.simple_processor import SimpleProcessor from gem5.simulate.simulator import Simulator +from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import ( + MESITwoLevelCacheHierarchy, +) +#from gem5.components.cachehierarchies.ruby.mesi_three_level_cache_hierarchy import ( +# MESIThreeLevelCacheHierarchy, +#) + +""" +cache_hierarchy = MESIThreeLevelCacheHierarchy( + l1i_size = "32KiB", + l1i_assoc = 8, + l1d_size = "32KiB", + l1d_assoc = 8, + l2_size = "512KiB", + l2_assoc = 8, + l3_size = "32MiB", + l3_assoc = 32, + num_l3_banks=1, + ) +""" + +cache_hierarchy = MESITwoLevelCacheHierarchy( + l1d_size="32kB", + l1d_assoc=8, + l1i_size="32kB", + l1i_assoc=8, + l2_size="256kB", + l2_assoc=16, + num_l2_banks=2, +) parser = argparse.ArgumentParser() @@ -68,17 +98,17 @@ # This check ensures the gem5 binary is compiled to the RISCV ISA target. # If not, an exception will be thrown. -requires(isa_required=ISA.RISCV) +#requires(isa_required=ISA.RISCV) # In this setup we don't have a cache. `NoCache` can be used for such setups. -cache_hierarchy = NoCache() +#cache_hierarchy = NoCache() # We use a single channel DDR3_1600 memory system memory = SingleChannelDDR3_1600(size="32MB") # We use a simple Timing processor with one core. 
processor = SimpleProcessor( - cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1 + cpu_type=CPUTypes.O3, isa=ISA.RISCV, num_cores=1 ) # The gem5 library simble board which can be used to run simple SE-mode diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py index c20d2ea4cc..902596ec6a 100644 --- a/configs/example/gem5_library/x86-gapbs-benchmarks.py +++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py @@ -46,7 +46,7 @@ --size ``` """ - +#./build/X86/gem5.opt configs/example/gem5_library/x86-gabps-benchmarks.py --benchmark bc --synthetic 1 --size 4 import argparse import time import sys @@ -195,7 +195,7 @@ ) exit(-1) - command = f"./{args.benchmark} -g {args.size}\n" + command = "./{} -n 1 -g {}\n".format(args.benchmark, args.size) else: command = f"./{args.benchmark} -sf ../{args.size}" diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index d3c2efbb3f..5438dad889 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -41,7 +41,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv -from m5.util import addToPath, fatal +from m5.util import addToPath, fatal, warn from gem5.isas import ISA from gem5.runtime import get_runtime_isa @@ -293,9 +293,13 @@ def create_system( # Create a backing copy of physical memory in case required if options.access_backing_store: ruby.access_backing_store = True - ruby.phys_mem = SimpleMemory( - range=system.mem_ranges[0], in_addr_map=False - ) + if len(system.mem_ranges) > 1: + warn("Backing store not supported for multiple memory ranges") + # Note: to make this support multiple memory ranges you need to create + # one SimpleMemory for each physical memory range + ruby.phys_mem = [ + SimpleMemory(range=system.mem_ranges[0], in_addr_map=False) + ] def create_directories(options, bootmem, ruby_system, system): diff --git a/src/cpu/kvm/vm.cc b/src/cpu/kvm/vm.cc index e714a40b22..6c73ef4f23 100644 --- a/src/cpu/kvm/vm.cc +++ b/src/cpu/kvm/vm.cc @@ -467,10 +467,11 @@ KvmVM::setUserMemoryRegion(uint32_t slot, m.userspace_addr = (__u64)host_addr; if (ioctl(KVM_SET_USER_MEMORY_REGION, (void *)&m) == -1) { + perror("Error!"); panic("Failed to setup KVM memory region:\n" - "\tHost Address: 0x%p\n" - "\tGuest Address: 0x%llx\n", - "\tSize: %ll\n", + "\tHost Address: 0x%llx\n" + "\tGuest Address: 0x%llx\n" + "\tSize: %llu\n" "\tFlags: 0x%x\n", m.userspace_addr, m.guest_phys_addr, m.memory_size, m.flags); diff --git a/src/cpu/o3/probe/O3LooppointAnalysis.py b/src/cpu/o3/probe/O3LooppointAnalysis.py new file mode 100644 index 0000000000..949fe0b0db --- /dev/null +++ b/src/cpu/o3/probe/O3LooppointAnalysis.py @@ -0,0 +1,28 @@ +from m5.params import * +from m5.objects.Probe import ProbeListenerObject +from m5.objects import SimObject +from m5.util.pybind import * + +class O3LooppointAnalysis(ProbeListenerObject): + + type = "O3LooppointAnalysis" + cxx_header = "cpu/o3/probe/o3looppoint_analysis.hh" + cxx_class = "gem5::o3::O3LooppointAnalysis" + + ptmanager = Param.O3LooppointAnalysisManager("the PcCountAnalsi manager") + validAddrRangeStart = Param.Addr(0, "the starting address of the valid " + "insturction address range") + validAddrRangeSize = Param.Addr(0, "the size of the valid address range") + +class O3LooppointAnalysisManager(SimObject): + + type = "O3LooppointAnalysisManager" + cxx_header = "cpu/o3/probe/o3looppoint_analysis.hh" + cxx_class = "gem5::o3::O3LooppointAnalysisManager" + + cxx_exports = [ + PyBindMethod("getCounter"), + 
PyBindMethod("getPcCount"), + PyBindMethod("getMostRecentPc"), + PyBindMethod("getCurrentPc") + ] diff --git a/src/cpu/o3/probe/SConscript b/src/cpu/o3/probe/SConscript index 6039ef2eb9..94fc552a10 100644 --- a/src/cpu/o3/probe/SConscript +++ b/src/cpu/o3/probe/SConscript @@ -45,3 +45,8 @@ if not env['CONF']['USE_NULL_ISA']: SimObject('ElasticTrace.py', sim_objects=['ElasticTrace'], tags='protobuf') Source('elastic_trace.cc', tags='protobuf') DebugFlag('ElasticTrace', tags='protobuf') + + SimObject('O3LooppointAnalysis.py', sim_objects=['O3LooppointAnalysis','O3LooppointAnalysisManager']) + Source('o3looppoint_analysis.cc') + + DebugFlag("O3LooppointAnalysis") \ No newline at end of file diff --git a/src/cpu/o3/probe/o3looppoint_analysis.cc b/src/cpu/o3/probe/o3looppoint_analysis.cc new file mode 100644 index 0000000000..16111dc99a --- /dev/null +++ b/src/cpu/o3/probe/o3looppoint_analysis.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/probe/o3looppoint_analysis.hh" +#include "cpu/o3/dyn_inst.hh" + + +namespace gem5 +{ + +namespace o3 +{ + +O3LooppointAnalysis::O3LooppointAnalysis(const O3LooppointAnalysisParams &p) + : ProbeListenerObject(p), + manager(p.ptmanager), + validAddrLowerBound(p.validAddrRangeStart), + validAddrUpperBound(p.validAddrRangeStart+p.validAddrRangeSize) +{ + DPRINTF(O3LooppointAnalysis, "the valid address range start from %i to " + " %i \n", validAddrLowerBound, validAddrUpperBound); +} + +/** + * ProbeListenerArg generates a listener for the class of Arg and the + * class type T which is the class containing the function that notify will + * call. + * + * Note that the function is passed as a pointer on construction. + */ + +void +O3LooppointAnalysis::regProbeListeners() +{ + // connect the probe listener with the probe "RetriedInstsPC" in the + // corresponding core. 
+ // when "RetiredInstsPC" notifies the probe listener, then the function + // 'check_pc' is automatically called + typedef ProbeListenerArg + O3LooppointAnalysisListener; + listeners.push_back(new O3LooppointAnalysisListener(this, "Commit", + &O3LooppointAnalysis::checkPc)); +} + +void +O3LooppointAnalysis::checkPc(const DynInstConstPtr& dynInst) { + + auto &pcstate = dynInst->pcState().as(); + if (dynInst->staticInst->isMicroop() && !dynInst->staticInst->isLastMicroop()) + return; + if(validAddrUpperBound!=0) { + if(pcstate.pc() < validAddrLowerBound || pcstate.pc() > validAddrUpperBound) + return; + } + if (dynInst->staticInst->isControl() && dynInst->staticInst->isDirectCtrl() && dynInst->tcBase()->getIsaPtr()->inUserMode()) { + if(pcstate.npc() < pcstate.pc()) + manager->countPc(pcstate.npc()); + } +} + +O3LooppointAnalysisManager::O3LooppointAnalysisManager(const O3LooppointAnalysisManagerParams &p) + : SimObject(p), + currentPc(0) +{ + +} + +void +O3LooppointAnalysisManager::countPc(const Addr pc) +{ + if (counter.find(pc) == counter.end()){ + counter.insert(std::make_pair(pc,std::make_pair(Addr(0), curTick()))); + } + else{ + ++counter.find(pc)->second.first; + // update the tick when this pc is last accessed + counter.find(pc)->second.second = curTick(); + } + currentPc = pc; + auto it = std::find_if(mostRecentPc.begin(), mostRecentPc.end(), + [&pc](const std::pair& p) { return p.first == pc; }); + if (it == mostRecentPc.end()) { + // If pc is not in the list, then add it to the front of the list + while (mostRecentPc.size() >= 5) { + mostRecentPc.pop_back(); + } + mostRecentPc.push_front(std::make_pair(pc,curTick())); + } else { + if (it != mostRecentPc.begin()) { + // If pc is in the list, but not at the front, then move it to the + // front of the list + mostRecentPc.push_front(*it); + mostRecentPc.erase(it); + } + it->second = curTick(); + } +} + +} // namespace o3 +} // namespace gem5 diff --git a/src/cpu/o3/probe/o3looppoint_analysis.hh b/src/cpu/o3/probe/o3looppoint_analysis.hh new file mode 100644 index 0000000000..94d323e378 --- /dev/null +++ b/src/cpu/o3/probe/o3looppoint_analysis.hh @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2023 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_PROBES_O3LOOPPOINT_ANALYSIS_HH__ +#define __CPU_SIMPLE_PROBES_O3LOOPPOINT_ANALYSIS_HH__ + +#include +#include + +#include "params/O3LooppointAnalysis.hh" +#include "params/O3LooppointAnalysisManager.hh" +#include "sim/probe/probe.hh" +#include "cpu/simple_thread.hh" +#include "arch/generic/pcstate.hh" +#include "cpu/probes/pc_count_pair.hh" +#include "debug/O3LooppointAnalysis.hh" +#include "cpu/o3/dyn_inst_ptr.hh" + +namespace gem5 +{ + +namespace o3 +{ +class O3LooppointAnalysis : public ProbeListenerObject +{ + public: + O3LooppointAnalysis(const O3LooppointAnalysisParams ¶ms); + + virtual void regProbeListeners(); + + void checkPc(const DynInstConstPtr& dynInst); + + private: + + O3LooppointAnalysisManager *manager; + Addr validAddrLowerBound; + Addr validAddrUpperBound; +}; + +class O3LooppointAnalysisManager : public SimObject +{ + public: + O3LooppointAnalysisManager(const O3LooppointAnalysisManagerParams ¶ms); + void countPc(Addr pc); + + private: + /** + * a set of Program Counter addresses that should notify the + * PcCounterTrackerManager for + * counter maps addresses to a pair of + * counter and the last tick the address was accessed + */ + std::map> counter; + std::list> mostRecentPc; + Addr currentPc; + + + public: + std::map> + getCounter() const + { + return counter; + } + + // returns a pair of the count and last tick + // the count was incremented + std::pair + getPcCount(Addr pc) const + { + if(counter.find(pc) != counter.end()) { + return counter.find(pc)->second; + } + return std::make_pair(-1, -1); + } + + // returns a vector of the most recently + // accessed PCs + std::vector> + getMostRecentPc() const + { + std::vector> recent_pcs; + for (auto it = mostRecentPc.begin(); it != mostRecentPc.end(); it++) { + recent_pcs.push_back(*it); + } + return recent_pcs; + } + + Addr + getCurrentPc() const + { + return currentPc; + } + +}; + + + +} // namespace o3 +} // namespace gem5 + +#endif // __CPU_SIMPLE_PROBES_O3LOOPPOINT_ANALYSIS_HH__ diff --git a/src/cpu/probes/pc_count_pair.hh b/src/cpu/probes/pc_count_pair.hh index fd6bc639fe..3d25b852b5 100644 --- a/src/cpu/probes/pc_count_pair.hh +++ b/src/cpu/probes/pc_count_pair.hh @@ -86,9 +86,9 @@ class PcCountPair { size_t operator()(const PcCountPair& item) const { - size_t xHash = std::hash()(item.pc); + size_t xHash = std::hash()(item.pc); size_t yHash = std::hash()(item.count); - return xHash * 2 + yHash; + return xHash ^ yHash << 32; } }; diff --git a/src/cpu/testers/dr_trace_player/DRTracePlayer.py b/src/cpu/testers/dr_trace_player/DRTracePlayer.py new file mode 100644 index 0000000000..fcc014be40 --- /dev/null +++ b/src/cpu/testers/dr_trace_player/DRTracePlayer.py @@ -0,0 +1,106 @@ +# Copyright (c) 2022 The Regents of the University of California. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +from m5.params import * +from m5.proxy import * + +from m5.objects.ClockedObject import ClockedObject +from m5.objects.SimObject import SimObject + + +class DRTraceReader(SimObject): + """This is a trace reader which is shared between possibly many different + trace players. + """ + + type = "DRTraceReader" + cxx_class = "gem5::DRTraceReader" + cxx_header = "cpu/testers/dr_trace_player/trace_reader.hh" + + directory = Param.String("Directory which contains the memtrace files") + + num_players = Param.Int("Total number of players connected to this reader") + + # Since this is the thing which sees the timestamps, this is what will do + # the "context switches." So, if we want to add different context switch + # options, this is the place. + + +class DRTracePlayer(ClockedObject): + """This is a trace player object. One of these represents a "core." + + You can limit the amount of ILP by using the `max_ipc` and/or the + `max_outstanding_reqs` parameters. If these are set to 0, then there is no + limit. + + This model assumes that instructions are executed in order, except for + memory instructions. I.e., instructions are executed sequentially, the + memory instructions are sent to memory sequentially, but other + instructions can execute (including memory instructions) while prior + instructions are waiting for memory. + + When used with caches, `send_data` should be true. + + The addresses are virtual-ish addresses in the trace. Currently, there is + no address translation. Instead, if the backing address space (i.e., main + memory) has a limited range you should set the `compress_address_range` to + be the backing memory's address range. + You can set memory range to be much larger when using `is_null=True` on + the abstract memory. 
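+
+    A minimal configuration sketch (``traces_dir`` and the player count are
+    placeholders, and the memory-side port hookup depends on the rest of the
+    system; this only illustrates how the reader and players relate):
+
+        reader = DRTraceReader(directory=traces_dir, num_players=2)
+        players = [
+            DRTracePlayer(reader=reader, send_data=True) for _ in range(2)
+        ]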
+ """ + + type = "DRTracePlayer" + cxx_class = "gem5::DRTracePlayer" + cxx_header = "cpu/testers/dr_trace_player/trace_player.hh" + + # Port used for sending requests and receiving responses + port = RequestPort("This port sends requests and receives responses") + + # System used to determine the mode of the memory system + system = Param.System(Parent.any, "System this generator is part of") + + reader = Param.DRTraceReader("The reader for this player") + + max_ipc = Param.Int( + 1, + "Max number of instructions per cycle. Zero means no limit.", + ) + + max_outstanding_reqs = Param.Int( + 16, + "Max number of memory instructions outstanding. Zero means no limit.", + ) + + send_data = Param.Bool( + False, + "If true, this player will send dummy data on writes and make space " + "for reads. If false, system.memory.null should be true.", + ) + + compress_address_range = Param.AddrRange( + 0, "Compress the addresses into the given range if valid." + ) diff --git a/src/cpu/testers/dr_trace_player/SConscript b/src/cpu/testers/dr_trace_player/SConscript new file mode 100644 index 0000000000..2881fba8b8 --- /dev/null +++ b/src/cpu/testers/dr_trace_player/SConscript @@ -0,0 +1,35 @@ +# Copyright (c) 2022 The Regents of the University of California. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +Import("*") + +SimObject("DRTracePlayer.py", sim_objects=["DRTracePlayer", "DRTraceReader"]) + +Source("trace_player.cc") +Source("trace_reader.cc") + +DebugFlag("DRTrace") diff --git a/src/cpu/testers/dr_trace_player/trace_player.cc b/src/cpu/testers/dr_trace_player/trace_player.cc new file mode 100644 index 0000000000..4ee814f94f --- /dev/null +++ b/src/cpu/testers/dr_trace_player/trace_player.cc @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/testers/dr_trace_player/trace_player.hh" + +#include "base/trace.hh" +#include "debug/DRTrace.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" + +namespace gem5 +{ + +DRTracePlayer::DRTracePlayer(const Params ¶ms) : + ClockedObject(params), + executeNextInstEvent([this]{ executeNextInst(); }, + name()+".exec_next_event"), + retryExecuteInstEvent([this]{ retryExecuteInst(); }, + name()+".retry_exec_event"), + reader(params.reader), + playerId(0), + requestorId(params.system->getRequestorId(this)), + maxOutstandingMemReqs(params.max_outstanding_reqs), + maxInstsPerCycle(params.max_ipc), + compressAddressRange(params.compress_address_range), + cacheLineSize(params.system->cacheLineSize()), + port(name() + ".port", *this), + stats(this) +{ + +} + +void +DRTracePlayer::startup() +{ + // Get the first instruction and schedule it to be run in the first cycle + schedule(executeNextInstEvent, 0); +} + +void +DRTracePlayer::executeNextInst() +{ + assert(!stalled); + nextRef = reader->getNextTraceReference(playerId); + tryExecuteInst(nextRef); +} + +void +DRTracePlayer::retryExecuteInst() +{ + assert(stalled); + stalled = false; + tryExecuteInst(nextRef); +} + +void +DRTracePlayer::tryExecuteInst(DRTraceReader::TraceRef &cur_ref) +{ + assert(!stalled); + if (!cur_ref.isValid) { + // End of trace for this player exit the simulation + // TODO: Move this to when the last instruction is completed + exitSimLoopNow("End of DRTrace"); + } + + DPRINTF(DRTrace, "Exec reference pc: %0#x, addr: %0#x, size: %d, " + "%s, type: %d, valid: %d\n", cur_ref.pc, cur_ref.addr, + cur_ref.size, cur_ref.isMemRef() ? 
"memory" : "other", + cur_ref.type, cur_ref.isValid); + + stalled = executeGenericInst(cur_ref); + if (stalled) { + return; // Note: executeGenericInst scheduled the event + } + + if (cur_ref.isMemRef()) { + stalled = executeMemInst(cur_ref); + if (stalled) { + return; // Note: recvRetry will schedule the retry event + } + } + + // If we got here, then we know there are more instructions to execute + // this cycle + assert(!executeNextInstEvent.scheduled()); + assert(!retryExecuteInstEvent.scheduled()); + schedule(executeNextInstEvent, curTick()); +} + +void +DRTracePlayer::scheduleInstRetry() +{ + assert(!executeNextInstEvent.scheduled()); + if (!retryExecuteInstEvent.scheduled()) { + schedule(retryExecuteInstEvent, nextCycle()); + numExecutingInsts = 0; + } else { + assert(numExecutingInsts == 0); + } +} + +bool +DRTracePlayer::executeGenericInst(DRTraceReader::TraceRef &cur_inst) +{ + if (maxInstsPerCycle && (numExecutingInsts++ > maxInstsPerCycle)) { + DPRINTF(DRTrace, "Stalling for instruction limit\n"); + scheduleInstRetry(); + return true; // Stall for a cycle + } + + if (cur_inst.isInstRef()) { + assert(cur_inst.pc != 0); + curPC = cur_inst.pc; + stats.numInsts++; + } + + return false; +} + +bool +DRTracePlayer::executeMemInst(DRTraceReader::TraceRef &mem_ref) +{ + assert(mem_ref.addr != 0); + + if (maxOutstandingMemReqs && + (numOutstandingMemReqs + 1 > maxOutstandingMemReqs)) { + DPRINTF(DRTrace, "Stalling for outstanding memory limit\n"); + stats.memStalls++; + return true; // Will be unstalled in recvResponse + } + + if (!trySendMemRef(mem_ref)) { + stats.numMemInsts++; + return false; + } else { + return true; + } +} + +bool +DRTracePlayer::trySendMemRef(DRTraceReader::TraceRef &mem_ref) +{ + // split_pkt will be nullptr if not a split req + auto [pkt, split_pkt] = getPacket(mem_ref); + + // ensure that currently we are not in process of + // retrying to send the second part of a previously + // stalled request to avoid duplicate first pkt in + // the memory system. + // Also, the assumption is that we cannot start a + // new memory request in parallel. 
+ if (!retrySplitPkt) { + DPRINTF(DRTrace, "Trying to send %s\n", pkt->print()); + + if (!port.sendTimingReq(pkt)) { + DPRINTF(DRTrace, "Failed to send pkt\n"); + if (stats.memStallStart == 0) { + stats.memStallStart = curTick(); + } + delete pkt; + delete split_pkt; + + // return true if we have to stall on the first pkt + // irrespective of if this is a split req + return true; + } else { + numOutstandingMemReqs++; + stats.outstandingMemReqs.sample(numOutstandingMemReqs); + stats.latencyTracker[pkt] = curTick(); + if (split_pkt == nullptr) { + // if this is not a split req, we can + // return here + return false; + } + } + } else { + // we should delete the first pkt here + // if we are only trying to resend the + // second pkt + delete pkt; + } + + DPRINTF(DRTrace, "Trying to send split %s\n", split_pkt->print()); + + // if the first pkt is sent out, and the current + // request is a split request, try to send out + // the second pkt + if (!port.sendTimingReq(split_pkt)) { + DPRINTF(DRTrace, "Failed to send pkt (split pkt) \n"); + if (stats.memStallStart == 0) { + stats.memStallStart = curTick(); + } + delete split_pkt; + // also remember that we only need to retry on second part of + // the split pkt + retrySplitPkt = true; + return true; + } else { + numOutstandingMemReqs++; + stats.outstandingMemReqs.sample(numOutstandingMemReqs); + stats.latencyTracker[split_pkt] = curTick(); + retrySplitPkt = false; + // At this point, we are sure that both pkts of the split req + // are received by the port + return false; + } +} + +std::tuple +DRTracePlayer::getPacket(DRTraceReader::TraceRef &mem_ref) +{ + Request::Flags flags = Request::PHYSICAL; + if (mem_ref.type == DRTraceReader::TraceRef::PREFETCH) { + flags = flags | Request::PREFETCH; + } + + Addr addr = mem_ref.addr; + if (compressAddressRange.size()) { + addr -= compressAddressRange.start(); + addr %= compressAddressRange.size(); + } + + bool split_req = false; + unsigned size = mem_ref.size; + Addr split_addr = roundDown(addr + size - 1, cacheLineSize); + if (split_addr > addr) { + DPRINTF(DRTrace, "Split pkt (crosses cache line boundary) created\n"); + size = split_addr - addr; + split_req = true; + } + + // Create new request + RequestPtr req = std::make_shared(addr, size, flags, + requestorId); + req->setPC(curPC); + + MemCmd cmd; + if (mem_ref.type == DRTraceReader::TraceRef::READ || + mem_ref.type == DRTraceReader::TraceRef::PREFETCH) { + cmd = MemCmd::ReadReq; + } else { + assert(mem_ref.type == DRTraceReader::TraceRef::WRITE); + cmd = MemCmd::WriteReq; + } + // Embed it in a packet + PacketPtr pkt = new Packet(req, cmd); + + if (params().send_data) { + uint8_t* pkt_data = new uint8_t[req->getSize()]; + pkt->dataDynamic(pkt_data); + + if (cmd.isWrite()) { + std::fill_n(pkt_data, req->getSize(), (uint8_t)requestorId); + } + } + + PacketPtr split_pkt = nullptr; + + // In case of split packets when we want to send two requests. 
+ // For the second request, the starting address + // will be split_addr and the size will be cacheLineSize - size + + if (split_req) { + + // Create the split request + RequestPtr split_req = std::make_shared(split_addr, + cacheLineSize - size, flags, requestorId); + split_req->setPC(curPC); + + // Embed it in a packet + split_pkt = new Packet(split_req, cmd); + + if (params().send_data) { + uint8_t* split_pkt_data = new uint8_t[split_req->getSize()]; + split_pkt->dataDynamic(split_pkt_data); + + if (cmd.isWrite()) { + std::fill_n(split_pkt_data, split_req->getSize(), + (uint8_t)requestorId); + } + } + } + + return {pkt, split_pkt}; +} + +Port & +DRTracePlayer::getPort(const std::string &if_name, PortID idx) +{ + if (if_name == "port") { + return port; + } else { + return ClockedObject::getPort(if_name, idx); + } +} + +void +DRTracePlayer::recvReqRetry() +{ + DPRINTF(DRTrace, "Received retry request\n"); + assert(stalled); + + stats.memStalledTime.sample(curTick() - stats.memStallStart); + stats.memStallStart = 0; + + scheduleInstRetry(); +} + +bool +DRTracePlayer::recvTimingResp(PacketPtr pkt) +{ + DPRINTF(DRTrace, "Received response for %s\n", pkt->print()); + numOutstandingMemReqs--; + + stats.memLatency.sample(curTick() - stats.latencyTracker[pkt]); + stats.latencyTracker.erase(pkt); + + delete pkt; + + if (stalled) { + scheduleInstRetry(); + } + + return true; +} + +DRTracePlayer::Stats::Stats(statistics::Group *parent) : + statistics::Group(parent), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions executed (not counting memory)"), + ADD_STAT(numMemInsts, statistics::units::Count::get(), + "Number of memory instructions executed"), + ADD_STAT(memStalledTime, statistics::units::Tick::get(), + "Total time stalled for memory each time stalled"), + ADD_STAT(memLatency, statistics::units::Tick::get(), + "Latency for each memory access"), + ADD_STAT(memStalls, statistics::units::Count::get(), + "Number of times stalled for outstanding memory limit"), + ADD_STAT(instStalls, statistics::units::Count::get(), + "Number of times stalled for IPC limit"), + ADD_STAT(outstandingMemReqs, statistics::units::Count::get(), + "Number of outstanding requests for each new request") +{ + memStalledTime + .init(16) + .flags(statistics::pdf | statistics::dist); + memLatency + .init(16) + .flags(statistics::pdf | statistics::dist); + outstandingMemReqs + .init(16) + .flags(statistics::pdf | statistics::dist); +} + +} // namespace gem5 diff --git a/src/cpu/testers/dr_trace_player/trace_player.hh b/src/cpu/testers/dr_trace_player/trace_player.hh new file mode 100644 index 0000000000..8f614054b6 --- /dev/null +++ b/src/cpu/testers/dr_trace_player/trace_player.hh @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef __CPU_TESTERS_DR_TRACE_PLAYER_TRACE_PLAYER_HH__ +#define __CPU_TESTERS_DR_TRACE_PLAYER_TRACE_PLAYER_HH__ + +/** + * @file + * Contains the player for dynamario traces + * This object works with the trace reader to play dynamorio traces + */ + +#include + +#include "base/addr_range.hh" +#include "cpu/testers/dr_trace_player/trace_reader.hh" +#include "mem/port.hh" +#include "params/DRTracePlayer.hh" +#include "sim/clocked_object.hh" + +namespace gem5 +{ + +/** + * An object to play dynamorio traces. + * This object represents on "core." The core can execute instructions from + * multiple different threads. The trace reader acts as the scheduler and + * chooses which cores will execute which thread and when. + * The cores (players) must request the next item from the centralized trace + * reader. + */ +class DRTracePlayer : public ClockedObject +{ + private: + // Events + EventFunctionWrapper executeNextInstEvent; + EventFunctionWrapper retryExecuteInstEvent; + + // Parameters + DRTraceReader *reader; + int playerId; + int requestorId; + int maxOutstandingMemReqs; + int maxInstsPerCycle; + AddrRange compressAddressRange; + int cacheLineSize; + + // variable to keep track of retries for split pkts + bool retrySplitPkt = false; + + // State + bool stalled = false; + Addr curPC = 0; + DRTraceReader::TraceRef nextRef; + int numExecutingInsts = 0; + int numOutstandingMemReqs = 0; + + /** + * @brief Take the current reference in nextRef and try to execute it. + * + * The instruction may no be able to be executed (stalled), if so, this + * function will ensure the correct events will be scheduled. + * + * @param cur_ref the instruction to execute + */ + void tryExecuteInst(DRTraceReader::TraceRef &cur_ref); + + /** + * @brief Called from the similarly named event. + * + * Gets a new instruction and calls `tryExecuteInst` + */ + void executeNextInst(); + + /** + * @brief Like `executeNextInst`, but does not get a new instruction + */ + void retryExecuteInst(); + + /** + * @brief Helper function to schedule instruction retries + */ + void scheduleInstRetry(); + + /** + * @brief Do the timing execution for a generic instruction + * + * This should be called for *all* instructions that are executed. 
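+     * Memory references are additionally handed to executeMemInst() by the
+     * caller once this returns without stalling.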
+ * + * @param cur_inst The instruction to execute + * @return true if we should now stall (retry will be scheduled) + * @return false if we can execute more instructions this cycle + */ + bool executeGenericInst(DRTraceReader::TraceRef &cur_inst); + + /** + * @brief Do the timing execution for a memory instruction + * + * @param mem_ref The memory instruction to execute + * @return true if we should now stall (retry will be scheduled) + * @return false if we can execute more instructions this cycle + */ + bool executeMemInst(DRTraceReader::TraceRef &mem_ref); + + void recvReqRetry(); + + bool recvTimingResp(PacketPtr pkt); + + std::tuple + getPacket(DRTraceReader::TraceRef &mem_ref); + + /** + * @brief Send a request to memory based on the trace + * + * @param mem_ref + * @return true if the port is stalled + * @return false if the port accepted the packet + */ + bool trySendMemRef(DRTraceReader::TraceRef &mem_ref); + + public: + PARAMS(DRTracePlayer); + DRTracePlayer(const Params ¶ms); + + void startup() override; + + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID) override; + + private: + class DataPort : public RequestPort + { + public: + DataPort(const std::string &name, DRTracePlayer &player) : + RequestPort(name, &player), player(player) + { } + + protected: + void recvReqRetry() override { player.recvReqRetry(); } + + bool recvTimingResp(PacketPtr pkt) override + { return player.recvTimingResp(pkt); } + + void recvTimingSnoopReq(PacketPtr pkt) override { } + + void recvFunctionalSnoop(PacketPtr pkt) override { } + + Tick recvAtomicSnoop(PacketPtr pkt) override { return 0; } + + private: + DRTracePlayer &player; + }; + + DataPort port; + + struct Stats : public statistics::Group + { + Stats(statistics::Group *parent); + + statistics::Scalar numInsts; + statistics::Scalar numMemInsts; + + statistics::Histogram memStalledTime; + statistics::Histogram memLatency; + + statistics::Scalar memStalls; + statistics::Scalar instStalls; + + statistics::Histogram outstandingMemReqs; + + Tick memStallStart = 0; + std::map latencyTracker; + } stats; +}; + +} // namespace gem5 + +#endif //__CPU_TESTERS_DR_TRACE_PLAYER_TRACE_PLAYER_HH__ diff --git a/src/cpu/testers/dr_trace_player/trace_reader.cc b/src/cpu/testers/dr_trace_player/trace_reader.cc new file mode 100644 index 0000000000..afaa3950da --- /dev/null +++ b/src/cpu/testers/dr_trace_player/trace_reader.cc @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/testers/dr_trace_player/trace_reader.hh" + +#include +#include +#include +#include + +#include + +#include "base/trace.hh" +#include "debug/DRTrace.hh" + +namespace gem5 +{ + +DRTraceReader::DRTraceReader(const Params ¶ms) : + SimObject(params), + directory(params.directory) +{ + // Check to make sure the directory exists and has files in it named + // *.memtrace.gz + fatal_if(!is_directory(directory), "DRTraceReader requires the " + "directory parameter to point to valid directory. %s is not a" + "directory", directory.string().c_str()); + + for (auto const& dir_entry : + std::filesystem::directory_iterator{directory}) { + // get all files that end with ".memtrace.gz" + std::filesystem::path p(dir_entry); + if (p.extension().string() == std::string(".gz") && + p.stem().extension().string() == std::string(".memtrace")) { + filenames.emplace_back(p.string()); + } + } + + fatal_if(filenames.empty(), "Did not find any trace files in %s. Make sure" + " you pass a directory which has files *.memtrace.gz to " + "DRTraceReader.", directory.string().c_str()); + + DPRINTF(DRTrace, "Found %d trace files in %s\n", + filenames.size(), directory.string().c_str()); + + currentTids.resize(params.num_players, -1); +} + +// TODO: Close all files on exit + +void +DRTraceReader::init() +{ + // open the files + for (auto const &file : filenames) { + gzFile fdz = gzopen(file.c_str(), "rb"); + fatal_if(!fdz, "Could not open the file %s.", file); + traceFiles.push_back(fdz); + } + timestamps.resize(traceFiles.size(), 0); + + // Get the first timestamp in each file. + for (int i = 0; i < traceFiles.size(); i++) { + DRTraceEntry raw_entry = _getNextEntry(i); + while (raw_entry.size != TRACE_MARKER_TYPE_TIMESTAMP) { + // Keep getting entries until we get the first timestamp + raw_entry = _getNextEntry(i); + } + timestamps[i] = raw_entry.addr; // set the timestamp + } +} + +DRTraceReader::DRTraceEntry +DRTraceReader::_getNextEntry(unsigned int tid) +{ + panic_if(tid > traceFiles.size(), "tid (%d) out of range.", tid); + auto bytes = sizeof(DRTraceReader::DRTraceEntry); + DRTraceReader::DRTraceEntry entry{}; + auto bytes_read = gzread(traceFiles[tid], (void*)&entry, bytes); + assert(bytes_read == bytes); + return entry; +} + +DRTraceReader::TraceRef +DRTraceReader::getNextTraceReference(unsigned player_id) +{ + assert(player_id < currentTids.size()); + + TraceRef ref; + + int cur_tid = currentTids[player_id]; + if (cur_tid < 0) { + // Nothing executing on this player. 
See if there's something else + // to execute + unsigned int next_tid = getLowestTimestampThread(); + if (next_tid == std::numeric_limits::max()) { + // nothing else to do, return invalid reference + return ref; + } + currentTids[player_id] = next_tid; + return getNextTraceReference(player_id); + } + + DRTraceReader::DRTraceEntry raw_entry = _getNextEntry(cur_tid); + + switch (raw_entry.type) { + case TRACE_TYPE_MARKER: + if (raw_entry.size == TRACE_MARKER_TYPE_TIMESTAMP) { + uint64_t delta = raw_entry.addr - timestamps[cur_tid]; + // Switch??? + if (delta > 0) { + // for now, assert true as we shouldn't see the same + // timestamp twice. In the future, we want to make this + // delta check a parameter. + timestamps[cur_tid] = raw_entry.addr; + unsigned int next_tid = getLowestTimestampThread(); + if (next_tid == std::numeric_limits::max()) { + // nothing else to do + currentTids[player_id] = -1; + } else { + currentTids[player_id] = next_tid; + // Use this new TID to get the next instruction + return getNextTraceReference(player_id); + } + } + } else { + warn_once("Skipping unknown marker type: %d, value: %d.\n", + raw_entry.size, raw_entry.addr); + return getNextTraceReference(player_id); + } + break; + case TRACE_TYPE_READ: + ref.addr = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::READ; + break; + case TRACE_TYPE_WRITE: + ref.addr = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::WRITE; + break; + case TRACE_TYPE_PREFETCH: + case TRACE_TYPE_PREFETCH_READ_L1: + case TRACE_TYPE_PREFETCHT1: + case TRACE_TYPE_PREFETCHT2: + case TRACE_TYPE_PREFETCHNTA: + ref.addr = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::PREFETCH; + break; + case TRACE_TYPE_INSTR: + case TRACE_TYPE_INSTR_SYSENTER: + case TRACE_TYPE_INSTR_NO_FETCH: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::GENERIC_INST; + break; + case TRACE_TYPE_INSTR_DIRECT_JUMP: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::DIRECT_JUMP; + break; + case TRACE_TYPE_INSTR_INDIRECT_JUMP: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::INDIRECT_JUMP; + break; + case TRACE_TYPE_INSTR_CONDITIONAL_JUMP: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::CONDITIONAL_JUMP; + break; + case TRACE_TYPE_INSTR_DIRECT_CALL: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::DIRECT_CALL; + break; + case TRACE_TYPE_INSTR_INDIRECT_CALL: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::INDIRECT_CALL; + break; + case TRACE_TYPE_INSTR_RETURN: + ref.pc = raw_entry.addr; + ref.size = raw_entry.size; + ref.isValid = true; + ref.type = TraceRef::RETURN; + break; + case TRACE_TYPE_HEADER: + DPRINTF(DRTrace, "Found header in trace. 
Version %d\n", + raw_entry.addr); + panic_if(raw_entry.addr != 3, + "DRTraceReader only works with version 3"); + return getNextTraceReference(player_id); + break; + case TRACE_TYPE_FOOTER: + case TRACE_TYPE_THREAD_EXIT: + DPRINTF(DRTrace, "Found footer or thread exit in trace\n"); + // TODO: Close the file + // Return an invalid reference since we're at the end + break; + case TRACE_TYPE_THREAD: + DPRINTF(DRTrace, "Found thread %d\n", raw_entry.addr); + return getNextTraceReference(player_id); + case TRACE_TYPE_PID: + DPRINTF(DRTrace, "Found pid %d\n", raw_entry.addr); + return getNextTraceReference(player_id); + default: + panic("Unknown trace type %d\n", raw_entry.type); + } + + return ref; +} + +} // namespace gem5 diff --git a/src/cpu/testers/dr_trace_player/trace_reader.hh b/src/cpu/testers/dr_trace_player/trace_reader.hh new file mode 100644 index 0000000000..e8b492dbc9 --- /dev/null +++ b/src/cpu/testers/dr_trace_player/trace_reader.hh @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_TESTERS_DR_TRACE_PLAYER_TRACE_READER_HH__ +#define __CPU_TESTERS_DR_TRACE_PLAYER_TRACE_READER_HH__ + +/** + * @file + * Contains the reader for dynamario traces + * See https://dynamorio.org/sec_drcachesim_format.html for details on the + * trace format. + * This file reimplements some of the code from drcachesim. See the dynamario + * git repo for the original code. This is built using version 9.0.19202. + * https://github.com/DynamoRIO/dynamorio + */ + +#include + +#include +#include +#include +#include + +#include "params/DRTraceReader.hh" +#include "sim/sim_object.hh" + +namespace gem5 +{ + + +/** + * An object which reads Dynamorio traces. + * Note that this is not a complete reader. 
It is currently only designed to + * read the google workload traces + * https://dynamorio.org/google_workload_traces.html + */ +class DRTraceReader : public SimObject +{ + public: + struct TraceRef + { + // Only used for instruction references + // (Note: we could add this to memory references in the future) + Addr pc = 0; + // Memory address referenced. Only used for memory references + Addr addr = 0; + unsigned int size = 0; + // True if this reference is valid. An invalid reference means the + // stream is over. + bool isValid = false; + // Thread id of the reference + int tid = 0; + // See dynamorio/clients/drcachesim/common/trace_entry.h for details + // There is not necessarily a 1-to-1 mapping with TRACE_TYPE + enum + { + // Inst types + GENERIC_INST, + DIRECT_JUMP, + INDIRECT_JUMP, + CONDITIONAL_JUMP, + DIRECT_CALL, + INDIRECT_CALL, + RETURN, + // memory types + READ, + WRITE, + PREFETCH + } type = {}; + + bool + isMemRef() + { + return type == READ || type == WRITE || type == PREFETCH; + } + + bool + isInstRef() + { + return !isMemRef(); + } + }; + + private: + /** + * Slightly modified trace entry from dynamorio. + * See dynamorio/clients/drcachesim/common/trace_entry.h for original. + * This is compatible with "version 3" of the trace + */ + struct GEM5_PACKED DRTraceEntry + { + unsigned short type; + unsigned short size; + uint64_t addr; + }; + + private: + + /// The directory which contains the trace files (one per thread) + std::filesystem::path directory; + /// All of the filenames of the trace files + std::vector filenames; + + /// The open trace files (one per thread) + std::vector traceFiles; + + /// The current timestamps for each trace file. Should be the same length + /// as the traceFiles above. + std::vector timestamps; + + /// The current thread being executed on each player. 
If -1, then nothing + /// is executing on that player + std::vector currentTids; + + /** + * @brief Get the Next Entry object for the give thread id + * + * @param tid The thread id of the trace file to get + * @return TraceEntry + */ + DRTraceEntry _getNextEntry(unsigned int tid); + + unsigned int + getLowestTimestampThread() + { + return std::distance(timestamps.begin(), + std::min_element(timestamps.begin(), timestamps.end())); + } + + enum + { + TRACE_TYPE_READ, + TRACE_TYPE_WRITE, + TRACE_TYPE_PREFETCH, + TRACE_TYPE_PREFETCHT0, + TRACE_TYPE_PREFETCH_READ_L1 = + TRACE_TYPE_PREFETCHT0, + TRACE_TYPE_PREFETCHT1, + TRACE_TYPE_PREFETCH_READ_L2 = + TRACE_TYPE_PREFETCHT1, + TRACE_TYPE_PREFETCHT2, + TRACE_TYPE_PREFETCH_READ_L3 = + TRACE_TYPE_PREFETCHT2, + TRACE_TYPE_PREFETCHNTA, + TRACE_TYPE_PREFETCH_READ, + TRACE_TYPE_PREFETCH_WRITE, + TRACE_TYPE_PREFETCH_INSTR, + TRACE_TYPE_INSTR, + TRACE_TYPE_INSTR_DIRECT_JUMP, + TRACE_TYPE_INSTR_INDIRECT_JUMP, + TRACE_TYPE_INSTR_CONDITIONAL_JUMP, + TRACE_TYPE_INSTR_DIRECT_CALL, + TRACE_TYPE_INSTR_INDIRECT_CALL, + TRACE_TYPE_INSTR_RETURN, + TRACE_TYPE_INSTR_BUNDLE, + TRACE_TYPE_INSTR_FLUSH, + TRACE_TYPE_INSTR_FLUSH_END, + TRACE_TYPE_DATA_FLUSH, + TRACE_TYPE_DATA_FLUSH_END, + TRACE_TYPE_THREAD, + TRACE_TYPE_THREAD_EXIT, + TRACE_TYPE_PID, + TRACE_TYPE_HEADER, + TRACE_TYPE_FOOTER, + TRACE_TYPE_HARDWARE_PREFETCH, + TRACE_TYPE_MARKER, + TRACE_TYPE_INSTR_NO_FETCH, + TRACE_TYPE_INSTR_MAYBE_FETCH, + TRACE_TYPE_INSTR_SYSENTER, + TRACE_TYPE_PREFETCH_READ_L1_NT, + TRACE_TYPE_PREFETCH_READ_L2_NT, + TRACE_TYPE_PREFETCH_READ_L3_NT, + TRACE_TYPE_PREFETCH_INSTR_L1, + TRACE_TYPE_PREFETCH_INSTR_L1_NT, + TRACE_TYPE_PREFETCH_INSTR_L2, + TRACE_TYPE_PREFETCH_INSTR_L2_NT, + TRACE_TYPE_PREFETCH_INSTR_L3, + TRACE_TYPE_PREFETCH_INSTR_L3_NT, + TRACE_TYPE_PREFETCH_WRITE_L1, + TRACE_TYPE_PREFETCH_WRITE_L1_NT, + TRACE_TYPE_PREFETCH_WRITE_L2, + TRACE_TYPE_PREFETCH_WRITE_L2_NT, + TRACE_TYPE_PREFETCH_WRITE_L3, + TRACE_TYPE_PREFETCH_WRITE_L3_NT, + }; + enum + { + TRACE_MARKER_TYPE_KERNEL_EVENT, + TRACE_MARKER_TYPE_KERNEL_XFER, + TRACE_MARKER_TYPE_TIMESTAMP, + TRACE_MARKER_TYPE_CPU_ID, + TRACE_MARKER_TYPE_FUNC_ID, + TRACE_MARKER_TYPE_FUNC_RETADDR, + TRACE_MARKER_TYPE_FUNC_ARG, + TRACE_MARKER_TYPE_FUNC_RETVAL, + TRACE_MARKER_TYPE_SPLIT_VALUE, + TRACE_MARKER_TYPE_FILETYPE, + TRACE_MARKER_TYPE_CACHE_LINE_SIZE, + TRACE_MARKER_TYPE_INSTRUCTION_COUNT, + TRACE_MARKER_TYPE_VERSION, + TRACE_MARKER_TYPE_RSEQ_ABORT, + TRACE_MARKER_TYPE_WINDOW_ID, + TRACE_MARKER_TYPE_PHYSICAL_ADDRESS, + TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE, + TRACE_MARKER_TYPE_VIRTUAL_ADDRESS, + TRACE_MARKER_TYPE_PAGE_SIZE, + TRACE_MARKER_TYPE_RESERVED_END = 100, + }; + + public: + PARAMS(DRTraceReader); + DRTraceReader(const Params ¶ms); + + void init() override; + + ~DRTraceReader() + { + for (auto &file : traceFiles) { + gzclose(file); + } + } + + /** + * Used by the DRTracePlayers to get the next traced instruction + * + */ + TraceRef getNextTraceReference(unsigned player_id); + + +}; + +} // namespace gem5 + + +#endif //__CPU_TESTERS_DR_TRACE_PLAYER_TRACE_READER_HH__ diff --git a/src/mem/DCacheCtrl.py b/src/mem/DCacheCtrl.py new file mode 100644 index 0000000000..0609b09641 --- /dev/null +++ b/src/mem/DCacheCtrl.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012-2020 ARM Limited +# All rights reserved. 
+# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2013 Amin Farmahini-Farahani +# Copyright (c) 2015 University of Kaiserslautern +# Copyright (c) 2015 The University of Bologna +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import * +from m5.proxy import * +from m5.objects.MemCtrl import * + +class DCacheCtrl(MemCtrl): + type = 'DCacheCtrl' + cxx_header = "mem/dram_cache_ctrl.hh" + cxx_class = 'gem5::memory::DCacheCtrl' + + req_port = RequestPort("This port responds to DRAM cache controller " + "requests for backing memory") + + dram_cache_size = Param.MemorySize('128MiB', + "DRAM cache block size in bytes") + block_size = Param.Unsigned(64, "DRAM cache block size in bytes") + addr_size = Param.Unsigned(64,"Addr size of the request from outside world") + orb_max_size = Param.Unsigned(256, "Outstanding Requests Buffer size") + crb_max_size = Param.Unsigned(32, "Conflicting Requests Buffer size") + always_hit = Param.Bool(True, "") + always_dirty = Param.Bool(True, "") diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py index dea62a6be1..6417aca443 100644 --- a/src/mem/DRAMInterface.py +++ b/src/mem/DRAMInterface.py @@ -39,6 +39,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from m5.objects.MemCtrl import MemCtrl +# from m5.objects.PolicyManager import PolicyManager from m5.objects.MemInterface import * @@ -53,6 +54,13 @@ class DRAMInterface(MemInterface): cxx_header = "mem/dram_interface.hh" cxx_class = "gem5::memory::DRAMInterface" + # pol_man = Param.PolicyManager("Policy Manager which is used just in HBM2_Rambus " + # "for sending dirty data from flush buffer to the " + # "controller during refresh") + enable_read_flush_buffer = Param.Bool(False, "Enable reading from flush buffer " + "during refresh, only for HBM2_Rambus") + is_alloy = Param.Bool(False, "Alloy needs a different decode packet") + # scheduler page policy page_policy = Param.PageManage("open_adaptive", "Page management policy") @@ -179,6 +187,15 @@ class DRAMInterface(MemInterface): # time to exit self-refresh mode with locked DLL tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL") + tTAGBURST = Param.Latency("0ns", "tRL_FAST") + tRL_FAST = Param.Latency("0ns", "tRL_FAST") + tHM2DQ = Param.Latency("0ns", "tHM2DQ") + tRTW_int = Param.Latency("0ns", "tRTW_int") + tRFBD = Param.Latency("0ns", "tRFBD") + tRCD_FAST = Param.Latency("0ns", "tRCD_FAST") + tRC_FAST = Param.Latency("0ns", "tRCD_FAST") + flushBuffer_high_thresh_perc = Param.Percent(0, "Threshold to force writes") + # number of data beats per clock. with DDR, default is 2, one per edge # used in drampower.cc beats_per_clock = Param.Unsigned(2, "Data beats per clock") @@ -549,7 +566,7 @@ class DDR4_2400_16x4(DRAMInterface): # CAS-to-CAS delay for bursts to the same bank group # tBURST is equivalent to tCCD_S; no explicit parameter required # for CAS-to-CAS delay for bursts to different bank groups - tCCD_L = "5ns" + tCCD_L = '5ns' # DDR4-2400 17-17-17 tRCD = "14.16ns" @@ -1256,6 +1273,94 @@ class HBM_2000_4H_1x64(DRAMInterface): two_cycle_activate = True +class TDRAM(DRAMInterface): + + # 64-bit interface for a single pseudo channel + device_bus_width = 32 + + # HBM2 supports BL4 + burst_length = 16 + + # size of channel in bytes, 4H stack of 8Gb dies is 4GiB per stack; + # with 16 pseudo channels, 256MiB per pseudo channel + device_size = "1GiB" + + device_rowbuffer_size = "2KiB" + + # 1x128 configuration + devices_per_rank = 1 + + ranks_per_channel = 1 + + banks_per_rank = 32 + + bank_groups_per_rank = 8 + + # 1000 MHz for 2Gbps DDR data rate + tCK = "0.5ns" + + # new + tTAGBURST = "0.5ns" + tRL_FAST = "7.5ns" + tHM2DQ = "0ns" + tRTW_int = "1ns" + tRFBD = "1ns" + tRCD_FAST = "7.5ns" + enable_read_flush_buffer = True + flushBuffer_high_thresh_perc = 80 + + tRP = "14ns" + + tCCD_L = "2ns" + + tRCD = "12ns" + tRCD_WR = "6ns" + tCL = "18ns" + tCWL = "7ns" + tRAS = "28ns" + + tBURST = "2ns" + + # value for 2Gb device from JEDEC spec + tRFC = "220ns" + + # value for 2Gb device from JEDEC spec + tREFI = "3.9us" + + tWR = "14ns" + tRTP = "5ns" + tWTR = "4ns" + tWTR_L = "9ns" + tRTW = "18ns" + + #tAAD from RBus + tAAD = "1ns" + + # single rank device, set to 0 + tCS = "0ns" + + tRRD = "2ns" + tRRD_L = "2ns" + + # for a single pseudo channel + tXAW = "16ns" + activation_limit = 8 + + # 4tCK + tXP = "8ns" + + # start with tRFC + tXP -> 160ns + 8ns = 168ns + tXS = "216ns" + + page_policy = 'close' + + read_buffer_size = 64 + write_buffer_size = 64 + + two_cycle_activate = True + + addr_mapping = 'RoCoRaBaCh' + # A single DDR5-4400 32bit channel (4x8 configuration) # A DDR5 DIMM is made up of two (32 bit) channels. # Following configuration is modeling only a single 32bit channel. 
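+# Illustrative sketch only (not part of the interface definitions above):
+# one plausible way to attach the TDRAM interface to a stock MemCtrl in a
+# config script; `system`, the bus, and the address range are assumptions.
+#
+#   tdram = TDRAM(range=AddrRange("1GiB"))
+#   system.mem_ctrl = MemCtrl(dram=tdram)
+#   system.mem_ctrl.port = system.membus.mem_side_ports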
diff --git a/src/mem/MemCtrl.py b/src/mem/MemCtrl.py index 62e4d97b26..b25b437701 100644 --- a/src/mem/MemCtrl.py +++ b/src/mem/MemCtrl.py @@ -77,7 +77,7 @@ class MemCtrl(QoSMemCtrl): # threshold in percentage for when to start writes if the read # queue is empty write_low_thresh_perc = Param.Percent(50, "Threshold to start writes") - + oldest_write_age_threshold = Param.Unsigned(2500000, "The age of the oldest write request in the write queue, in ticks") # minimum write bursts to schedule before switching back to reads min_writes_per_switch = Param.Unsigned( 16, "Minimum write bursts before switching to reads" @@ -98,7 +98,14 @@ class MemCtrl(QoSMemCtrl): static_frontend_latency = Param.Latency("10ns", "Static frontend latency") static_backend_latency = Param.Latency("10ns", "Static backend latency") + static_frontend_latency_tc = Param.Latency("0ns", "Static frontend latency for tag check accesses") + static_backend_latency_tc = Param.Latency("0ns", "Static backend latency for tag check accesses") + command_window = Param.Latency("10ns", "Command window used to check command bandwidth") + + consider_oldest_write = Param.Bool(False, "A flag to consider the age of write " + "requests for bus switching") + disable_sanity_check = Param.Bool(False, "Disable the port response queue size sanity check") @@ -120,4 +127,4 @@ class MemCtrl(QoSMemCtrl): doi = {10.1109/ISPASS.2014.6844484} } """, -) +) \ No newline at end of file diff --git a/src/mem/PolicyManager.py b/src/mem/PolicyManager.py new file mode 100644 index 0000000000..14dfb4363b --- /dev/null +++ b/src/mem/PolicyManager.py @@ -0,0 +1,48 @@ +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject +from m5.objects.ReplacementPolicies import * +from m5.objects.AbstractMemory import AbstractMemory +from m5.objects.DRAMInterface import * + +class Policy(Enum): vals = ['CascadeLakeNoPartWrs', 'Oracle', 'BearWriteOpt', 'Rambus', 'RambusTagProbOpt'] +class ReplPolicySetAssoc(Enum): vals = ['bip_rp', 'brrip_rp', 'dueling_rp', 'fifo_rp', 'lfu_rp', 'lru_rp', 'mru_rp', 'random_rp', 'second_chance_rp', 'ship_rp', 'tree_plru_rp', 'weighted_lru_rp'] + +class PolicyManager(AbstractMemory): + type = 'PolicyManager' + cxx_header = "mem/policy_manager.hh" + cxx_class = 'gem5::memory::PolicyManager' + + port = ResponsePort("This port responds to memory requests") + loc_req_port = RequestPort("This port sends requests to the DRAM cache controller") + far_req_port = RequestPort("This port sends requests to the backing store controller") + + loc_burst_size = Param.Unsigned(64, "Local memory burst size") + far_burst_size = Param.Unsigned(64, "Far memory burst size") + + loc_mem_policy = Param.Policy('CascadeLakeNoPartWrs', "DRAM Cache Policy") + loc_mem = Param.AbstractMemory("local memory device") + + assoc = Param.Unsigned(1, "Number of ways per set in the DRAM cache, if it is set-associative") + replPol = Param.ReplPolicySetAssoc('lru_rp', "Replacement policy, if it is set-associative") + replacement_policy = Param.BaseReplacementPolicy(LRURP(), "Replacement policy") + + dram_cache_size = Param.MemorySize('128MiB', "DRAM cache size in bytes") + block_size = Param.Unsigned(64, "DRAM cache block size in bytes") + addr_size = Param.Unsigned(64, "Address size of requests from the outside world") + orb_max_size = Param.Unsigned(256, "Outstanding Requests Buffer size") + crb_max_size = Param.Unsigned(32, "Conflicting Requests Buffer size") + extreme = Param.Bool(False, "Control flag for enforcing hit/miss & dirty/clean") + always_hit = Param.Bool(True, "Control flag for enforcing hit/miss") + always_dirty = Param.Bool(False,
"Control flag for enforcing clean/dirty") + + static_frontend_latency = Param.Latency("10ns", "Static frontend latency") + static_backend_latency = Param.Latency("10ns", "Static backend latency") + + cache_warmup_ratio = Param.Float(0.95, "DRAM cache warmup ratio") + + bypass_dcache = Param.Bool(False, "if the DRAM cache needs to be bypassed") + + channel_index = Param.String("0","number of DRAM cache channels in the system") + + diff --git a/src/mem/SConscript b/src/mem/SConscript index e2a91146d0..52cbe7f95e 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -52,6 +52,8 @@ SimObject('MemCtrl.py', sim_objects=['MemCtrl'], enums=['MemSched']) SimObject('HeteroMemCtrl.py', sim_objects=['HeteroMemCtrl']) SimObject('HBMCtrl.py', sim_objects=['HBMCtrl']) +SimObject('DCacheCtrl.py', sim_objects=['DCacheCtrl']) +SimObject('PolicyManager.py', sim_objects=['PolicyManager'], enums=['Policy', 'ReplPolicySetAssoc']) SimObject('MemInterface.py', sim_objects=['MemInterface'], enums=['AddrMap']) SimObject('DRAMInterface.py', sim_objects=['DRAMInterface'], enums=['PageManage']) @@ -81,6 +83,8 @@ Source('external_slave.cc') Source('mem_ctrl.cc') Source('hetero_mem_ctrl.cc') Source('hbm_ctrl.cc') +Source('dram_cache_ctrl.cc') +Source('policy_manager.cc') Source('mem_interface.cc') Source('dram_interface.cc') Source('nvm_interface.cc') @@ -144,6 +148,8 @@ CompoundFlag('XBar', ['BaseXBar', 'CoherentXBar', 'NoncoherentXBar', DebugFlag('Bridge') DebugFlag('CommMonitor') DebugFlag('DRAM') +DebugFlag('DRAMT') +DebugFlag('DecodePkt') DebugFlag('DRAMPower') DebugFlag('DRAMState') DebugFlag('NVM') @@ -151,6 +157,9 @@ DebugFlag('ExternalPort') DebugFlag('HtmMem', 'Hardware Transactional Memory (Mem side)') DebugFlag('LLSC') DebugFlag('MemCtrl') +DebugFlag('DCacheCtrl') +DebugFlag('PolicyManager') +DebugFlag('ChkptRstrTest') DebugFlag('MMU') DebugFlag('MemoryAccess') DebugFlag('PacketQueue') diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc index 9340f7e96f..d0cc1d8189 100644 --- a/src/mem/abstract_mem.cc +++ b/src/mem/abstract_mem.cc @@ -395,7 +395,16 @@ AbstractMemory::access(PacketPtr pkt) uint8_t *host_addr = toHostAddr(pkt->getAddr()); - if (pkt->cmd == MemCmd::SwapReq) { + if (pkt->cmd == MemCmd::SwapResp) { + if (pkt->isAtomicOp()) { + if (pmemAddr) { + pkt->writeData(host_addr); + (*(pkt->getAtomicOp()))(host_addr); + } + } else { + fatal("Did not expect this access in timing mode"); + } + } else if (pkt->cmd == MemCmd::SwapReq) { if (pkt->isAtomicOp()) { if (pmemAddr) { pkt->setData(host_addr); diff --git a/src/mem/abstract_mem.hh b/src/mem/abstract_mem.hh index 7f12487421..572e28ff03 100644 --- a/src/mem/abstract_mem.hh +++ b/src/mem/abstract_mem.hh @@ -46,6 +46,7 @@ #ifndef __MEM_ABSTRACT_MEMORY_HH__ #define __MEM_ABSTRACT_MEMORY_HH__ +#include "enums/Policy.hh" #include "mem/backdoor.hh" #include "mem/port.hh" #include "params/AbstractMemory.hh" @@ -225,6 +226,32 @@ class AbstractMemory : public ClockedObject void initState() override; + enums::Policy locMemPolicy; + + virtual Tick get_tRP() { panic("AbstractMemory get_tRP should not be executed from here.\n"); + return false;} + virtual Tick get_tRCD_RD() { panic("AbstractMemory get_tRCD_RD should not be executed from here.\n"); + return false;} + virtual Tick get_tRL() { panic("AbstractMemory get_tRL should not be executed from here.\n"); + return false;} + + virtual bool recvReadFlushBuffer(Addr addr) + { + panic("AbstractMemory recvReadFlushBuffer should not be executed from here.\n"); + return false; + } + + virtual void 
setPolicyManager(AbstractMemory* _polMan) + { + panic("AbstractMemory setPolicyManager should not be executed from here.\n"); + } + + virtual bool checkFwdMrgeInFB(Addr addr) + { + panic("AbstractMemory checkFwdMrgeInFB should not be executed from here.\n"); + return false; + } + /** * See if this is a null memory that should never store data and * always return zero. diff --git a/src/mem/cache/replacement_policies/base.hh b/src/mem/cache/replacement_policies/base.hh index 2c23c950b2..fbd855985b 100644 --- a/src/mem/cache/replacement_policies/base.hh +++ b/src/mem/cache/replacement_policies/base.hh @@ -58,6 +58,12 @@ class Base : public SimObject Base(const Params &p) : SimObject(p) {} virtual ~Base() = default; + virtual Tick getLastTouchTick(const std::shared_ptr<ReplacementData>& + replacement_data) { return MaxTick; } + + virtual void setLastTouchTick(const std::shared_ptr<ReplacementData>& + replacement_data, Tick tick) {} + /** * Invalidate replacement data to set it as the next probable victim. * diff --git a/src/mem/cache/replacement_policies/lru_rp.cc b/src/mem/cache/replacement_policies/lru_rp.cc index cbec50eb73..f9a00c6705 100644 --- a/src/mem/cache/replacement_policies/lru_rp.cc +++ b/src/mem/cache/replacement_policies/lru_rp.cc @@ -45,6 +45,22 @@ LRU::LRU(const Params &p) { } +Tick +LRU::getLastTouchTick(const std::shared_ptr<ReplacementData>& + replacement_data) +{ + return std::static_pointer_cast<LRUReplData>(replacement_data)->lastTouchTick; +} + +void +LRU::setLastTouchTick(const std::shared_ptr<ReplacementData>& replacement_data, + Tick tick) +{ + // used for unserialization of a checkpoint + std::static_pointer_cast<LRUReplData>( + replacement_data)->lastTouchTick = tick; +} + void LRU::invalidate(const std::shared_ptr<ReplacementData>& replacement_data) { diff --git a/src/mem/cache/replacement_policies/lru_rp.hh b/src/mem/cache/replacement_policies/lru_rp.hh index 6feaa4f73d..af734518f0 100644 --- a/src/mem/cache/replacement_policies/lru_rp.hh +++ b/src/mem/cache/replacement_policies/lru_rp.hh @@ -65,6 +65,13 @@ class LRU : public Base LRU(const Params &p); ~LRU() = default; + Tick getLastTouchTick(const std::shared_ptr<ReplacementData>& + replacement_data) override; + + void setLastTouchTick(const std::shared_ptr<ReplacementData>& + replacement_data, Tick tick) override; + + /** * Invalidate replacement data to set it as the next probable victim. * Sets its last touch tick as the starting tick. diff --git a/src/mem/cache/replacement_policies/replaceable_entry.hh b/src/mem/cache/replacement_policies/replaceable_entry.hh index bb88cefd1d..d8424c2cc7 100644 --- a/src/mem/cache/replacement_policies/replaceable_entry.hh +++ b/src/mem/cache/replacement_policies/replaceable_entry.hh @@ -34,6 +34,7 @@ #include "base/compiler.hh" #include "base/cprintf.hh" +#include "base/types.hh" namespace gem5 { @@ -61,6 +62,25 @@ struct ReplacementData {}; */ class ReplaceableEntry { + public: + Addr tagDC; + Addr indexDC; + bool validLine; + bool dirtyLine; + Addr farMemAddr; + unsigned counter; + uint64_t tickEntered; + + ReplaceableEntry(Addr _tagDC, Addr _indexDC, bool _validLine, bool _dirtyLine, Addr _farMemAddr) : + tagDC(_tagDC), + indexDC(_indexDC), + validLine(_validLine), + dirtyLine(_dirtyLine), + farMemAddr(_farMemAddr), + counter(0), + tickEntered(MaxTick) + { } + protected: /** * Set to which this entry belongs.
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 74ef1ead36..a600cda34e 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -291,6 +291,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id) // since it is a normal request, attempt to send the packet success = memSidePorts[mem_side_port_id]->sendTimingReq(pkt); + // std::cout << "4: " << pkt->getAddr() << " : " << xbar_delay << "\n"; } else { // no need to forward, turn this packet around and respond // directly @@ -343,9 +344,9 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id) assert(routeTo.find(pkt->req) == routeTo.end()); routeTo[pkt->req] = cpu_side_port_id; - panic_if(routeTo.size() > maxRoutingTableSizeCheck, - "%s: Routing table exceeds %d packets\n", - name(), maxRoutingTableSizeCheck); + // panic_if(routeTo.size() > maxRoutingTableSizeCheck, + // "%s: Routing table exceeds %d packets\n", + // name(), maxRoutingTableSizeCheck); } // update the layer state and schedule an idle event @@ -438,8 +439,9 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id) rsp_pkt->headerDelay = 0; cpuSidePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time); - } + // std::cout << "1: " << pkt->getAddr() << " : " << response_time-clockEdge() << "\n"; + } return success; } @@ -492,6 +494,7 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID mem_side_port_id) pkt->headerDelay = 0; cpuSidePorts[cpu_side_port_id]->schedTimingResp(pkt, curTick() + latency); + // std::cout << "2: " << pkt->getAddr() << " : " << latency << "\n"; // remove the request from the routing table routeTo.erase(route_lookup); @@ -680,6 +683,7 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID cpu_side_port_id) pkt->headerDelay = 0; cpuSidePorts[dest_port_id]->schedTimingResp(pkt, curTick() + latency); + // std::cout << "3: " << pkt->getAddr() << " : " << latency << "\n"; respLayers[dest_port_id]->succeededTiming(packetFinishTime); } @@ -700,7 +704,7 @@ void CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_cpu_side_port_id, const std::vector& dests) { - DPRINTF(CoherentXBar, "%s for %s\n", __func__, pkt->print()); + DPRINTF(CoherentXBar, "%s for %s \n", __func__, pkt->print()); // snoops should only happen if the system isn't bypassing caches assert(!system->bypassCaches()); @@ -714,6 +718,7 @@ CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_cpu_side_port_id, // from if (exclude_cpu_side_port_id == InvalidPortID || p->getId() != exclude_cpu_side_port_id) { + // std::cout << "here\n"; // cache is not allowed to refuse snoop p->sendTimingSnoopReq(pkt); fanout++; diff --git a/src/mem/dram_cache_ctrl.cc b/src/mem/dram_cache_ctrl.cc new file mode 100644 index 0000000000..7db3960519 --- /dev/null +++ b/src/mem/dram_cache_ctrl.cc @@ -0,0 +1,1917 @@ +/* + * Copyright (c) 2010-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mem/dram_cache_ctrl.hh" + +#include "base/trace.hh" +#include "debug/DCacheCtrl.hh" +#include "debug/DRAM.hh" +#include "debug/Drain.hh" +#include "debug/QOS.hh" +#include "mem/dram_interface.hh" +#include "mem/mem_interface.hh" +#include "sim/system.hh" + +namespace gem5 +{ + +namespace memory +{ + +DCacheCtrl::DCacheCtrl(const DCacheCtrlParams &p): + MemCtrl(p), + reqPort(name() + ".req_port", *this), + dramCacheSize(p.dram_cache_size), + blockSize(p.block_size), + addrSize(p.addr_size), + orbMaxSize(p.orb_max_size), orbSize(0), + crbMaxSize(p.crb_max_size), crbSize(0), + alwaysHit(p.always_hit), alwaysDirty(p.always_dirty), + retry(false), retryFMW(false), + stallRds(false), sendFarRdReq(true), + waitingForRetryReqPort(false), + rescheduleLocRead(false), + rescheduleLocWrite(false), + locWrCounter(0), farWrCounter(0), + maxConf(0), + maxLocRdEvQ(0), maxLocRdRespEvQ(0), maxLocWrEvQ(0), + maxFarRdEvQ(0), maxFarRdRespEvQ(0), maxFarWrEvQ(0), + locMemReadEvent([this]{ processLocMemReadEvent(); }, name()), + locMemReadRespEvent([this]{ processLocMemReadRespEvent(); }, name()), + locMemWriteEvent([this]{ processLocMemWriteEvent(); }, name()), + farMemReadEvent([this]{ processFarMemReadEvent(); }, name()), + farMemReadRespEvent([this]{ processFarMemReadRespEvent(); }, name()), + farMemWriteEvent([this]{ processFarMemWriteEvent(); }, name()), + dcstats(*this) +{ + fatal_if(!dram, "DRAM cache controller must have a DRAM interface.\n"); + + panic_if(orbMaxSize<8, "ORB maximum size must be at least 8.\n"); + + // hook up interfaces to the controller + dram->setCtrl(this, commandWindow); + + tagMetadataStore.resize(dramCacheSize/blockSize); + dirtAdrGen(); + pktLocMemRead.resize(1); + pktLocMemWrite.resize(1); + + // This is actually a locWriteHighThreshold + writeHighThreshold = 0.5 * orbMaxSize; + + minLocWrPerSwitch = 0.25 * orbMaxSize; +} + +void +DCacheCtrl::init() +{ + if (!port.isConnected()) { + fatal("DCacheCtrl %s is unconnected!\n", name()); + } else if (!reqPort.isConnected()) { + 
fatal("DCacheCtrl %s is unconnected!\n", name()); + } else { + port.sendRangeChange(); + //reqPort.recvRangeChange(); + } +} + +DCacheCtrl::DCCtrlStats::DCCtrlStats(DCacheCtrl &_ctrl) + : statistics::Group(&_ctrl), + ctrl(_ctrl), + +///// + ADD_STAT(readReqs, statistics::units::Count::get(), + "Number of read requests accepted"), + ADD_STAT(writeReqs, statistics::units::Count::get(), + "Number of write requests accepted"), + + ADD_STAT(readBursts, statistics::units::Count::get(), + "Number of controller read bursts, including those serviced by " + "the write queue"), + ADD_STAT(writeBursts, statistics::units::Count::get(), + "Number of controller write bursts, including those merged in " + "the write queue"), + ADD_STAT(servicedByWrQ, statistics::units::Count::get(), + "Number of controller read bursts serviced by the write queue"), + ADD_STAT(mergedWrBursts, statistics::units::Count::get(), + "Number of controller write bursts merged with an existing one"), + + ADD_STAT(neitherReadNorWriteReqs, statistics::units::Count::get(), + "Number of requests that are neither read nor write"), + + ADD_STAT(avgRdQLen, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average read queue length when enqueuing"), + ADD_STAT(avgWrQLen, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average write queue length when enqueuing"), + + ADD_STAT(numRdRetry, statistics::units::Count::get(), + "Number of times read queue was full causing retry"), + ADD_STAT(numWrRetry, statistics::units::Count::get(), + "Number of times write queue was full causing retry"), + + ADD_STAT(readPktSize, statistics::units::Count::get(), + "Read request sizes (log2)"), + ADD_STAT(writePktSize, statistics::units::Count::get(), + "Write request sizes (log2)"), + + ADD_STAT(rdQLenPdf, statistics::units::Count::get(), + "What read queue length does an incoming req see"), + ADD_STAT(wrQLenPdf, statistics::units::Count::get(), + "What write queue length does an incoming req see"), + + ADD_STAT(rdPerTurnAround, statistics::units::Count::get(), + "Reads before turning the bus around for writes"), + ADD_STAT(wrPerTurnAround, statistics::units::Count::get(), + "Writes before turning the bus around for reads"), + + ADD_STAT(bytesReadWrQ, statistics::units::Byte::get(), + "Total number of bytes read from write queue"), + ADD_STAT(bytesReadSys, statistics::units::Byte::get(), + "Total read bytes from the system interface side"), + ADD_STAT(bytesWrittenSys, statistics::units::Byte::get(), + "Total written bytes from the system interface side"), + + ADD_STAT(avgRdBWSys, statistics::units::Rate< + statistics::units::Byte, statistics::units::Second>::get(), + "Average system read bandwidth in Byte/s"), + ADD_STAT(avgWrBWSys, statistics::units::Rate< + statistics::units::Byte, statistics::units::Second>::get(), + "Average system write bandwidth in Byte/s"), + + ADD_STAT(totGap, statistics::units::Tick::get(), + "Total gap between requests"), + ADD_STAT(avgGap, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average gap between requests"), + + ADD_STAT(requestorReadBytes, statistics::units::Byte::get(), + "Per-requestor bytes read from memory"), + ADD_STAT(requestorWriteBytes, statistics::units::Byte::get(), + "Per-requestor bytes write to memory"), + ADD_STAT(requestorReadRate, statistics::units::Rate< + statistics::units::Byte, statistics::units::Second>::get(), + "Per-requestor bytes read from memory rate"), + 
ADD_STAT(requestorWriteRate, statistics::units::Rate< + statistics::units::Byte, statistics::units::Second>::get(), + "Per-requestor bytes write to memory rate"), + ADD_STAT(requestorReadAccesses, statistics::units::Count::get(), + "Per-requestor read serviced memory accesses"), + ADD_STAT(requestorWriteAccesses, statistics::units::Count::get(), + "Per-requestor write serviced memory accesses"), + ADD_STAT(requestorReadTotalLat, statistics::units::Tick::get(), + "Per-requestor read total memory access latency"), + ADD_STAT(requestorWriteTotalLat, statistics::units::Tick::get(), + "Per-requestor write total memory access latency"), + ADD_STAT(requestorReadAvgLat, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Per-requestor read average memory access latency"), + ADD_STAT(requestorWriteAvgLat, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Per-requestor write average memory access latency"), +//////// + + ADD_STAT(avgORBLen, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average ORB length"), + ADD_STAT(avgLocRdQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local read queue length"), + ADD_STAT(avgLocWrQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local write queue length"), + ADD_STAT(avgFarRdQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far read queue length"), + ADD_STAT(avgFarWrQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far write queue length"), + + ADD_STAT(avgLocRdQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local read queue length when enqueuing"), + ADD_STAT(avgLocWrQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local write queue length when enqueuing"), + ADD_STAT(avgFarRdQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far read queue length when enqueuing"), + ADD_STAT(avgFarWrQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far write queue length when enqueuing"), + + ADD_STAT(numWrBacks, + "Total number of write backs from DRAM cache to main memory"), + ADD_STAT(totNumConf, + "Total number of packets conflicted on DRAM cache"), + ADD_STAT(totNumORBFull, + "Total number of packets ORB full"), + ADD_STAT(totNumConfBufFull, + "Total number of packets conflicted yet couldn't " + "enter confBuffer"), + + ADD_STAT(maxNumConf, + "Maximum number of packets conflicted on DRAM cache"), + ADD_STAT(maxLocRdEvQ, + "Maximum number of packets in locMemReadEvent concurrently"), + ADD_STAT(maxLocRdRespEvQ, + "Maximum number of packets in locMemReadRespEvent concurrently"), + ADD_STAT(maxLocWrEvQ, + "Maximum number of packets in locMemWriteEvent concurrently"), + ADD_STAT(maxFarRdEvQ, + "Maximum number of packets in farMemReadEvent concurrently"), + ADD_STAT(maxFarRdRespEvQ, + "Maximum number of packets in farMemReadRespEvent concurrently"), + ADD_STAT(maxFarWrEvQ, + "Maximum number of packets in farMemWriteEvent concurrently"), + + ADD_STAT(rdToWrTurnAround, + "Number of read-to-write bus turnarounds"), + ADD_STAT(wrToRdTurnAround, + "Number of write-to-read bus turnarounds"), + + ADD_STAT(sentRdPort, + "Number of read packets successfully sent through the request port to the far memory."), + ADD_STAT(failedRdPort, + "Number of read packets that failed to be sent through the request port to the far memory."), + ADD_STAT(recvdRdPort, + "Number of read packet responses received through the request port from the far memory."), + ADD_STAT(sentWrPort, + "Number of write packets successfully sent through the request port to the far memory."), + ADD_STAT(failedWrPort, + "Number of write packets that failed to be sent through the request port to the far memory."), + ADD_STAT(totPktsServiceTime, + "Total service time of packets in the DRAM cache controller"), + ADD_STAT(totPktsORBTime, + "Total time packets spend in the ORB"), + ADD_STAT(totTimeFarRdtoSend, + "Total time far reads wait before being sent to the far memory"), + ADD_STAT(totTimeFarRdtoRecv, + "Total time until far read responses are received"), + ADD_STAT(totTimeFarWrtoSend, + "Total time far writes wait before being sent to the far memory"), + ADD_STAT(totTimeInLocRead, + "Total time packets spend in the local read state"), + ADD_STAT(totTimeInLocWrite, + "Total time packets spend in the local write state"), + ADD_STAT(totTimeInFarRead, + "Total time packets spend in the far read state"), + ADD_STAT(QTLocRd, + "Total queueing time of local reads"), + ADD_STAT(QTLocWr, + "Total queueing time of local writes") +{ +} + +void +DCacheCtrl::DCCtrlStats::regStats() +{ + using namespace statistics; + + assert(ctrl.system()); + const auto max_requestors = ctrl.system()->maxRequestors(); + + avgRdQLen.precision(2); + avgWrQLen.precision(2); + + avgORBLen.precision(4); + avgLocRdQLenStrt.precision(2); + avgLocWrQLenStrt.precision(2); + avgFarRdQLenStrt.precision(2); + avgFarWrQLenStrt.precision(2); + + avgLocRdQLenEnq.precision(2); + avgLocWrQLenEnq.precision(2); + avgFarRdQLenEnq.precision(2); + avgFarWrQLenEnq.precision(2); + + readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + + rdQLenPdf.init(ctrl.readBufferSize); + wrQLenPdf.init(ctrl.writeBufferSize); + + rdPerTurnAround + .init(ctrl.readBufferSize) + .flags(nozero); + wrPerTurnAround + .init(ctrl.writeBufferSize) + .flags(nozero); + + avgRdBWSys.precision(8); + avgWrBWSys.precision(8); + avgGap.precision(2); + + // per-requestor bytes read and written to memory + requestorReadBytes + .init(max_requestors) + .flags(nozero | nonan); + + requestorWriteBytes + .init(max_requestors) + .flags(nozero | nonan); + + // per-requestor bytes read and written to memory rate + requestorReadRate + .flags(nozero | nonan) + .precision(12); + + requestorReadAccesses + .init(max_requestors) + .flags(nozero); + + requestorWriteAccesses + .init(max_requestors) + .flags(nozero); + + requestorReadTotalLat + .init(max_requestors) + .flags(nozero | nonan); + + requestorReadAvgLat + .flags(nonan) + .precision(2); + + requestorWriteRate + .flags(nozero | nonan) + .precision(12); + + requestorWriteTotalLat + .init(max_requestors) + .flags(nozero | nonan); + + requestorWriteAvgLat + .flags(nonan) + .precision(2); + + for (int i = 0; i < max_requestors; i++) { + const std::string requestor = ctrl.system()->getRequestorName(i); + requestorReadBytes.subname(i, requestor); + requestorReadRate.subname(i, requestor); + requestorWriteBytes.subname(i, requestor); + requestorWriteRate.subname(i, requestor); + requestorReadAccesses.subname(i, requestor); + requestorWriteAccesses.subname(i, requestor); + requestorReadTotalLat.subname(i, requestor); + requestorReadAvgLat.subname(i, requestor); + requestorWriteTotalLat.subname(i, requestor); + requestorWriteAvgLat.subname(i, requestor); + } + + // Formula stats + avgRdBWSys = (bytesReadSys) / simSeconds; + avgWrBWSys = (bytesWrittenSys) / simSeconds; + + avgGap = totGap / (readReqs + writeReqs); + + requestorReadRate = requestorReadBytes / simSeconds; + requestorWriteRate = requestorWriteBytes /
simSeconds; + requestorReadAvgLat = requestorReadTotalLat / requestorReadAccesses; + requestorWriteAvgLat = requestorWriteTotalLat / requestorWriteAccesses; +} + +void +DCacheCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency, + MemInterface* mem_intr) +{ + bool needsResponse = pkt->needsResponse(); + // do the actual memory access which also turns the packet into a + // response + panic_if(!mem_intr->getAddrRange().contains(pkt->getAddr()), + "Can't handle address range for packet %s\n", pkt->print()); + mem_intr->access(pkt); + // PacketPtr copyOwPkt = new Packet(pkt, false, pkt->isRead()); + // reqPort.sendFunctional(copyOwPkt); + + // turn packet around to go back to requestor if response expected + if (needsResponse) { + // access already turned the packet into a response + assert(pkt->isResponse()); + // response_time consumes the static latency and is charged also + // with headerDelay that takes into account the delay provided by + // the xbar and also the payloadDelay that takes into account the + // number of data beats. + Tick response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. + pkt->headerDelay = pkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(pkt, response_time); + } else { + // @todo the packet is going to be deleted, and the MemPacket + // is still having a pointer to it + pendingDelete.reset(pkt); + } + + return; +} + +bool +DCacheCtrl::recvTimingReq(PacketPtr pkt) +{ + // This is where we enter from the outside world + DPRINTF(DCacheCtrl, "dc: got %s %lld\n", pkt->cmdString(), pkt->getAddr()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller\n"); + + // Calc avg gap between requests + if (prevArrival != 0) { + dcstats.totGap += curTick() - prevArrival; + } + prevArrival = curTick(); + + // Validate that pkt's address maps to the dram + assert(dram && dram->getAddrRange().contains(pkt->getAddr())); + + + // Find out how many memory packets a pkt translates to + // If the burst size is equal or larger than the pkt size, then a pkt + // translates to only one memory packet. 
Otherwise, a pkt translates to + // multiple memory packets + + if ( + ((pktFarMemWrite.size() >= (orbMaxSize/2)) || (!pktFarMemWrite.empty() && pktFarMemRead.empty())) && + !waitingForRetryReqPort + ) { + if (!farMemWriteEvent.scheduled() && !farMemReadEvent.scheduled()) { + sendFarRdReq = false; + schedule(farMemWriteEvent, curTick()+1000); + } + } + + Addr addr = pkt->getAddr(); + + unsigned burst_size = dram->bytesPerBurst(); + + unsigned size = std::min((addr | (burst_size - 1)) + 1, + addr + pkt->getSize()) - addr; + + // check merging for writes + if (pkt->isWrite()) { + + dcstats.writePktSize[ceilLog2(size)]++; + dcstats.writeBursts++; + dcstats.requestorWriteAccesses[pkt->requestorId()]++; + + assert(pkt->getSize() != 0); + + bool merged = isInWriteQueue.find(pkt->getAddr()) != + isInWriteQueue.end(); + + if (merged) { + + dcstats.mergedWrBursts++; + + accessAndRespond(pkt, frontendLatency, dram); + + return true; + } + } + + // check forwarding for reads + bool foundInORB = false; + bool foundInCRB = false; + bool foundInFarMemWrite = false; + + if (pkt->isRead()) { + + assert(pkt->getSize() != 0); + + if (isInWriteQueue.find(pkt->getAddr()) != isInWriteQueue.end()) { + + if (!ORB.empty()) { + for (const auto& e : ORB) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->validEntry && + e.second->owPkt->isWrite() && + e.second->owPkt->getAddr() <= addr && + ((addr + size) <= + (e.second->owPkt->getAddr() + + e.second->owPkt->getSize()))) { + + foundInORB = true; + + dcstats.servicedByWrQ++; + + dcstats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !CRB.empty()) { + for (const auto& e : CRB) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->isWrite() && + e.second->getAddr() <= addr && + ((addr + size) <= + (e.second->getAddr() + e.second->getSize()))) { + + foundInCRB = true; + + dcstats.servicedByWrQ++; + + dcstats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !foundInCRB && !pktFarMemWrite.empty()) { + for (const auto& e : pktFarMemWrite) { + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->getAddr() <= addr && + ((addr + size) <= + (e.second->getAddr() + + e.second->getSize()))) { + + foundInFarMemWrite = true; + + dcstats.servicedByWrQ++; + + dcstats.bytesReadWrQ += burst_size; + + break; + } + } + } + } + + if (foundInORB || foundInCRB || foundInFarMemWrite) { + dcstats.readPktSize[ceilLog2(size)]++; + dcstats.readBursts++; + dcstats.requestorReadAccesses[pkt->requestorId()]++; + + accessAndRespond(pkt, frontendLatency, dram); + + return true; + } + } + + // process conflicting requests. 
+ // conflicts are checked only based on Index of DRAM cache + if (checkConflictInDramCache(pkt)) { + + dcstats.totNumConf++; + + if (CRB.size()>=crbMaxSize) { + + dcstats.totNumConfBufFull++; + + retry = true; + + if (pkt->isRead()) { + dcstats.numRdRetry++; + } + else { + dcstats.numWrRetry++; + } + return false; + } + + CRB.push_back(std::make_pair(curTick(), pkt)); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + if (CRB.size() > maxConf) { + maxConf = CRB.size(); + dcstats.maxNumConf = CRB.size(); + } + + return true; + } + // check if ORB or FMWB is full and set retry + if (pktFarMemWrite.size()>= (orbMaxSize / 2)) { + DPRINTF(DCacheCtrl, "FMWBfull: %lld\n", pkt->getAddr()); + retryFMW = true; + + if (pkt->isRead()) { + dcstats.numRdRetry++; + } + else { + dcstats.numWrRetry++; + } + return false; + } + + if (ORB.size() >= orbMaxSize) { + DPRINTF(DCacheCtrl, "ORBfull: addr %lld\n", pkt->getAddr()); + dcstats.totNumORBFull++; + retry = true; + + if (pkt->isRead()) { + dcstats.numRdRetry++; + } + else { + dcstats.numWrRetry++; + } + return false; + } + + // if none of the above cases happens, + // then add the pkt to the outstanding requests buffer + handleRequestorPkt(pkt); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + pktLocMemRead[0].push_back(ORB.at(pkt->getAddr())->dccPkt); + + dcstats.avgLocRdQLenEnq = pktLocMemRead[0].size() + addrLocRdRespReady.size(); + + if (!stallRds && !rescheduleLocRead && !locMemReadEvent.scheduled()) { + schedule(locMemReadEvent, std::max(dram->nextReqTime, curTick())); + } + + ORB.at(pkt->getAddr())->locRdEntered = curTick(); + + if (pktLocMemRead[0].size() > maxLocRdEvQ) { + maxLocRdEvQ = pktLocMemRead[0].size(); + dcstats.maxLocRdEvQ = pktLocMemRead[0].size(); + } + + DPRINTF(DCacheCtrl, "DRAM cache controller accepted packet %lld\n", pkt->getAddr()); + + return true; +} + +void +DCacheCtrl::processLocMemReadEvent() +{ + if (stallRds || dram->isBusy(false, false) || rescheduleLocRead) { + // it's possible that dram is busy and we return here before + // reching to read_found check to set rescheduleLocRead + if (dram->isBusy(false, false)) { + rescheduleLocRead = true; + } + return; + } + + assert(!pktLocMemRead[0].empty()); + + MemPacketQueue::iterator to_read; + + bool read_found = false; + + bool switched_cmd_type = (busState == DCacheCtrl::WRITE); + + for (auto queue = pktLocMemRead.rbegin(); + queue != pktLocMemRead.rend(); ++queue) { + to_read = MemCtrl::chooseNext((*queue), switched_cmd_type ? + minWriteToReadDataGap() : 0, dram); + // to_read = MemCtrl::chooseNext((*queue), 0, dram); + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + if (!read_found) { + DPRINTF(DCacheCtrl, " !read_found LocRd: %lld\n", curTick()); + // Probably dram is refreshing. + // Simply return, let the dram device + // reschedule again once refresh is done. 
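+ // restartScheduler() will clear rescheduleLocRead and kick
+ // locMemReadEvent again once the DRAM interface is free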
+ rescheduleLocRead = true; + return; + } + + auto orbEntry = ORB.at((*to_read)->getAddr()); + + DPRINTF(DCacheCtrl, "LocRd: %lld\n", orbEntry->owPkt->getAddr()); + + // sanity check for the chosen packet + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isRead()); + assert(orbEntry->state == locMemRead); + assert(!orbEntry->issued); + + busState = DCacheCtrl::READ; + + if (switched_cmd_type) { + dcstats.wrToRdTurnAround++; + } + + Tick cmdAt = MemCtrl::doBurstAccess(orbEntry->dccPkt, dram); + dcstats.QTLocRd += ((cmdAt-orbEntry->locRdEntered)/1000); + + // sanity check + //assert(orbEntry->dccPkt->size <= dram->bytesPerBurst()); + assert(orbEntry->dccPkt->readyTime >= curTick()); + + if (orbEntry->owPkt->isRead() && orbEntry->isHit) { + logResponse(DCacheCtrl::READ, + orbEntry->dccPkt->requestorId(), + orbEntry->dccPkt->qosValue(), + orbEntry->owPkt->getAddr(), 1, + orbEntry->dccPkt->readyTime - orbEntry->dccPkt->entryTime); + } + + if (addrLocRdRespReady.empty()) { + assert(!locMemReadRespEvent.scheduled()); + schedule(locMemReadRespEvent, orbEntry->dccPkt->readyTime); + } + else { + assert(ORB.at(addrLocRdRespReady.back())->dccPkt->readyTime + <= orbEntry->dccPkt->readyTime); + + assert(locMemReadRespEvent.scheduled()); + } + + addrLocRdRespReady.push_back(orbEntry->owPkt->getAddr()); + + if (addrLocRdRespReady.size() > maxLocRdRespEvQ) { + maxLocRdRespEvQ = addrLocRdRespReady.size(); + dcstats.maxLocRdRespEvQ = addrLocRdRespReady.size(); + } + + // keep the state as it is, no transition + orbEntry->state = locMemRead; + // mark the entry as issued (while in locMemRead) + orbEntry->issued = true; + // record the tick it was issued + orbEntry->locRdIssued = curTick(); + orbEntry->locRdExit = orbEntry->dccPkt->readyTime; + + pktLocMemRead[0].erase(to_read); + + unsigned rdsNum = pktLocMemRead[0].size(); + unsigned wrsNum = pktLocMemWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + stallRds = true; + if (!locMemWriteEvent.scheduled()) { + schedule(locMemWriteEvent, std::max(dram->nextReqTime, curTick())); + } + return; + } + + if (!pktLocMemRead[0].empty() && !locMemReadEvent.scheduled()) { + //assert(!locMemReadEvent.scheduled()); + schedule(locMemReadEvent, std::max(dram->nextReqTime, curTick())); + } +} + +void +DCacheCtrl::processLocMemReadRespEvent() +{ + assert(!addrLocRdRespReady.empty()); + + reqBufferEntry* orbEntry = ORB.at(addrLocRdRespReady.front()); + + DPRINTF(DCacheCtrl, "LocRdResp: %lld\n", orbEntry->owPkt->getAddr()); + + // A series of sanity check + assert(orbEntry->validEntry); + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->dccPkt->isRead()); + assert(orbEntry->state == locMemRead); + assert(orbEntry->issued); + assert(orbEntry->dccPkt->readyTime == curTick()); + + orbEntry->issued = false; + + if (orbEntry->handleDirtyLine) { + handleDirtyCacheLine(orbEntry); + } + + // A flag which is used for retrying read requests + // in case one slot in ORB becomes available here + // (happens only for read hits) + bool canRetry = false; + + dram->respondEvent(orbEntry->dccPkt->rank); + + // Read Hit + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + + DPRINTF(DCacheCtrl, "Read Hit: %lld\n", orbEntry->owPkt->getAddr()); + + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency, + dram); + + ORB.at(copyOwPkt->getAddr()) = new 
reqBufferEntry(orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdRecvd, + orbEntry->farRdExit); + + delete orbEntry; + + orbEntry = ORB.at(addrLocRdRespReady.front()); + } + + // Write (Hit & Miss) + if (orbEntry->owPkt->isWrite() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram()) { + // This is a write request which has done a tag check. + // Delete its dcc packet which is read and create + // a new one which is write. + delete orbEntry->dccPkt; + + orbEntry->dccPkt = dram->decodePacket(orbEntry->owPkt, + orbEntry->owPkt->getAddr(), + orbEntry->owPkt->getSize(), + false); + + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "false" to + // indicate a write access to DRAM + dram->setupRank(orbEntry->dccPkt->rank, false); + + //** transition to locMemWrite + orbEntry->state = locMemWrite; + orbEntry->issued = false; + orbEntry->locWrEntered = curTick(); + + pktLocMemWrite[0].push_back(orbEntry->dccPkt); + + dcstats.avgLocWrQLenEnq = pktLocMemWrite[0].size(); + + unsigned rdsNum = pktLocMemRead[0].size(); + unsigned wrsNum = pktLocMemWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + // stall reads, switch to writes + stallRds = true; + if (!locMemWriteEvent.scheduled() && !rescheduleLocWrite) { + schedule(locMemWriteEvent, + std::max(dram->nextReqTime, curTick())); + } + } + + if (pktLocMemWrite[0].size() > maxLocWrEvQ) { + maxLocWrEvQ = pktLocMemWrite[0].size(); + dcstats.maxLocWrEvQ = pktLocMemWrite[0].size(); + } + } + + // Read Miss + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isRead() && + orbEntry->dccPkt->isDram() && + !orbEntry->isHit) { + DPRINTF(DCacheCtrl, "Read Miss: %lld\n", orbEntry->owPkt->getAddr()); + // initiate a read from far memory + // delete the current dcc pkt which is for read from local memory + delete orbEntry->dccPkt; + + // orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + // orbEntry->dccPkt->readyTime = MaxTick; + //** transition to farMemRead + orbEntry->state = farMemRead; + orbEntry->issued = false; + orbEntry->farRdEntered = curTick(); + + // if (pktFarMemRead.empty() && sendFarRdReq) { + // assert(!farMemReadEvent.scheduled()); + // schedule(farMemReadEvent, std::max(dram->nextReqTime, curTick())); + // } else { + // assert(farMemReadEvent.scheduled() || !sendFarRdReq || waitingForRetryReqPort); + // } + + PacketPtr copyOwPkt_2 = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + pktFarMemRead.push_back(copyOwPkt_2); + + dcstats.avgFarRdQLenEnq = countFarRdInORB(); + + if (pktFarMemRead.size() > maxFarRdEvQ) { + maxFarRdEvQ = pktFarMemRead.size(); + dcstats.maxFarRdEvQ = pktFarMemRead.size(); + } + + if (!farMemReadEvent.scheduled() && sendFarRdReq && !waitingForRetryReqPort) { + schedule(farMemReadEvent, std::max(dram->nextReqTime, curTick())); + } + + if (pktFarMemRead.size() > maxFarRdEvQ) { + maxFarRdEvQ = pktFarMemRead.size(); + dcstats.maxFarRdEvQ = pktFarMemRead.size(); + } + } + + // if (orbEntry->handleDirtyLine) { + // numDirtyLinesInDrRdRespQ--; + // } + + addrLocRdRespReady.pop_front(); + + if (!addrLocRdRespReady.empty()) { + 
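// more local-read responses are pending; chain the next
+ // response event at the head entry's ready time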
assert(ORB.at(addrLocRdRespReady.front())->dccPkt->readyTime + >= curTick()); + assert(!locMemReadRespEvent.scheduled()); + schedule(locMemReadRespEvent, + ORB.at(addrLocRdRespReady.front())->dccPkt->readyTime); + } else { + + unsigned rdsNum = pktLocMemRead[0].size(); + unsigned wrsNum = pktLocMemWrite[0].size(); + + // if there is nothing left in any queue, signal a drain + if (drainState() == DrainState::Draining && + !wrsNum && !rdsNum && + allIntfDrained()) { + DPRINTF(Drain, "Controller done draining\n"); + signalDrainDone(); + } else /*if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit)*/ { + // check the refresh state and kick the refresh event loop + // into action again if banks already closed and just waiting + // for read to complete + dram->checkRefreshState(orbEntry->dccPkt->rank); + } + } + + if (orbEntry->owPkt->isRead() && + orbEntry->dccPkt->isDram() && + orbEntry->isHit) { + DPRINTF(DCacheCtrl, "resu conf: %lld\n", orbEntry->owPkt->getAddr()); + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. + canRetry = !resumeConflictingReq(orbEntry); + } + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } +} + +void +DCacheCtrl::processLocMemWriteEvent() +{ + if (dram->isBusy(false, false) || rescheduleLocWrite) { + // it's possible that dram is busy and we reach here before + // reching to write_found check to set rescheduleLocWrite + if (dram->isBusy(false, false)) { + rescheduleLocWrite = true; + } + return; + } + + assert(stallRds); + + assert(!pktLocMemWrite[0].empty()); + + MemPacketQueue::iterator to_write; + + bool write_found = false; + + bool switched_cmd_type = (busState == DCacheCtrl::READ); + + if (switched_cmd_type) { + dcstats.rdToWrTurnAround++; + } + + for (auto queue = pktLocMemWrite.rbegin(); + queue != pktLocMemWrite.rend(); ++queue) { + to_write = chooseNext((*queue), switched_cmd_type ? + minReadToWriteDataGap() : 0, dram); + // to_write = chooseNext((*queue), 0, dram); + if (to_write != queue->end()) { + // candidate write found + write_found = true; + break; + } + } + + if (!write_found) { + // Probably dram is refreshing. + // Simply return, let the dram device + // reschedule again once refresh is done. + rescheduleLocWrite = true; + return; + } + + auto orbEntry = ORB.at((*to_write)->getAddr()); + + DPRINTF(DCacheCtrl, "LocWr: %lld\n", orbEntry->owPkt->getAddr()); + + bool canRetry = false; + + assert(orbEntry->validEntry); + + if (orbEntry->owPkt->isRead()) { + assert(!orbEntry->isHit); + } + assert(orbEntry->dccPkt->isDram()); + assert(orbEntry->state == locMemWrite); + //assert(orbEntry->dccPkt->size <= dram->bytesPerBurst()); + + busState = DCacheCtrl::WRITE; + + Tick cmdAt = MemCtrl::doBurstAccess(orbEntry->dccPkt, dram); + dcstats.QTLocWr += ((cmdAt - orbEntry->locWrEntered)/1000); + + orbEntry->locWrExit = orbEntry->dccPkt->readyTime; + + locWrCounter++; + + if (orbEntry->owPkt->isWrite()) { + // log the response + logResponse(DCacheCtrl::WRITE, + orbEntry->dccPkt->requestorId(), + orbEntry->dccPkt->qosValue(), + orbEntry->owPkt->getAddr(), + 1, + orbEntry->dccPkt->readyTime - orbEntry->arrivalTick); + } + + // Remove the request from the ORB and + // bring in a conflicting req waiting + // in the CRB, if any. 
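+ // (the upstream retry below is only sent when the freed ORB slot is not
+ // immediately reused by a conflicting request brought in from the CRB)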
+ canRetry = !resumeConflictingReq(orbEntry); + + pktLocMemWrite[0].erase(to_write); + + if (retry && canRetry) { + retry = false; + port.sendRetryReq(); + } + + + if (locWrCounter < minLocWrPerSwitch && !pktLocMemWrite[0].empty() + // && !pktLocMemRead[0].empty() + ) { + // assert(!locMemWriteEvent.scheduled()); + stallRds = true; + if (!locMemWriteEvent.scheduled()) { + schedule(locMemWriteEvent, std::max(dram->nextReqTime, curTick())); + } + return; + } + else if (pktLocMemRead[0].empty() && !pktLocMemWrite[0].empty()) { + // assert(!locMemWriteEvent.scheduled()); + stallRds = true; + locWrCounter = 0; + if (!locMemWriteEvent.scheduled()) { + schedule(locMemWriteEvent, std::max(dram->nextReqTime, curTick())); + } + return; + } + else if (!pktLocMemRead[0].empty() && (pktLocMemWrite[0].empty()||locWrCounter>=(minLocWrPerSwitch))) { + // assert(!locMemReadEvent.scheduled()); + stallRds = false; + locWrCounter = 0; + if (!locMemReadEvent.scheduled()) { + schedule(locMemReadEvent, std::max(dram->nextReqTime, curTick())); + } + return; + } + else if (pktLocMemRead[0].empty() && pktLocMemWrite[0].empty()) { + stallRds = false; + locWrCounter = 0; + } +} + + +void +DCacheCtrl::processFarMemReadEvent() +{ + if (!sendFarRdReq || waitingForRetryReqPort) { + return; + } + + assert(!pktFarMemRead.empty()); + + auto rdPkt = pktFarMemRead.front(); + if (reqPort.sendTimingReq(rdPkt)) { + DPRINTF(DCacheCtrl, "FarRdSent: %lld\n", rdPkt->getAddr()); + pktFarMemRead.pop_front(); + dcstats.sentRdPort++; + ORB.at(rdPkt->getAddr())->farRdIssued = curTick(); + // delete rdPkt; + } else { + DPRINTF(DCacheCtrl, "FarRdRetry: %lld\n", rdPkt->getAddr()); + waitingForRetryReqPort = true; + dcstats.failedRdPort++; + return; + } + + if ((pktFarMemWrite.size() >= (orbMaxSize/2)) || + (!pktFarMemWrite.empty() && pktFarMemRead.empty())) { + + sendFarRdReq = false; + if (!farMemWriteEvent.scheduled()) { + schedule(farMemWriteEvent, curTick()+1000); + } + + return; + } + + if (!pktFarMemRead.empty() && !farMemReadEvent.scheduled()) { + + sendFarRdReq = true; + + schedule(farMemReadEvent, curTick()+1000); + + return; + } +} + +void +DCacheCtrl::processFarMemReadRespEvent() +{ + assert(!pktFarMemReadResp.empty()); + + auto orbEntry = ORB.at(pktFarMemReadResp.front()->getAddr()); + + DPRINTF(DCacheCtrl, "FarMemReadRespEvent %lld\n", orbEntry->owPkt->getAddr()); + + // sanity check for the chosen packet + assert(orbEntry->validEntry); + assert(orbEntry->state == farMemRead); + //assert(orbEntry->issued); + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + // Read miss from dram cache, now is available + // to send the response back to requestor + if (orbEntry->owPkt->isRead() && !orbEntry->isHit) { + + logResponse(DCacheCtrl::READ, + orbEntry->owPkt->requestorId(), + orbEntry->owPkt->qosValue(), + orbEntry->owPkt->getAddr(), 1, + curTick() - orbEntry->arrivalTick); + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency, + dram); + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry(orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, 
+ orbEntry->farRdRecvd, + orbEntry->farRdExit); + delete orbEntry; + orbEntry = ORB.at(pktFarMemReadResp.front()->getAddr()); + + } + + orbEntry->dccPkt = dram->decodePacket(pktFarMemReadResp.front(), + pktFarMemReadResp.front()->getAddr(), + pktFarMemReadResp.front()->getSize(), + false); + orbEntry->dccPkt->entryTime = orbEntry->arrivalTick; + + // pass the second argument "false" to + // indicate a write access to DRAM + dram->setupRank(orbEntry->dccPkt->rank, false); + + //** transition to locMemWrite + orbEntry->state = locMemWrite; + orbEntry->issued = false; + orbEntry->farRdExit = curTick(); + orbEntry->locWrEntered = curTick(); + + pktLocMemWrite[0].push_back(orbEntry->dccPkt); + + dcstats.avgLocWrQLenEnq = pktLocMemWrite[0].size(); + + unsigned rdsNum = pktLocMemRead[0].size(); + unsigned wrsNum = pktLocMemWrite[0].size(); + + if ((rdsNum == 0 && wrsNum != 0) || + (wrsNum >= writeHighThreshold)) { + // stall reads, switch to writes + stallRds = true; + if (!locMemWriteEvent.scheduled() && !rescheduleLocWrite) { + schedule(locMemWriteEvent, + std::max(dram->nextReqTime, curTick())); + } + } + + if (pktLocMemWrite[0].size() > maxLocWrEvQ) { + maxLocWrEvQ = pktLocMemWrite[0].size(); + dcstats.maxLocWrEvQ = pktLocMemWrite[0].size(); + } + + delete pktFarMemReadResp.front(); + pktFarMemReadResp.pop_front(); + + if (!pktFarMemReadResp.empty() && !farMemReadRespEvent.scheduled()) { + schedule(farMemReadRespEvent, curTick()); + } + + /*std::cout << curTick() << " : " << + ORB.size() << ", " << + CRB.size() << ", " << + pktLocMemRead[0].size() << ", " << + pktLocMemWrite[0].size() << ", " << + pktFarMemRead.size() << ", " << + pktFarMemWrite.size() << ", " << + addrLocRdRespReady.size() << ", " << + pktFarMemReadResp.size() << " // " << + locMemReadEvent.scheduled() << ", " << + locMemWriteEvent.scheduled() << ", " << + farMemReadEvent.scheduled() << ", " << + farMemWriteEvent.scheduled() << ", " << + locMemReadRespEvent.scheduled() << ", " << + farMemReadRespEvent.scheduled() << " // " << + stallRds << ", " << + rescheduleLocRead << ", " << + rescheduleLocWrite << " // " << dram->isBusy(false, false) << "\n";*/ +} + +void +DCacheCtrl::processFarMemWriteEvent() +{ + assert(!pktFarMemWrite.empty()); + assert(!sendFarRdReq); + assert(!waitingForRetryReqPort); + auto wrPkt = pktFarMemWrite.front().second; + if (reqPort.sendTimingReq(wrPkt)) { + DPRINTF(DCacheCtrl, "FarWrSent: %lld\n", wrPkt->getAddr()); + dcstats.totTimeFarWrtoSend += ((curTick() - pktFarMemWrite.front().first)/1000); + pktFarMemWrite.pop_front(); + farWrCounter++; + dcstats.sentWrPort++; + } else { + DPRINTF(DCacheCtrl, "FarWrRetry: %lld\n", wrPkt->getAddr()); + waitingForRetryReqPort = true; + dcstats.failedWrPort++; + return; + } + + if (retryFMW && pktFarMemWrite.size()< (orbMaxSize / 2)) { + retryFMW = false; + port.sendRetryReq(); + } + + if (!pktFarMemWrite.empty() && + (farWrCounter < (orbMaxSize/8) || pktFarMemRead.empty())) { + + sendFarRdReq = false; + if (!farMemWriteEvent.scheduled()) { + schedule(farMemWriteEvent, curTick()+1000); + } + + return; + } + + if (farWrCounter >= (orbMaxSize/8) && !pktFarMemRead.empty()) { + + sendFarRdReq = true; + if (!farMemReadEvent.scheduled()) { + schedule(farMemReadEvent, curTick()+1000); + } + + return; + } +} + +void +DCacheCtrl::recvReqRetry() +{ + assert(waitingForRetryReqPort); + waitingForRetryReqPort = false; + + if (sendFarRdReq) { + if (!farMemReadEvent.scheduled()) { + schedule(farMemReadEvent, curTick()); + } + return; + } else { + if (!farMemWriteEvent.scheduled()) 
{ + schedule(farMemWriteEvent, curTick()); + } + return; + } +} + +bool +DCacheCtrl::recvTimingResp(PacketPtr pkt) // This is the equivalent of farMemReadRespEvent +{ + DPRINTF(DCacheCtrl, "recvTimingResp %lld, %s\n", pkt->getAddr(), pkt->cmdString()); + + if (pkt->isRead()) { + pktFarMemReadResp.push_back(pkt); + if (pktFarMemReadResp.size() > maxFarRdRespEvQ) { + maxFarRdRespEvQ = pktFarMemReadResp.size(); + dcstats.maxFarRdRespEvQ = pktFarMemReadResp.size(); + } + + if (!farMemReadRespEvent.scheduled()) { + schedule(farMemReadRespEvent, curTick()); + } + + ORB.at(pkt->getAddr())->farRdRecvd = curTick(); + dcstats.recvdRdPort++; + } else { + assert(pkt->isWrite()); + + delete pkt; + } + + return true; +} + +bool +DCacheCtrl::requestEventScheduled(uint8_t pseudo_channel) const +{ + assert(pseudo_channel == 0); + return (locMemReadEvent.scheduled() || locMemWriteEvent.scheduled()); +} + +void +DCacheCtrl::restartScheduler(Tick tick, uint8_t pseudo_channel) +{ + assert(pseudo_channel == 0); + if (!stallRds) { + //assert(rescheduleLocRead); + rescheduleLocRead = false; + if (!locMemReadEvent.scheduled() && !pktLocMemRead[0].empty()) { + schedule(locMemReadEvent, tick); + } + return; + } else { + //assert(rescheduleLocWrite); + rescheduleLocWrite = false; + if (!locMemWriteEvent.scheduled() && !pktLocMemWrite[0].empty()) { + schedule(locMemWriteEvent, tick); + } + return; + } + +} + +Port & +DCacheCtrl::getPort(const std::string &if_name, PortID idx) +{ + panic_if(idx != InvalidPortID, "This object doesn't support vector ports"); + + // This is the name from the Python SimObject declaration (DCacheCtrl.py) + if (if_name == "port") { + return port; + } else if (if_name == "req_port") { + return reqPort; + } else { + // pass it along to our super class + return qos::MemCtrl::getPort(if_name, idx); + } +} + + +///////////////////////////////////////////////////////////////////////////////////////// + +bool +DCacheCtrl::checkConflictInDramCache(PacketPtr pkt) +{ + unsigned indexDC = returnIndexDC(pkt->getAddr(), pkt->getSize()); + + for (auto e = ORB.begin(); e != ORB.end(); ++e) { + if (indexDC == e->second->indexDC && e->second->validEntry) { + + e->second->conflict = true; + + return true; + } + } + return false; +} + +Addr +DCacheCtrl::returnIndexDC(Addr request_addr, unsigned size) +{ + int index_bits = ceilLog2(dramCacheSize/blockSize); + int block_bits = ceilLog2(size); + return bits(request_addr, block_bits + index_bits-1, block_bits); +} + +Addr +DCacheCtrl::returnTagDC(Addr request_addr, unsigned size) +{ + int index_bits = ceilLog2(dramCacheSize/blockSize); + int block_bits = ceilLog2(size); + return bits(request_addr, addrSize-1, (index_bits+block_bits)); +} + +void +DCacheCtrl::checkHitOrMiss(reqBufferEntry* orbEntry) +{ + // access the tagMetadataStore data structure to + // check if it's hit or miss + + // orbEntry->isHit = + // tagMetadataStore.at(orbEntry->indexDC).validLine && + // (orbEntry->tagDC == tagMetadataStore.at(orbEntry->indexDC).tagDC); + + // if (!tagMetadataStore.at(orbEntry->indexDC).validLine && + // !orbEntry->isHit) { + // dcstats.numColdMisses++; + // } else if (tagMetadataStore.at(orbEntry->indexDC).validLine && + // !orbEntry->isHit) { + // dcstats.numHotMisses++; + // } + + // always hit + // orbEntry->isHit = true; + + // always miss + // orbEntry->isHit = false; + + orbEntry->isHit = alwaysHit; +} + +bool +DCacheCtrl::checkDirty(Addr addr) +{ + // Addr index = returnIndexDC(addr, blockSize); + // return (tagMetadataStore.at(index).validLine && + //
tagMetadataStore.at(index).dirtyLine); + + + // always dirty + // return true; + + // always clean + // return false; + + return alwaysDirty; +} + +void +DCacheCtrl::handleRequestorPkt(PacketPtr pkt) +{ + // Set is_read and is_dram to + // "true", to do initial DRAM Read + MemPacket* dcc_pkt = dram->decodePacket(pkt, + pkt->getAddr(), + pkt->getSize(), + true); + + // pass the second argument "true", for + // initial DRAM Read for all the received packets + dram->setupRank(dcc_pkt->rank, true); + + reqBufferEntry* orbEntry = new reqBufferEntry( + true, curTick(), + returnTagDC(pkt->getAddr(), pkt->getSize()), + returnIndexDC(pkt->getAddr(), pkt->getSize()), + pkt, dcc_pkt, + locMemRead, false, + false, false, + -1, false, + curTick(), MaxTick, MaxTick, + MaxTick, MaxTick, + MaxTick, MaxTick, MaxTick, MaxTick + ); + + ORB.emplace(pkt->getAddr(), orbEntry); + + // dcstats.avgORBLen = ORB.size(); + dcstats.avgORBLen = ORB.size(); + dcstats.avgLocRdQLenStrt = countLocRdInORB(); + dcstats.avgFarRdQLenStrt = countFarRdInORB(); + dcstats.avgLocWrQLenStrt = countLocWrInORB(); + dcstats.avgFarWrQLenStrt = countFarWr(); + + if (pkt->isRead()) { + logRequest(DCacheCtrl::READ, pkt->requestorId(), pkt->qosValue(), + pkt->getAddr(), 1); + } else { + //copying the packet + PacketPtr copyOwPkt = new Packet(pkt, false, pkt->isRead()); + + accessAndRespond(pkt, frontendLatency, dram); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry(orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + copyOwPkt, + orbEntry->dccPkt, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdRecvd, + orbEntry->farRdExit); + + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + logRequest(DCacheCtrl::WRITE, copyOwPkt->requestorId(), + copyOwPkt->qosValue(), + copyOwPkt->getAddr(), 1); + } + + checkHitOrMiss(orbEntry); + + if (checkDirty(orbEntry->owPkt->getAddr()) && !orbEntry->isHit) { + orbEntry->dirtyLineAddr = tagMetadataStore.at(orbEntry->indexDC).farMemAddr; + orbEntry->handleDirtyLine = true; + } + + // Updating Tag & Metadata + tagMetadataStore.at(orbEntry->indexDC).tagDC = orbEntry->tagDC; + tagMetadataStore.at(orbEntry->indexDC).indexDC = orbEntry->indexDC; + tagMetadataStore.at(orbEntry->indexDC).validLine = true; + + if (orbEntry->owPkt->isRead()) { + if (orbEntry->isHit) { + tagMetadataStore.at(orbEntry->indexDC).dirtyLine = + tagMetadataStore.at(orbEntry->indexDC).dirtyLine; + } else { + tagMetadataStore.at(orbEntry->indexDC).dirtyLine = false; + } + } else { + tagMetadataStore.at(orbEntry->indexDC).dirtyLine = true; + } + + tagMetadataStore.at(orbEntry->indexDC).farMemAddr = + orbEntry->owPkt->getAddr(); + + + Addr addr = pkt->getAddr(); + + unsigned burst_size = dram->bytesPerBurst(); + + unsigned size = std::min((addr | (burst_size - 1)) + 1, + addr + pkt->getSize()) - addr; + + if (pkt->isRead()) { + dcstats.readPktSize[ceilLog2(size)]++; + dcstats.readBursts++; + dcstats.requestorReadAccesses[pkt->requestorId()]++; + dcstats.readReqs++; + } else { + dcstats.writePktSize[ceilLog2(size)]++; + dcstats.writeBursts++; + dcstats.requestorWriteAccesses[pkt->requestorId()]++; + dcstats.writeReqs++; + } + + // std::cout << pkt->getAddr() << ", " << + // ORB.size() << ", " << + // countLocRdInORB() << 
", " << + // countFarRdInORB() << ", " << + // countLocWrInORB() << ", " << + // countFarWr() << "\n"; +} + +bool +DCacheCtrl::resumeConflictingReq(reqBufferEntry* orbEntry) +{ + assert(orbEntry->dccPkt->readyTime != MaxTick); + assert(orbEntry->dccPkt->readyTime >= curTick()); + dcstats.totPktsServiceTime += ((orbEntry->locWrExit - orbEntry->arrivalTick)/1000); + dcstats.totPktsORBTime += ((curTick() - orbEntry->arrivalTick)/1000); + dcstats.totTimeFarRdtoSend += ((orbEntry->farRdIssued - orbEntry->farRdEntered)/1000); + dcstats.totTimeFarRdtoRecv += ((orbEntry->farRdRecvd - orbEntry->farRdIssued)/1000); + dcstats.totTimeInLocRead += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + dcstats.totTimeInLocWrite += ((orbEntry->locWrExit - orbEntry->locWrEntered)/1000); + dcstats.totTimeInFarRead += ((orbEntry->farRdExit - orbEntry->farRdEntered)/1000); + + // std::cout << (orbEntry->farRdRecvd-orbEntry->arrivalTick)/1000 << ", " << orbEntry->arrivalTick << ", " << orbEntry->farRdRecvd << "\n"; + + // std::cout << ((orbEntry->locWrExit - orbEntry->arrivalTick)/1000) << ", " << + // ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000) << ", " << + // ((orbEntry->locWrExit - orbEntry->locWrEntered)/1000) << ", " << + // ((orbEntry->farRdExit - orbEntry->farRdEntered)/1000) << ", " << + // ((orbEntry->farRdIssued - orbEntry->farRdEntered)/1000) << ", " << + // ((orbEntry->farRdRecvd - orbEntry->farRdIssued)/1000) << "\n"; + + bool conflictFound = false; + + if (orbEntry->owPkt->isWrite()) { + isInWriteQueue.erase(orbEntry->owPkt->getAddr()); + } + + logStatsDcache(orbEntry); + + for (auto e = CRB.begin(); e != CRB.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(), entry.second->getSize()) + == orbEntry->indexDC) { + + conflictFound = true; + + Addr confAddr = entry.second->getAddr(); + + ORB.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry->dccPkt; + + delete orbEntry; + + handleRequestorPkt(entry.second); + + ORB.at(confAddr)->arrivalTick = entry.first; + + CRB.erase(e); + + checkConflictInCRB(ORB.at(confAddr)); + + pktLocMemRead[0].push_back(ORB.at(confAddr)->dccPkt); + + dcstats.avgLocRdQLenEnq = pktLocMemRead[0].size() + addrLocRdRespReady.size(); + + if (!stallRds && !rescheduleLocRead && !locMemReadEvent.scheduled()) { + schedule(locMemReadEvent, std::max(dram->nextReqTime, curTick())); + } + + if (pktLocMemRead[0].size() > maxLocRdEvQ) { + maxLocRdEvQ = pktLocMemRead[0].size(); + dcstats.maxLocRdEvQ = pktLocMemRead[0].size(); + } + + break; + } + + } + + if (!conflictFound) { + + ORB.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry->dccPkt; + + delete orbEntry; + } + + return conflictFound; +} + +void +DCacheCtrl::checkConflictInCRB(reqBufferEntry* orbEntry) +{ + for (auto e = CRB.begin(); e != CRB.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(),entry.second->getSize()) + == orbEntry->indexDC) { + orbEntry->conflict = true; + break; + } + } +} + +void +DCacheCtrl::logStatsDcache(reqBufferEntry* orbEntry) +{ + +} + +void +DCacheCtrl::handleDirtyCacheLine(reqBufferEntry* orbEntry) +{ + assert(orbEntry->dirtyLineAddr != -1); + + // create a new request packet + PacketPtr wbPkt = getPacket(orbEntry->dirtyLineAddr, + orbEntry->owPkt->getSize(), + MemCmd::WriteReq); + + pktFarMemWrite.push_back(std::make_pair(curTick(), wbPkt)); + + dcstats.avgFarWrQLenEnq = pktFarMemWrite.size(); + + if ( + ((pktFarMemWrite.size() >= (orbMaxSize/2)) || 
(!pktFarMemWrite.empty() && pktFarMemRead.empty())) &&
+        !waitingForRetryReqPort
+       ) {
+        sendFarRdReq = false;
+        if (!farMemWriteEvent.scheduled()) {
+            schedule(farMemWriteEvent, curTick());
+        }
+    }
+
+    if (pktFarMemWrite.size() > maxFarWrEvQ) {
+        maxFarWrEvQ = pktFarMemWrite.size();
+        dcstats.maxFarWrEvQ = pktFarMemWrite.size();
+    }
+
+    dcstats.numWrBacks++;
+}
+
+PacketPtr
+DCacheCtrl::getPacket(Addr addr, unsigned size, const MemCmd& cmd,
+                      Request::FlagsType flags)
+{
+    // Create the new request
+    RequestPtr req = std::make_shared<Request>(addr, size, flags, 0);
+    // Dummy PC to have PC-based prefetchers latch on; get entropy into higher
+    // bits
+    req->setPC(((Addr)0) << 2);
+
+    // Embed it in a packet
+    PacketPtr pkt = new Packet(req, cmd);
+
+    uint8_t* pkt_data = new uint8_t[req->getSize()];
+    pkt->dataDynamic(pkt_data);
+
+    if (cmd.isWrite()) {
+        std::fill_n(pkt_data, req->getSize(), (uint8_t)0);
+    }
+
+    return pkt;
+}
+
+void
+DCacheCtrl::dirtAdrGen()
+{
+    // (body elided)
+}
+
+unsigned
+DCacheCtrl::countLocRdInORB()
+{
+    unsigned count = 0;
+    for (auto i : ORB) {
+        if (i.second->state == locMemRead) {
+            count++;
+        }
+    }
+    return count;
+}
+
+unsigned
+DCacheCtrl::countFarRdInORB()
+{
+    unsigned count = 0;
+    for (auto i : ORB) {
+        if (i.second->state == farMemRead) {
+            count++;
+        }
+    }
+    return count;
+}
+
+unsigned
+DCacheCtrl::countLocWrInORB()
+{
+    unsigned count = 0;
+    for (auto i : ORB) {
+        if (i.second->state == locMemWrite) {
+            count++;
+        }
+    }
+    return count;
+}
+
+unsigned
+DCacheCtrl::countFarWr()
+{
+    return pktFarMemWrite.size();
+}
+
+/* reqBufferEntry*
+DCacheCtrl::makeOrbEntry(reqBufferEntry* orbEntry, PacketPtr copyOwPkt)
+{
+    return new reqBufferEntry(orbEntry->validEntry,
+                              orbEntry->arrivalTick,
+                              orbEntry->tagDC,
+                              orbEntry->indexDC,
+                              copyOwPkt,
+                              orbEntry->dccPkt,
+                              orbEntry->state,
+                              orbEntry->issued,
+                              orbEntry->isHit,
+                              orbEntry->conflict,
+                              orbEntry->dirtyLineAddr,
+                              orbEntry->handleDirtyLine,
+                              orbEntry->locRdEntered,
+                              orbEntry->locRdIssued,
+                              orbEntry->locRdExit,
+                              orbEntry->locWrEntered,
+                              orbEntry->locWrExit,
+                              orbEntry->farRdEntered,
+                              orbEntry->farRdIssued,
+                              orbEntry->farRdRecvd,
+                              orbEntry->farRdExit);
+} */
+
+} // namespace memory
+} // namespace gem5
diff --git a/src/mem/dram_cache_ctrl.hh b/src/mem/dram_cache_ctrl.hh
new file mode 100644
index 0000000000..20f4b4fde3
--- /dev/null
+++ b/src/mem/dram_cache_ctrl.hh
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2012-2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * DCacheCtrl declaration
+ */
+
+#ifndef __DCACHE_CTRL_HH__
+#define __DCACHE_CTRL_HH__
+
+#include <deque>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+#include "mem/mem_ctrl.hh"
+#include "params/DCacheCtrl.hh"
+
+namespace gem5
+{
+
+namespace memory
+{
+class DCacheCtrl : public MemCtrl
+{
+  private:
+
+    class RequestPortDCache : public RequestPort
+    {
+      public:
+
+        RequestPortDCache(const std::string& name, DCacheCtrl& _ctrl)
+            : RequestPort(name, &_ctrl), ctrl(_ctrl)
+        { }
+
+      protected:
+
+        void recvReqRetry()
+        { ctrl.recvReqRetry(); }
+
+        bool recvTimingResp(PacketPtr pkt)
+        { return ctrl.recvTimingResp(pkt); }
+
+        // to send timing requests it calls bool sendTimingReq(PacketPtr pkt);
+
+        // void recvTimingSnoopReq(PacketPtr pkt) { }
+
+        // void recvFunctionalSnoop(PacketPtr pkt) { }
+
+        // Tick recvAtomicSnoop(PacketPtr pkt) { return 0; }
+
+      private:
+
+        DCacheCtrl& ctrl;
+    };
+
+    /**
+     * Outgoing port; for a multi-ported controller add a crossbar
+     * in front of it
+     */
+    RequestPortDCache reqPort;
+
+    /**
+     * The following are basic design parameters of the unified
+     * DRAM cache controller, and are initialized based on parameter values.
+     * The rowsPerBank is determined based on the capacity, number of
+     * ranks and banks, the burst size, and the row buffer size.
+     */
+
+    unsigned long long dramCacheSize;
+    unsigned blockSize;
+    unsigned addrSize;
+    unsigned orbMaxSize;
+    unsigned orbSize;
+    unsigned crbMaxSize;
+    unsigned crbSize;
+    bool alwaysHit;
+    bool alwaysDirty;
+
+    struct tagMetaStoreEntry
+    {
+        // DRAM cache related metadata
+        Addr tagDC;
+        Addr indexDC;
+        // flag to indicate that the cache line is valid
+        bool validLine = false;
+        // flag to indicate that the cache line is dirty
+        bool dirtyLine = false;
+        Addr farMemAddr;
+    };
+
+    /** A storage to keep the tag and metadata for the
+     * DRAM Cache entries.
+     */
+    std::vector<tagMetaStoreEntry> tagMetadataStore;
+
+    /** Different states a packet can transition through
+     * while it is being processed in the DRAM Cache
+     * Controller.
+     */
+    enum reqState
+    {
+        locMemRead, locMemWrite,
+        farMemRead, farMemWrite
+    };
+
+    /**
+     * A class for the entries of the
+     * outstanding request buffer (ORB).
+     */
+    class reqBufferEntry
+    {
+      public:
+
+        bool validEntry;
+        Tick arrivalTick;
+
+        // DRAM cache related metadata
+        Addr tagDC;
+        Addr indexDC;
+
+        // pointer to the outside world (ow) packet received from the LLC
+        const PacketPtr owPkt;
+
+        // pointer to the dram cache controller (dcc) packet
+        MemPacket* dccPkt;
+
+        reqState state;
+        bool issued;
+        bool isHit;
+        bool conflict;
+
+        Addr dirtyLineAddr;
+        bool handleDirtyLine;
+
+        // Ticks recording when the request transitions into a new state.
+        // The difference between the entrance ticks of two consecutive
+        // states is the number of ticks the request spent in the earlier
+        // state. The difference between the entrance and issue ticks of a
+        // state is the time the request waited in that state.
+        Tick locRdEntered;
+        Tick locRdIssued;
+        Tick locRdExit;
+        Tick locWrEntered;
+        Tick locWrExit;
+        Tick farRdEntered;
+        Tick farRdIssued;
+        Tick farRdRecvd;
+        Tick farRdExit;
+
+        reqBufferEntry(
+            bool _validEntry, Tick _arrivalTick,
+            Addr _tagDC, Addr _indexDC,
+            PacketPtr _owPkt, MemPacket* _dccPkt,
+            reqState _state, bool _issued,
+            bool _isHit, bool _conflict,
+            Addr _dirtyLineAddr, bool _handleDirtyLine,
+            Tick _locRdEntered, Tick _locRdIssued, Tick _locRdExit,
+            Tick _locWrEntered, Tick _locWrExit,
+            Tick _farRdEntered, Tick _farRdIssued, Tick _farRdRecvd,
+            Tick _farRdExit)
+            :
+            validEntry(_validEntry), arrivalTick(_arrivalTick),
+            tagDC(_tagDC), indexDC(_indexDC),
+            owPkt(_owPkt), dccPkt(_dccPkt),
+            state(_state), issued(_issued),
+            isHit(_isHit), conflict(_conflict),
+            dirtyLineAddr(_dirtyLineAddr), handleDirtyLine(_handleDirtyLine),
+            locRdEntered(_locRdEntered), locRdIssued(_locRdIssued),
+            locRdExit(_locRdExit),
+            locWrEntered(_locWrEntered), locWrExit(_locWrExit),
+            farRdEntered(_farRdEntered), farRdIssued(_farRdIssued),
+            farRdRecvd(_farRdRecvd), farRdExit(_farRdExit)
+        { }
+    };
+
+    /**
+     * The outstanding request buffer (ORB), the main data structure of
+     * the DRAM Cache Controller. The key is the request address; for each
+     * key the map returns a reqBufferEntry that maintains all the info
+     * related to that address while it is being processed in the DRAM
+     * Cache Controller.
+     */
+    std::map<Addr, reqBufferEntry*> ORB;
+
+    typedef std::pair<Tick, PacketPtr> timeReqPair;
+
+    /**
+     * The second important data structure within the DRAM cache
+     * controller. It holds received packets that conflict with addresses
+     * still being processed in the controller. Once those addresses
+     * finish processing, the Conflicting Requests Buffer (CRB) is
+     * consulted to see whether any packet can be moved into the
+     * outstanding request buffer and start being processed in the DRAM
+     * cache controller.
+     */
+    std::vector<timeReqPair> CRB;
+
+    /**
+     * A unified retry flag for both reads and writes.
+     * It helps remember if we have to retry a request when available.
+ */ + bool retry; + bool retryFMW; + + // Counters and flags to keep track of read/write switchings + // stallRds: A flag to stop processing reads and switching to writes + bool stallRds; + bool sendFarRdReq; + bool waitingForRetryReqPort; + bool rescheduleLocRead; + bool rescheduleLocWrite; + float locWrDrainPerc; + unsigned minLocWrPerSwitch; + unsigned minFarWrPerSwitch; + unsigned locWrCounter; + unsigned farWrCounter; + + /** + * A queue for evicted dirty lines of DRAM cache, + * to be written back to the backing memory. + * These packets are not maintained in the ORB. + */ + std::deque pktFarMemWrite; + + // Maintenance Queues + std::vector pktLocMemRead; + std::vector pktLocMemWrite; + std::deque pktFarMemRead; + std::deque pktFarMemReadResp; + + std::deque addrLocRdRespReady; + //std::deque addrFarRdRespReady; + + // Maintenance variables + unsigned maxConf, maxLocRdEvQ, maxLocRdRespEvQ, + maxLocWrEvQ, maxFarRdEvQ, maxFarRdRespEvQ, maxFarWrEvQ; + + // needs be reimplemented + bool recvTimingReq(PacketPtr pkt) override; + + void accessAndRespond(PacketPtr pkt, Tick static_latency, + MemInterface* mem_intr) override; + + // events + void processLocMemReadEvent(); + EventFunctionWrapper locMemReadEvent; + + void processLocMemReadRespEvent(); + EventFunctionWrapper locMemReadRespEvent; + + void processLocMemWriteEvent(); + EventFunctionWrapper locMemWriteEvent; + + void processFarMemReadEvent(); + EventFunctionWrapper farMemReadEvent; + + void processFarMemReadRespEvent(); + EventFunctionWrapper farMemReadRespEvent; + + void processFarMemWriteEvent(); + EventFunctionWrapper farMemWriteEvent; + + // management functions + void printQSizes(); + void handleRequestorPkt(PacketPtr pkt); + void checkHitOrMiss(reqBufferEntry* orbEntry); + bool checkDirty(Addr addr); + void handleDirtyCacheLine(reqBufferEntry* orbEntry); + bool checkConflictInDramCache(PacketPtr pkt); + void checkConflictInCRB(reqBufferEntry* orbEntry); + bool resumeConflictingReq(reqBufferEntry* orbEntry); + void logStatsDcache(reqBufferEntry* orbEntry); + //reqBufferEntry* makeOrbEntry(reqBufferEntry* orbEntry, PacketPtr copyOwPkt); + PacketPtr getPacket(Addr addr, unsigned size, const MemCmd& cmd, Request::FlagsType flags = 0); + void dirtAdrGen(); + + unsigned countLocRdInORB(); + unsigned countFarRdInORB(); + unsigned countLocWrInORB(); + unsigned countFarWr(); + + Addr returnIndexDC(Addr pkt_addr, unsigned size); + Addr returnTagDC(Addr pkt_addr, unsigned size); + + // port management + void recvReqRetry(); + + void retryReq(); + + bool recvTimingResp(PacketPtr pkt); + + /** Packet waiting to be sent. */ + PacketPtr retryPkt; + + /** Tick when the stalled packet was meant to be sent. 
*/ + // Tick retryPktTick; + + /** Reqs waiting for response **/ + std::unordered_map waitingResp; + + unsigned maxOutstandingReqs = 0; + + struct DCCtrlStats : public statistics::Group + { + DCCtrlStats(DCacheCtrl &ctrl); + + void regStats() override; + + DCacheCtrl &ctrl; + + // All statistics that the model needs to capture + statistics::Scalar readReqs; + statistics::Scalar writeReqs; + statistics::Scalar readBursts; + statistics::Scalar writeBursts; + statistics::Scalar servicedByWrQ; + statistics::Scalar mergedWrBursts; + statistics::Scalar neitherReadNorWriteReqs; + // Average queue lengths + statistics::Average avgRdQLen; + statistics::Average avgWrQLen; + + statistics::Scalar numRdRetry; + statistics::Scalar numWrRetry; + statistics::Vector readPktSize; + statistics::Vector writePktSize; + statistics::Vector rdQLenPdf; + statistics::Vector wrQLenPdf; + statistics::Histogram rdPerTurnAround; + statistics::Histogram wrPerTurnAround; + + statistics::Scalar bytesReadWrQ; + statistics::Scalar bytesReadSys; + statistics::Scalar bytesWrittenSys; + // Average bandwidth + statistics::Formula avgRdBWSys; + statistics::Formula avgWrBWSys; + + statistics::Scalar totGap; + statistics::Formula avgGap; + + // per-requestor bytes read and written to memory + statistics::Vector requestorReadBytes; + statistics::Vector requestorWriteBytes; + + // per-requestor bytes read and written to memory rate + statistics::Formula requestorReadRate; + statistics::Formula requestorWriteRate; + + // per-requestor read and write serviced memory accesses + statistics::Vector requestorReadAccesses; + statistics::Vector requestorWriteAccesses; + + // per-requestor read and write total memory access latency + statistics::Vector requestorReadTotalLat; + statistics::Vector requestorWriteTotalLat; + + // per-requestor raed and write average memory access latency + statistics::Formula requestorReadAvgLat; + statistics::Formula requestorWriteAvgLat; + + statistics::Average avgORBLen; + statistics::Average avgLocRdQLenStrt; + statistics::Average avgLocWrQLenStrt; + statistics::Average avgFarRdQLenStrt; + statistics::Average avgFarWrQLenStrt; + + statistics::Average avgLocRdQLenEnq; + statistics::Average avgLocWrQLenEnq; + statistics::Average avgFarRdQLenEnq; + statistics::Average avgFarWrQLenEnq; + + + statistics::Scalar numWrBacks; + statistics::Scalar totNumConf; + statistics::Scalar totNumORBFull; + statistics::Scalar totNumConfBufFull; + + statistics::Scalar maxNumConf; + statistics::Scalar maxLocRdEvQ; + statistics::Scalar maxLocRdRespEvQ; + statistics::Scalar maxLocWrEvQ; + statistics::Scalar maxFarRdEvQ; + statistics::Scalar maxFarRdRespEvQ; + statistics::Scalar maxFarWrEvQ; + + statistics::Scalar rdToWrTurnAround; + statistics::Scalar wrToRdTurnAround; + + statistics::Scalar sentRdPort; + statistics::Scalar failedRdPort; + statistics::Scalar recvdRdPort; + statistics::Scalar sentWrPort; + statistics::Scalar failedWrPort; + + statistics::Scalar totPktsServiceTime; + statistics::Scalar totPktsORBTime; + statistics::Scalar totTimeFarRdtoSend; + statistics::Scalar totTimeFarRdtoRecv; + statistics::Scalar totTimeFarWrtoSend; + statistics::Scalar totTimeInLocRead; + statistics::Scalar totTimeInLocWrite; + statistics::Scalar totTimeInFarRead; + statistics::Scalar QTLocRd; + statistics::Scalar QTLocWr; + }; + + DCCtrlStats dcstats; + + public: + + DCacheCtrl(const DCacheCtrlParams &p); + + void init() override; + + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID) override; + + // TODO: write events + 
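+    // The local-memory scheduler hooks used by the memory interface:
+    // requestEventScheduled() reports whether a local read or write event
+    // is already pending, and restartScheduler() re-schedules the local
+    // read event (or the local write event when stallRds is set).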
bool requestEventScheduled(uint8_t pseudo_channel = 0) const override; + void restartScheduler(Tick tick, uint8_t pseudo_channel = 0) override; + bool respondEventScheduled(uint8_t pseudo_channel = 0) const override { return locMemReadRespEvent.scheduled(); } + +}; + +} // namespace memory +} // namespace gem5 + +#endif //__DCACHE_CTRL_HH__ diff --git a/src/mem/dram_interface.cc b/src/mem/dram_interface.cc index 65e06db4d3..06aedcff50 100644 --- a/src/mem/dram_interface.cc +++ b/src/mem/dram_interface.cc @@ -44,8 +44,11 @@ #include "base/cprintf.hh" #include "base/trace.hh" #include "debug/DRAM.hh" +#include "debug/DRAMT.hh" #include "debug/DRAMPower.hh" #include "debug/DRAMState.hh" +#include "debug/MemCtrl.hh" +#include "enums/Policy.hh" #include "sim/system.hh" namespace gem5 @@ -59,6 +62,7 @@ namespace memory std::pair DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const { + DPRINTF(DRAM, "in dram->chooseNextFRFCFS func\n"); std::vector earliest_banks(ranksPerChannel, 0); // Has minBankPrep been called to populate earliest_banks? @@ -93,13 +97,12 @@ DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt : bank.wrAllowedAt; - DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n", - __func__, pkt->bank, pkt->row); + DPRINTF(DRAM, "%s : %d, %d, %d, checking DRAM packet in bank %d, row %d, min: %d, cmdRd/Wr: %d, act: %d, pre: %d\n", + __func__, queue.size(), pkt->isRead(), pkt->isTagCheck, pkt->bank, pkt->row, min_col_at/1000, col_allowed_at/1000, bank.actAllowedAt/1000, bank.preAllowedAt/1000); // check if rank is not doing a refresh and thus is available, // if not, jump to the next packet if (burstReady(pkt)) { - DPRINTF(DRAM, "%s bank %d - Rank %d available\n", __func__, pkt->bank, pkt->rank); @@ -137,6 +140,7 @@ DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const std::tie(earliest_banks, hidden_bank_prep) = minBankPrep(queue, min_col_at); filled_earliest_banks = true; + DPRINTF(DRAM, "%s !filled_earliest_banks\n", __func__); } // bank is amongst first available banks @@ -154,12 +158,15 @@ DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const if (hidden_bank_prep || !found_prepped_pkt) { selected_pkt_it = i; selected_col_at = col_allowed_at; + DPRINTF(DRAM, "%s behind the scenes: %d\n", __func__, bits(earliest_banks[pkt->rank], + pkt->bank, pkt->bank)); + } } } } else { DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__, - pkt->bank, pkt->rank); + pkt->bank, pkt->rank); } } } @@ -171,19 +178,20 @@ DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const return std::make_pair(selected_pkt_it, selected_col_at); } -void +Tick DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, - Tick act_tick, uint32_t row) + Tick act_tick, uint32_t row, bool isTagCheck) { assert(rank_ref.actTicks.size() == activationLimit); // verify that we have command bandwidth to issue the activate // if not, shift to next burst window Tick act_at; - if (twoCycleActivate) + if (twoCycleActivate) { act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD); - else + } else { act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow, true); + } DPRINTF(DRAM, "Activate at tick %d\n", act_at); @@ -213,9 +221,10 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, // The next access has to respect tRAS for this bank bank_ref.preAllowedAt = act_at + tRAS; + // Respect the row-to-column command 
delay for both read and write cmds bank_ref.rdAllowedAt = std::max(act_at + tRCD_RD, bank_ref.rdAllowedAt); - bank_ref.wrAllowedAt = std::max(act_at + tRCD_WR, bank_ref.wrAllowedAt); + bank_ref.wrAllowedAt = isTagCheck ? std::max(act_at + tRCD_RD + tRTW_int, bank_ref.wrAllowedAt): std::max(act_at + tRCD_WR, bank_ref.wrAllowedAt); // start by enforcing tRRD for (int i = 0; i < banksPerRank; i++) { @@ -263,11 +272,13 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate " "no earlier than %llu\n", activationLimit, rank_ref.actTicks.back() + tXAW); - for (int j = 0; j < banksPerRank; j++) + for (int j = 0; j < banksPerRank; j++) { // next activate must not happen before end of window rank_ref.banks[j].actAllowedAt = - std::max(rank_ref.actTicks.back() + tXAW, - rank_ref.banks[j].actAllowedAt); + std::max(rank_ref.actTicks.back() + tXAW, rank_ref.banks[j].actAllowedAt); + if (rank_ref.actTicks.back() + tXAW > rank_ref.banks[j].actAllowedAt) { + } + } } } @@ -278,6 +289,8 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, else if (rank_ref.activateEvent.when() > act_at) // move it sooner in time reschedule(rank_ref.activateEvent, act_at); + + return act_at; } void @@ -316,7 +329,7 @@ DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick, assert(rank_ref.numBanksActive != 0); --rank_ref.numBanksActive; - DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got " + DPRINTF(DRAMT, "Precharging bank %d, rank %d at tick %lld, now got " "%d active\n", bank.bank, rank_ref.rank, pre_at, rank_ref.numBanksActive); @@ -350,9 +363,9 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, DPRINTF(DRAM, "Timing access to addr %#x, rank/bank/row %d %d %d\n", mem_pkt->addr, mem_pkt->rank, mem_pkt->bank, mem_pkt->row); + Tick act_at = MaxTick; // get the rank Rank& rank_ref = *ranks[mem_pkt->rank]; - assert(rank_ref.inRefIdleState()); // are we in or transitioning to a low-power state and have not scheduled @@ -382,28 +395,43 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, } // next we need to account for the delay in activating the page - Tick act_tick = std::max(bank_ref.actAllowedAt, curTick()); + Tick act_tick; + if (mem_pkt->isLocMem) { + if (polMan->locMemPolicy == enums::RambusTagProbOpt) { + act_tick = std::max(std::max(bank_ref.tagActAllowedAt, bank_ref.actAllowedAt), curTick()); + + if (bank_ref.tagActAllowedAt > bank_ref.actAllowedAt && bank_ref.tagActAllowedAt > curTick()) { + stats.actDelayedDueToTagAct++; + } + } else { + act_tick = std::max(bank_ref.actAllowedAt, curTick()); + } + } else { + act_tick = std::max(bank_ref.actAllowedAt, curTick()); + } // Record the activation and deal with all the global timing // constraints caused be a new activation (tRRD and tXAW) - activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row); + act_at = activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row, mem_pkt->isTagCheck); } // respect any constraints on the command (e.g. tRCD or tCCD) - const Tick col_allowed_at = mem_pkt->isRead() ? - bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; + const Tick col_allowed_at = mem_pkt->isRead() ? 
bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; // we need to wait until the bus is available before we can issue // the command; need to ensure minimum bus delay requirement is met Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()}); + // verify that we have command bandwidth to issue the burst // if not, shift to next burst window Tick max_sync = clkResyncDelay + (mem_pkt->isRead() ? tRL : tWL); - if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > max_sync)) + if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > max_sync)) { cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); - else + } + else { cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow, false); + } // if we are interleaving bursts, ensure that // 1) we don't double interleave on next burst issue @@ -421,15 +449,217 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, cmd_at = rank_ref.lastBurstTick + tBURST; } } + DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at); // update the packet ready time - if (mem_pkt->isRead()) { - mem_pkt->readyTime = cmd_at + tRL + tBURST; + Tick stall_delay = 0; + if(mem_pkt->isTagCheck) { + + assert(mem_pkt->isLocMem); + + // Calculating the tag check ready time + if (mem_pkt->pkt->owIsRead) { + assert((cmd_at + tRCD_FAST + tRL_FAST) > tRCD_RD); + mem_pkt->tagCheckReady = (cmd_at + tRCD_FAST + tRL_FAST) - tRCD_RD; + } else { + assert((cmd_at + tRCD_FAST + tRL_FAST) > (tRCD_RD + tRTW_int)); + mem_pkt->tagCheckReady = (cmd_at + tRCD_FAST + tRL_FAST) - (tRCD_RD + tRTW_int); + } + stats.tagResBursts++; + + // tag is sent back only for Rd Miss Cleans, for other cases tag is already known. + if (!mem_pkt->pkt->owIsRead && !mem_pkt->pkt->isHit && mem_pkt->pkt->isDirty) { + mem_pkt->tagCheckReady += tTAGBURST; + stats.tagBursts++; + } + + if (polMan->locMemPolicy == enums::RambusTagProbOpt) { + assert((mem_pkt->tagCheckReady + tRC_FAST) > (tRL_FAST + tRCD_FAST)); + bank_ref.tagActAllowedAt = (mem_pkt->tagCheckReady + tRC_FAST) - (tRL_FAST + tRCD_FAST); + } + + // Calculating the data ready time + if (mem_pkt->pkt->owIsRead) { + + mem_pkt->readyTime = cmd_at + std::max(tRL, tRL_FAST + tHM2DQ) + tBURST; + + // Rd Miss Clean + if (mem_pkt->pkt->owIsRead && !mem_pkt->pkt->isHit && !mem_pkt->pkt->isDirty) { + + if (!flushBuffer.empty()) { + + assert(!mem_pkt->pkt->hasDirtyData); + mem_pkt->pkt->hasDirtyData = true; + + assert(mem_pkt->pkt->dirtyLineAddr == -1); + mem_pkt->pkt->dirtyLineAddr = flushBuffer.front(); + + flushBuffer.pop_front(); + + stats.totReadFBByRdMC++; + + DPRINTF(DRAM, "Rd M C !FB.empty: %x\n", mem_pkt->pkt->dirtyLineAddr); + } + else { + DPRINTF(DRAM, "Rd M C FB.empty: %x\n", mem_pkt->pkt->dirtyLineAddr); + } + } + + // Rd Miss Dirty + if (mem_pkt->pkt->owIsRead && !mem_pkt->pkt->isHit && mem_pkt->pkt->isDirty) { + + assert(mem_pkt->pkt->dirtyLineAddr != -1); + + assert(!mem_pkt->pkt->hasDirtyData); + + mem_pkt->pkt->hasDirtyData = true; + + DPRINTF(DRAM, "Rd M D: %x\n", mem_pkt->addr); + } + + // stats + // Every respQueue which will generate an event, increment count + ++rank_ref.outstandingEvents; + + if (!(mem_pkt->pkt->owIsRead && !mem_pkt->pkt->isHit && !mem_pkt->pkt->isDirty && !mem_pkt->pkt->hasDirtyData)) { + stats.readBursts++; + if (row_hit) { + stats.readRowHits++; + } + stats.bytesRead += burstSize; + } + + if (!(mem_pkt->pkt->owIsRead && !mem_pkt->pkt->isHit && !mem_pkt->pkt->isDirty)) { + stats.perBankRdBursts[mem_pkt->bankId]++; + // Update latency stats + stats.totMemAccLat += mem_pkt->readyTime - 
mem_pkt->entryTime; + stats.totQLat += cmd_at - mem_pkt->entryTime; + stats.totBusLat += tBURST; + } else { + stats.totMemAccLat += mem_pkt->tagCheckReady - mem_pkt->entryTime; + stats.totQLat += cmd_at - mem_pkt->entryTime; + stats.totBusLat += tBURST; + stats.readMC++; + } + + // Update latency stats + // stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime; + // stats.totQLat += cmd_at - mem_pkt->entryTime; + // stats.totBusLat += tBURST; + } + // Wr + else { + assert(!mem_pkt->pkt->owIsRead); + + mem_pkt->readyTime = cmd_at + tWL + tBURST; + + if (!mem_pkt->pkt->isHit && mem_pkt->pkt->isDirty) { + + DPRINTF(DRAM, "Wr M D: %x\n", mem_pkt->addr); + + Tick pushBackFBTick = cmd_at + tCCD_L; + + if (tempFlushBuffer.empty()) { + assert(!addToFlushBufferEvent.scheduled()); + schedule(addToFlushBufferEvent, pushBackFBTick); + } else { + assert(tempFlushBuffer.back().first <= pushBackFBTick); + assert(addToFlushBufferEvent.scheduled()); + } + + tempFlushBuffer.push_back(std::make_pair(pushBackFBTick, mem_pkt->pkt->dirtyLineAddr)); + + if ((tempFlushBuffer.size() + flushBuffer.size()) >= (banksPerRank * flushBufferHighThreshold) && + !readFlushBufferEvent.scheduled() && + !flushBuffer.empty()) { + + // Flush the flushBuffer and send some dirty data + // to the controller. + + assert(endOfReadFlushBuffPeriod == 0); + + assert(readFlushBufferCount == 0); + + stall_delay = tRFBD + (tempFlushBuffer.size() + flushBuffer.size()) * tBURST; + + mem_pkt->readyTime += stall_delay; + + endOfReadFlushBuffPeriod = cmd_at + tWL + stall_delay; + + schedule(readFlushBufferEvent, cmd_at + tWL + tRFBD + tBURST); + DPRINTF(DRAM, "wr M D Schd FBRdEv: %x at %d\n", mem_pkt->addr, cmd_at + tWL + tRFBD + tBURST); + + stats.totStallToFlushFB++; + + cmd_at += stall_delay; + } + + } + + // stats + if (!rank_ref.writeDoneEvent.scheduled()) { + schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); + // New event, increment count + ++rank_ref.outstandingEvents; + + } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) { + reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); + } + // will remove write from queue when returned to parent function + // decrement count for DRAM rank + --rank_ref.writeEntries; + + stats.writeBurstsTC++; + if (row_hit) { + stats.writeRowHits++; + } + stats.bytesWritten += burstSize; + stats.perBankWrBursts[mem_pkt->bankId]++; + + // Update latency stats + stats.totMemAccLatWrTC += mem_pkt->readyTime - mem_pkt->entryTime; + stats.totQLatWrTC += cmd_at - mem_pkt->entryTime; + stats.totBusLatWrTC += tBURST; + } + } else { - mem_pkt->readyTime = cmd_at + tWL + tBURST; + // assert(mem_pkt->tagCheckReady == MaxTick); + if (mem_pkt->isRead()) { + mem_pkt->readyTime = cmd_at + tRL + tBURST; + if (mem_pkt->isLocMem) { + if(polMan->locMemPolicy == enums::RambusTagProbOpt && + !mem_pkt->pkt->isHit && + mem_pkt->pkt->isDirty) { + // a probed Rd Miss Dirty + mem_pkt->pkt->hasDirtyData = true; + } + } + } else { + mem_pkt->readyTime = cmd_at + tWL + tBURST; + } } + // Tag probing B slot comes here. + // For now we only prob for read requests. + // NOTE: both tag check packets and regular packets can reach here. Thus: + // some mem_pkt may not have a ow pkt pointer, like fills! 
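+    // For the RambusTagProbOpt policy, BSlotBusyUntil records the tick until
+    // which this access occupies the bank's fast tag-check (B) slot: tag-check
+    // packets derive it from tagActAllowedAt, while other packets use the ACT
+    // time plus tRC_FAST.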
+ if (mem_pkt->isLocMem) { + if (polMan->locMemPolicy == enums::RambusTagProbOpt) { + assert(mem_pkt->BSlotBusyUntil==MaxTick); + if (mem_pkt->isTagCheck) { + mem_pkt->BSlotBusyUntil = bank_ref.tagActAllowedAt - tRL_FAST + tRC_FAST; + } else { + mem_pkt->BSlotBusyUntil = std::max(act_at,bank_ref.tagActAllowedAt) + tRC_FAST; + } + } + } + + + DPRINTF(DRAMT, "curr pkt, addr: %d, isRd: %d, isTC: %d, bank %d, row %d, act: %d, RdAlw: %d, WrAlw: %d, cmd: %d, rdy: %d\n", + mem_pkt->getAddr(), mem_pkt->isRead(), mem_pkt->isTagCheck, (unsigned) mem_pkt->bank, (unsigned) mem_pkt->row, + act_at/1000, bank_ref.rdAllowedAt/1000, bank_ref.wrAllowedAt/1000, cmd_at/1000, mem_pkt->readyTime/1000); + rank_ref.lastBurstTick = cmd_at; // update the time for the next read/write burst for each @@ -447,16 +677,16 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, // tCCD_L_WR is required for write-to-write // Need to also take bus turnaround delays into account dly_to_rd_cmd = mem_pkt->isRead() ? - tCCD_L : std::max(tCCD_L, wrToRdDlySameBG); + tCCD_L : std::max(tCCD_L, wrToRdDlySameBG); // 2 : 13 wrToRdDlySameBG(tWL + _p.tBURST_MAX + _p.tWTR_L), dly_to_wr_cmd = mem_pkt->isRead() ? - std::max(tCCD_L, rdToWrDlySameBG) : + std::max(tCCD_L, rdToWrDlySameBG) : // 20 : 2 rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX), tCCD_L_WR; } else { // tBURST is default requirement for diff BG timing // Need to also take bus turnaround delays into account - dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap : + dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap : // 2 : 13 tBURST + tWTR + tWL; writeToReadDelay(); - dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() : + dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() : // 20 : 2 tBURST + tRTW; burst_gap; } } else { @@ -482,6 +712,7 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt, mem_pkt->isRead() ? 
cmd_at + tRTP : mem_pkt->readyTime + tWR); + DPRINTF(DRAMT, "doBurstFunc, bank: %d, PRE: %d\n", (unsigned)bank_ref.bank, bank_ref.preAllowedAt/1000); // increment the bytes accessed and the accesses per row bank_ref.bytesAccessed += burstSize; @@ -572,50 +803,70 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, } // Update the stats and schedule the next request - if (mem_pkt->isRead()) { - // Every respQueue which will generate an event, increment count - ++rank_ref.outstandingEvents; - - stats.readBursts++; - if (row_hit) - stats.readRowHits++; - stats.bytesRead += burstSize; - stats.perBankRdBursts[mem_pkt->bankId]++; - - // Update latency stats - stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime; - stats.totQLat += cmd_at - mem_pkt->entryTime; - stats.totBusLat += tBURST; + if (mem_pkt->isTagCheck) { + // stats are already calculated } else { - // Schedule write done event to decrement event count - // after the readyTime has been reached - // Only schedule latest write event to minimize events - // required; only need to ensure that final event scheduled covers - // the time that writes are outstanding and bus is active - // to holdoff power-down entry events - if (!rank_ref.writeDoneEvent.scheduled()) { - schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); - // New event, increment count + if (mem_pkt->isRead()) { + // Every respQueue which will generate an event, increment count ++rank_ref.outstandingEvents; - } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) { - reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); - } - // will remove write from queue when returned to parent function - // decrement count for DRAM rank - --rank_ref.writeEntries; + stats.readBursts++; + if (row_hit) { + stats.readRowHits++; + } + stats.bytesRead += burstSize; + stats.perBankRdBursts[mem_pkt->bankId]++; + + // Update latency stats + stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime; + stats.totQLat += cmd_at - mem_pkt->entryTime; + stats.totBusLat += tBURST; + } else { + // Schedule write done event to decrement event count + // after the readyTime has been reached + // Only schedule latest write event to minimize events + // required; only need to ensure that final event scheduled covers + // the time that writes are outstanding and bus is active + // to holdoff power-down entry events + if (!rank_ref.writeDoneEvent.scheduled()) { + schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); + // New event, increment count + ++rank_ref.outstandingEvents; + + } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) { + reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); + } + // will remove write from queue when returned to parent function + // decrement count for DRAM rank + --rank_ref.writeEntries; + + stats.writeBursts++; + if (row_hit) { + stats.writeRowHits++; + } + stats.bytesWritten += burstSize; + stats.perBankWrBursts[mem_pkt->bankId]++; - stats.writeBursts++; - if (row_hit) - stats.writeRowHits++; - stats.bytesWritten += burstSize; - stats.perBankWrBursts[mem_pkt->bankId]++; + // Update latency stats + stats.totMemAccLatWr += mem_pkt->readyTime - mem_pkt->entryTime; + stats.totQLatWr += cmd_at - mem_pkt->entryTime; + stats.totBusLatWr += tBURST; + } } // Update bus state to reflect when previous command was issued return std::make_pair(cmd_at, cmd_at + burst_gap); } +void +DRAMInterface::updateTagActAllowed(unsigned rankNumber, unsigned bankNumber, Tick BSlotTagBankBusyAt) +{ + assert(BSlotTagBankBusyAt!=MaxTick); + 
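+    // Adopt the caller-supplied busy-until tick as this bank's next allowed
+    // tag activation time.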
ranks[rankNumber]->banks[bankNumber].tagActAllowedAt = BSlotTagBankBusyAt; + DPRINTF(DRAM, "updateTagFunc tagActAllowedAt change, rank/bank %d/%d -- tagActAllowedAt: %d\n", + rankNumber, bankNumber, BSlotTagBankBusyAt); +} + void DRAMInterface::addRankToRankDelay(Tick cmd_at) { @@ -646,24 +897,36 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p) tRFC(_p.tRFC), tREFI(_p.tREFI), tRRD(_p.tRRD), tRRD_L(_p.tRRD_L), tPPD(_p.tPPD), tAAD(_p.tAAD), tXAW(_p.tXAW), tXP(_p.tXP), tXS(_p.tXS), + tTAGBURST(_p.tTAGBURST), tRL_FAST(_p. tRL_FAST), tHM2DQ(_p.tHM2DQ), + tRTW_int(_p.tRTW_int), tRFBD(_p.tRFBD), tRCD_FAST(_p.tRCD_FAST), + tRC_FAST(_p.tRC_FAST), + flushBufferHighThreshold(_p.flushBuffer_high_thresh_perc / 100.0), clkResyncDelay(_p.tBURST_MAX), dataClockSync(_p.data_clock_sync), burstInterleave(tBURST != tBURST_MIN), twoCycleActivate(_p.two_cycle_activate), activationLimit(_p.activation_limit), - wrToRdDlySameBG(tWL + _p.tBURST_MAX + _p.tWTR_L), + wrToRdDlySameBG(tWL + _p.tBURST_MAX + _p.tWTR), rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX), + maxFBLen(0), pageMgmt(_p.page_policy), maxAccessesPerRow(_p.max_accesses_per_row), timeStampOffset(0), activeRank(0), enableDRAMPowerdown(_p.enable_dram_powerdown), lastStatsResetTick(0), - stats(*this) + // polMan(_p.pol_man), + stats(*this), + readFlushBufferEvent([this] {processReadFlushBufferEvent();}, name()), + addToFlushBufferEvent([this] {processAddToFlushBufferEvent();}, name()), + endOfReadFlushBuffPeriod(0), + readFlushBufferCount(0), + enableReadFlushBuffer(_p.enable_read_flush_buffer), + isAlloy(_p.is_alloy) { DPRINTF(DRAM, "Setting up DRAM Interface\n"); - fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, " - "must be a power of two\n", burstSize); + // fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, " + // "must be a power of two\n", burstSize); // sanity check the ranks since we rely on bit slicing for the // address decoding @@ -717,17 +980,19 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p) banksPerRank, bankGroupsPerRank); } // tCCD_L should be greater than minimal, back-to-back burst delay - if (tCCD_L <= tBURST) { + if (tCCD_L < tBURST) { fatal("tCCD_L (%d) should be larger than the minimum bus delay " "(%d) when bank groups per rank (%d) is greater than 1\n", tCCD_L, tBURST, bankGroupsPerRank); } + // tCCD_L_WR should be greater than minimal, back-to-back burst delay - if (tCCD_L_WR <= tBURST) { + if (tCCD_L_WR < tBURST) { fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay " " (%d) when bank groups per rank (%d) is greater than 1\n", tCCD_L_WR, tBURST, bankGroupsPerRank); } + // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay // some datasheets might specify it equal to tRRD if (tRRD_L < tRRD) { @@ -844,8 +1109,14 @@ DRAMInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr, // always the top bits, and check before creating the packet uint64_t row; + Addr mappedAddr = pkt_addr; + + if (isAlloy) { + mappedAddr = ((pkt_addr / 64) * 8) + pkt_addr; + } + // Get packed address, starting at 0 - Addr addr = getCtrlAddr(pkt_addr); + Addr addr = getCtrlAddr(mappedAddr); // truncate the address to a memory burst, which makes it unique to // a specific buffer, row, bank, rank and channel @@ -905,8 +1176,8 @@ DRAMInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr, assert(row < rowsPerBank); assert(row < Bank::NO_ROW); - DPRINTF(DRAM, "Address: %#x Rank %d Bank %d Row %d\n", - pkt_addr, rank, bank, row); + // DPRINTF(DRAM, "Address: %#x Rank %d 
Bank %d Row %d\n", + // pkt_addr, rank, bank, row); // create the corresponding memory packet with the entry time and // ready time set to the current tick, the latter will be updated @@ -927,6 +1198,113 @@ void DRAMInterface::setupRank(const uint8_t rank, const bool is_read) } } +void +DRAMInterface::setPolicyManager(AbstractMemory* _polMan) +{ + polMan = _polMan; +} + +void +DRAMInterface::processReadFlushBufferEvent() +{ + // It is possible that a ReadFlushBufferEvent is scheduled and + // before reaching to the scheduled time or concurrent with that, + // Read Miss Cleans also pop packet from flushBuffer. + // So, it should return for the scheduled event. + if (flushBuffer.empty()) { + if (readFlushBufferCount > 0) { + stats.avgReadFBPerEvent = readFlushBufferCount; + } + endOfReadFlushBuffPeriod = 0; + readFlushBufferCount = 0; + return; + } + + assert(endOfReadFlushBuffPeriod >= curTick()); + assert(flushBuffer.front() != -1); + + + if (polMan->recvReadFlushBuffer(flushBuffer.front())) { + DPRINTF(DRAM, "sent rd FB: %d\n", flushBuffer.front()); + readFlushBufferCount++; + flushBuffer.pop_front(); + stats.totReadFBSent++; + stats.readBursts++; + stats.bytesRead += burstSize; + stats.totBusLat += tBURST; + + Tick nextBurstFB = curTick() + tBURST; + + if (nextBurstFB <= endOfReadFlushBuffPeriod && !flushBuffer.empty()) { + schedule(readFlushBufferEvent, nextBurstFB); + return; + } else { + // Either the time is beyond the tRFC or flushBuffer is empty. + // Reset control params + stats.avgReadFBPerEvent = readFlushBufferCount; + endOfReadFlushBuffPeriod = 0; + readFlushBufferCount = 0; + return; + } + } else { + // Policy manager has no empty entry available in its write back buffer. + stats.totReadFBFailed++; + // End of readFlushBuffer round. + // Reset control params + stats.avgReadFBPerEvent = readFlushBufferCount; + endOfReadFlushBuffPeriod = 0; + readFlushBufferCount = 0; + return; + } +} + +void +DRAMInterface::processAddToFlushBufferEvent() +{ + assert(!tempFlushBuffer.empty()); + assert(tempFlushBuffer.front().first == curTick()); + + flushBuffer.push_back(tempFlushBuffer.front().second); + + DPRINTF(DRAM, "Wr M D to FB: %x\n", tempFlushBuffer.front().second); + + tempFlushBuffer.pop_front(); + + stats.totPktsPushedFB++; + + stats.avgFBLenEnq = flushBuffer.size(); + + if (flushBuffer.size() > maxFBLen) { + maxFBLen = flushBuffer.size(); + stats.maxFBLenEnq = flushBuffer.size(); + } + + if (!tempFlushBuffer.empty()) { + assert(tempFlushBuffer.front().first >= curTick()); + assert(!addToFlushBufferEvent.scheduled()); + schedule(addToFlushBufferEvent, tempFlushBuffer.front().first); + } + +} + +bool +DRAMInterface::checkFwdMrgeInFB(Addr addr) +{ + for (int i=0; i < flushBuffer.size(); i++) { + if (flushBuffer.at(i) == addr) { + return true; + } + } + + for (int i=0; i < tempFlushBuffer.size(); i++) { + if (tempFlushBuffer.at(i).second == addr) { + return true; + } + } + + return false; +} + void DRAMInterface::respondEvent(uint8_t rank) { @@ -1024,6 +1402,8 @@ DRAMInterface::suspend() } } +typedef std::pair got_waiting_pair; + std::pair, bool> DRAMInterface::minBankPrep(const MemPacketQueue& queue, Tick min_col_at) const @@ -1040,12 +1420,15 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue, // determine if we have queued transactions targetting the // bank in question - std::vector got_waiting(ranksPerChannel * banksPerRank, false); + std::vector got_waiting(ranksPerChannel * banksPerRank); for (const auto& p : queue) { - if (p->pseudoChannel != pseudoChannel) + if 
(p->pseudoChannel != pseudoChannel) { continue; - if (p->isDram() && ranks[p->rank]->inRefIdleState()) - got_waiting[p->bankId] = true; + } + if (p->isDram() && ranks[p->rank]->inRefIdleState()) { + got_waiting[p->bankId].first = true; + got_waiting[p->bankId].second = p->isTagCheck? true : false; + } } // Find command with optimal bank timing @@ -1056,7 +1439,7 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue, // if we have waiting requests for the bank, and it is // amongst the first available, update the mask - if (got_waiting[bank_id]) { + if (got_waiting[bank_id].first) { // make sure this rank is not currently refreshing. assert(ranks[i]->inRefIdleState()); // simplistic approximation of when the bank can issue @@ -1068,14 +1451,15 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue, // latest Tick for which ACT can occur without // incurring additoinal delay on the data bus - const Tick tRCD = ctrl->inReadBusState(false, this) ? - tRCD_RD : tRCD_WR; + const Tick tRCD = ctrl->inReadBusState(false, + (MemInterface*)(this)) ? tRCD_RD : + (got_waiting[bank_id].second ? tRCD_RD+tRTW_int : tRCD_WR); const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick()); // When is the earliest the R/W burst can issue? const Tick col_allowed_at = ctrl->inReadBusState(false, - this) ? + (MemInterface*)(this)) ? ranks[i]->banks[j].rdAllowedAt : ranks[i]->banks[j].wrAllowedAt; Tick col_at = std::max(col_allowed_at, act_at + tRCD); @@ -1314,6 +1698,7 @@ DRAMInterface::Rank::processRefreshEvent() DPRINTF(DRAM, "Refresh awaiting draining\n"); return; } else { + DPRINTF(DRAM, "ELSE of Refresh awaiting draining\n"); refreshState = REF_PD_EXIT; } } @@ -1328,12 +1713,14 @@ DRAMInterface::Rank::processRefreshEvent() scheduleWakeUpEvent(dram.tXP); return; } else { + DPRINTF(DRAM, "ELSE of Wake Up for refresh\n"); refreshState = REF_PRE; } } // at this point, ensure that all banks are precharged if (refreshState == REF_PRE) { + DPRINTF(DRAM, "REF_PRE\n"); // precharge any active bank if (numBanksActive != 0) { // at the moment, we use a precharge all even if there is @@ -1378,6 +1765,7 @@ DRAMInterface::Rank::processRefreshEvent() // we are already idle schedulePowerEvent(PWR_REF, curTick()); } else { + DPRINTF(DRAM, "banks state is closed but... %d %d\n", prechargeEvent.scheduled(), dram.ctrl->respondEventScheduled()); // banks state is closed but haven't transitioned pwrState to IDLE // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled // should have outstanding precharge or read response event @@ -1400,6 +1788,20 @@ DRAMInterface::Rank::processRefreshEvent() // last but not least we perform the actual refresh if (refreshState == REF_START) { + dram.stats.totNumberRefreshEvent++; + if (dram.enableReadFlushBuffer && !dram.readFlushBufferEvent.scheduled()) { + // Time to be proactive and send some dirty data + // from flushBuffer to the controller. 
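+            // The data bus is idle during the refresh window, so drain dirty
+            // lines from the flush buffer to the policy manager via
+            // readFlushBufferEvent until tRFC expires or the buffer empties.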
+ assert(dram.endOfReadFlushBuffPeriod == 0); + assert(dram.readFlushBufferCount == 0); + + if (!dram.flushBuffer.empty()) { + dram.endOfReadFlushBuffPeriod = curTick() + dram.tRFC; + schedule(dram.readFlushBufferEvent, curTick() + dram.tRFBD + dram.tBURST); + dram.stats.refSchdRFB++; + } + } + // should never get here with any banks active assert(numBanksActive == 0); assert(pwrState == PWR_REF); @@ -1848,31 +2250,99 @@ DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram) ADD_STAT(readBursts, statistics::units::Count::get(), "Number of DRAM read bursts"), + ADD_STAT(readMC, statistics::units::Count::get(), + "Number of DRAM cache read miss cleans"), ADD_STAT(writeBursts, statistics::units::Count::get(), "Number of DRAM write bursts"), - + ADD_STAT(writeBurstsTC, statistics::units::Count::get(), + "Number of DRAM write bursts for tag check"), + ADD_STAT(tagResBursts, statistics::units::Count::get(), + "Number of tag bursts returned by write miss dirties"), + ADD_STAT(tagBursts, statistics::units::Count::get(), + "Number of tag check bursts"), + + ADD_STAT(avgFBLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average flush buffer length when enqueuing"), + ADD_STAT(avgReadFBPerEvent, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average number of reads from flush buffer per event"), + ADD_STAT(totNumberRefreshEvent, statistics::units::Count::get(), + "Total number of refresh events"), + ADD_STAT(totReadFBSent, statistics::units::Count::get(), + "Total number of reads from flush buffer per event"), + ADD_STAT(totReadFBFailed, statistics::units::Count::get(), + "Total number of reads from flush buffer failed to be received by policy manager (write back buffer full)"), + ADD_STAT(totReadFBByRdMC, statistics::units::Count::get(), + "Total number of reads from flush buffer during Read Miss Clean"), + ADD_STAT(totStallToFlushFB, statistics::units::Count::get(), + "Total number of reads from flush buffer during Read Miss Clean"), + ADD_STAT(totPktsPushedFB, statistics::units::Count::get(), + "Total number of packets pushed into flush buffer"), + ADD_STAT(maxFBLenEnq, statistics::units::Count::get(), + "Maximum flush buffer length when enqueuing"), + ADD_STAT(refSchdRFB, statistics::units::Count::get(), + "Maximum flush buffer length when enqueuing"), + ADD_STAT( actDelayedDueToTagAct, statistics::units::Count::get(), + " "), ADD_STAT(perBankRdBursts, statistics::units::Count::get(), "Per bank write bursts"), ADD_STAT(perBankWrBursts, statistics::units::Count::get(), "Per bank write bursts"), ADD_STAT(totQLat, statistics::units::Tick::get(), - "Total ticks spent queuing"), + "Total ticks spent queuing for reads"), ADD_STAT(totBusLat, statistics::units::Tick::get(), - "Total ticks spent in databus transfers"), + "Total ticks spent in databus transfers for reads"), ADD_STAT(totMemAccLat, statistics::units::Tick::get(), "Total ticks spent from burst creation until serviced " - "by the DRAM"), + "by the DRAM for reads"), + + ADD_STAT(totQLatWr, statistics::units::Tick::get(), + "Total ticks spent queuing for writes"), + ADD_STAT(totBusLatWr, statistics::units::Tick::get(), + "Total ticks spent in databus transfers for writes"), + ADD_STAT(totMemAccLatWr, statistics::units::Tick::get(), + "Total ticks spent from burst creation until serviced " + "by the DRAM for writes"), + + ADD_STAT(totQLatWrTC, statistics::units::Tick::get(), + "Total ticks spent queuing for writes tag check"), + ADD_STAT(totBusLatWrTC, 
statistics::units::Tick::get(), + "Total ticks spent in databus transfers for writes tag check"), + ADD_STAT(totMemAccLatWrTC, statistics::units::Tick::get(), + "Total ticks spent from burst creation until serviced " + "by the DRAM for writes tag check"), ADD_STAT(avgQLat, statistics::units::Rate< statistics::units::Tick, statistics::units::Count>::get(), - "Average queueing delay per DRAM burst"), + "Average queueing delay per DRAM burst for reads"), ADD_STAT(avgBusLat, statistics::units::Rate< statistics::units::Tick, statistics::units::Count>::get(), - "Average bus latency per DRAM burst"), + "Average bus latency per DRAM burst for reads"), ADD_STAT(avgMemAccLat, statistics::units::Rate< statistics::units::Tick, statistics::units::Count>::get(), - "Average memory access latency per DRAM burst"), + "Average memory access latency per DRAM burst for reads"), + + ADD_STAT(avgQLatWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average queueing delay per DRAM burst for writes"), + ADD_STAT(avgBusLatWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average bus latency per DRAM burst for writes"), + ADD_STAT(avgMemAccLatWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average memory access latency per DRAM burst for writes"), + + ADD_STAT(avgQLatWrTC, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average queueing delay per DRAM burst for writes tag check"), + ADD_STAT(avgBusLatWrTC, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average bus latency per DRAM burst for writes tag check"), + ADD_STAT(avgMemAccLatWrTC, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average memory access latency per DRAM burst for writes tag check"), ADD_STAT(readRowHits, statistics::units::Count::get(), "Number of row buffer hits during reads"), @@ -1908,8 +2378,10 @@ DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram) "Data bus utilization in percentage for writes"), ADD_STAT(pageHitRate, statistics::units::Ratio::get(), - "Row buffer hit rate, read and write combined") + "Row buffer hit rate, read and write combined"), + ADD_STAT(hitMissBusUtil, statistics::units::Ratio::get(), + "Hit/Miss bus utilization") { } @@ -1922,6 +2394,16 @@ DRAMInterface::DRAMStats::regStats() avgBusLat.precision(2); avgMemAccLat.precision(2); + avgQLatWr.precision(2); + avgBusLatWr.precision(2); + avgMemAccLatWr.precision(2); + + avgQLatWrTC.precision(2); + avgBusLatWrTC.precision(2); + avgMemAccLatWrTC.precision(2); + + avgFBLenEnq.precision(2); + readRowHitRate.precision(2); writeRowHitRate.precision(2); @@ -1940,13 +2422,23 @@ DRAMInterface::DRAMStats::regStats() pageHitRate.precision(2); + hitMissBusUtil.precision(2); + // Formula stats - avgQLat = totQLat / readBursts; - avgBusLat = totBusLat / readBursts; - avgMemAccLat = totMemAccLat / readBursts; + avgQLat = totQLat / (readBursts+readMC); + avgBusLat = totBusLat / (readBursts+readMC); + avgMemAccLat = totMemAccLat / (readBursts+readMC); + + avgQLatWr = totQLatWr / writeBursts; + avgBusLatWr = totBusLatWr / writeBursts; + avgMemAccLatWr = totMemAccLatWr / writeBursts; + + avgQLatWrTC = totQLatWrTC / writeBurstsTC; + avgBusLatWrTC = totBusLatWrTC / writeBurstsTC; + avgMemAccLatWrTC = totMemAccLatWrTC / writeBurstsTC; readRowHitRate = (readRowHits / readBursts) * 100; - writeRowHitRate = (writeRowHits / writeBursts) * 
100; + writeRowHitRate = (writeRowHits / (writeBursts+writeBurstsTC)) * 100; avgRdBW = (bytesRead / 1000000) / simSeconds; avgWrBW = (bytesWritten / 1000000) / simSeconds; @@ -1958,7 +2450,9 @@ DRAMInterface::DRAMStats::regStats() busUtilWrite = avgWrBW / peakBW * 100; pageHitRate = (writeRowHits + readRowHits) / - (writeBursts + readBursts) * 100; + (writeBursts + writeBurstsTC + readBursts) * 100; + + hitMissBusUtil = (((tagResBursts * dram.tCK) + (tagBursts * dram.tTAGBURST)) * 0.000000000001) / simSeconds * 100; } DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank) diff --git a/src/mem/dram_interface.hh b/src/mem/dram_interface.hh index e20e33faf9..f98f4d511b 100644 --- a/src/mem/dram_interface.hh +++ b/src/mem/dram_interface.hh @@ -48,7 +48,7 @@ #include "mem/drampower.hh" #include "mem/mem_interface.hh" -#include "params/DRAMInterface.hh" +// #include "mem/policy_manager.hh" namespace gem5 { @@ -519,6 +519,14 @@ class DRAMInterface : public MemInterface const Tick tXAW; const Tick tXP; const Tick tXS; + const Tick tTAGBURST; + const Tick tRL_FAST; + const Tick tHM2DQ; + const Tick tRTW_int; + const Tick tRFBD; + const Tick tRCD_FAST; + const Tick tRC_FAST; + float flushBufferHighThreshold; const Tick clkResyncDelay; const bool dataClockSync; const bool burstInterleave; @@ -527,6 +535,8 @@ class DRAMInterface : public MemInterface const Tick wrToRdDlySameBG; const Tick rdToWrDlySameBG; + unsigned maxFBLen; + enums::PageManage pageMgmt; /** @@ -558,8 +568,8 @@ class DRAMInterface : public MemInterface * @param act_tick Time when the activation takes place * @param row Index of the row */ - void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick, - uint32_t row); + Tick activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick, + uint32_t row, bool isTagCheck); /** * Precharge a given bank and also update when the precharge is @@ -587,7 +597,23 @@ class DRAMInterface : public MemInterface /** total number of DRAM bursts serviced */ statistics::Scalar readBursts; + statistics::Scalar readMC; statistics::Scalar writeBursts; + statistics::Scalar writeBurstsTC; + statistics::Scalar tagResBursts; + statistics::Scalar tagBursts; + + statistics::Average avgFBLenEnq; + statistics::Average avgReadFBPerEvent; + statistics::Scalar totNumberRefreshEvent; + statistics::Scalar totReadFBSent; + statistics::Scalar totReadFBFailed; + statistics::Scalar totReadFBByRdMC; + statistics::Scalar totStallToFlushFB; + statistics::Scalar totPktsPushedFB; + statistics::Scalar maxFBLenEnq; + statistics::Scalar refSchdRFB; + statistics::Scalar actDelayedDueToTagAct; /** DRAM per bank stats */ statistics::Vector perBankRdBursts; @@ -598,11 +624,27 @@ class DRAMInterface : public MemInterface statistics::Scalar totBusLat; statistics::Scalar totMemAccLat; + statistics::Scalar totQLatWr; + statistics::Scalar totBusLatWr; + statistics::Scalar totMemAccLatWr; + + statistics::Scalar totQLatWrTC; + statistics::Scalar totBusLatWrTC; + statistics::Scalar totMemAccLatWrTC; + // Average latencies per request statistics::Formula avgQLat; statistics::Formula avgBusLat; statistics::Formula avgMemAccLat; + statistics::Formula avgQLatWr; + statistics::Formula avgBusLatWr; + statistics::Formula avgMemAccLatWr; + + statistics::Formula avgQLatWrTC; + statistics::Formula avgBusLatWrTC; + statistics::Formula avgMemAccLatWrTC; + // Row hit count and rate statistics::Scalar readRowHits; statistics::Scalar writeRowHits; @@ -622,6 +664,7 @@ class DRAMInterface : public MemInterface statistics::Formula 
busUtilRead; statistics::Formula busUtilWrite; statistics::Formula pageHitRate; + statistics::Formula hitMissBusUtil; }; DRAMStats stats; @@ -659,6 +702,36 @@ class DRAMInterface : public MemInterface } public: + + //AbstractMemory* polMan; + + Tick get_tRP() override { return tRP;} + Tick get_tRCD_RD() override { return tRCD_RD;} + Tick get_tRL() override { return tRL;} + + // void setPolicyManager(PolicyManager* _polMan) override; + void setPolicyManager(AbstractMemory* _polMan) override; + + + void processReadFlushBufferEvent(); + EventFunctionWrapper readFlushBufferEvent; + + void processAddToFlushBufferEvent(); + EventFunctionWrapper addToFlushBufferEvent; + + Tick endOfReadFlushBuffPeriod; + unsigned readFlushBufferCount; + bool enableReadFlushBuffer; + bool isAlloy; + + bool checkFwdMrgeInFB(Addr addr) override; + + std::deque flushBuffer; + + typedef std::pair tempFBEntry; + + std::deque tempFlushBuffer; + /** * Initialize the DRAM interface and verify parameters */ @@ -800,6 +873,17 @@ class DRAMInterface : public MemInterface void chooseRead(MemPacketQueue& queue) override { } bool writeRespQueueFull() const override { return false;} + Tick nextTagActAvailability(unsigned rankNumber, unsigned bankNumber) override + { return ranks[rankNumber]->banks[bankNumber].tagActAllowedAt; } + + Tick getTRCFAST() override { return tRC_FAST;} + + Tick getTRLFAST() override { return tRL_FAST;} + + Tick getTRCDFAST() override { return tRCD_FAST;} + + void updateTagActAllowed(unsigned rankNumber, unsigned bankNumber, Tick BSlotTagBankBusyAt) override; + DRAMInterface(const DRAMInterfaceParams &_p); }; diff --git a/src/mem/hbm_ctrl.cc b/src/mem/hbm_ctrl.cc index f87fa2dcbb..ccecdefb4a 100644 --- a/src/mem/hbm_ctrl.cc +++ b/src/mem/hbm_ctrl.cc @@ -47,7 +47,7 @@ HBMCtrl::HBMCtrl(const HBMCtrlParams &p) : MemCtrl(p), retryRdReqPC1(false), retryWrReqPC1(false), nextReqEventPC1([this] {processNextReqEvent(pc1Int, respQueuePC1, - respondEventPC1, nextReqEventPC1, retryWrReqPC1);}, + respondEventPC1, nextReqEventPC1, retryWrReqPC1, retryRdReqPC1);}, name()), respondEventPC1([this] {processRespondEvent(pc1Int, respQueuePC1, respondEventPC1, retryRdReqPC1); }, name()), @@ -207,8 +207,8 @@ bool HBMCtrl::recvTimingReq(PacketPtr pkt) { // This is where we enter from the outside world - DPRINTF(MemCtrl, "recvTimingReq: request %s addr %#x size %d\n", - pkt->cmdString(), pkt->getAddr(), pkt->getSize()); + DPRINTF(MemCtrl, "recvTimingReq: request %s addr %#x size %d isTagCheck: %d\n", + pkt->cmdString(), pkt->getAddr(), pkt->getSize(), pkt->isTagCheck); panic_if(pkt->cacheResponding(), "Should not see packets where cache " "is responding"); diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc index 97c7741abd..8cbd203246 100644 --- a/src/mem/mem_ctrl.cc +++ b/src/mem/mem_ctrl.cc @@ -42,6 +42,7 @@ #include "base/trace.hh" #include "debug/DRAM.hh" +#include "debug/DRAMT.hh" #include "debug/Drain.hh" #include "debug/MemCtrl.hh" #include "debug/NVM.hh" @@ -62,7 +63,7 @@ MemCtrl::MemCtrl(const MemCtrlParams &p) : port(name() + ".port", *this), isTimingMode(false), retryRdReq(false), retryWrReq(false), nextReqEvent([this] {processNextReqEvent(dram, respQueue, - respondEvent, nextReqEvent, retryWrReq);}, name()), + respondEvent, nextReqEvent, retryWrReq, retryRdReq);}, name()), respondEvent([this] {processRespondEvent(dram, respQueue, respondEvent, retryRdReq); }, name()), dram(p.dram), @@ -70,12 +71,17 @@ MemCtrl::MemCtrl(const MemCtrlParams &p) : writeBufferSize(dram->writeBufferSize), 
writeHighThreshold(writeBufferSize * p.write_high_thresh_perc / 100.0), writeLowThreshold(writeBufferSize * p.write_low_thresh_perc / 100.0), + oldestWriteAgeThreshold(p.oldest_write_age_threshold), + oldestWriteAge(0), minWritesPerSwitch(p.min_writes_per_switch), minReadsPerSwitch(p.min_reads_per_switch), memSchedPolicy(p.mem_sched_policy), frontendLatency(p.static_frontend_latency), backendLatency(p.static_backend_latency), + frontendLatencyTC(p.static_frontend_latency_tc), + backendLatencyTC(p.static_backend_latency_tc), commandWindow(p.command_window), + considerOldestWrite(p.consider_oldest_write), prevArrival(0), stats(*this) { @@ -91,9 +97,6 @@ MemCtrl::MemCtrl(const MemCtrlParams &p) : fatal("Write buffer low threshold %d must be smaller than the " "high threshold %d\n", p.write_low_thresh_perc, p.write_high_thresh_perc); - if (p.disable_sanity_check) { - port.disableSanityCheck(); - } } void @@ -135,7 +138,7 @@ MemCtrl::recvAtomic(PacketPtr pkt) Tick MemCtrl::recvAtomicLogic(PacketPtr pkt, MemInterface* mem_intr) { - DPRINTF(MemCtrl, "recvAtomic: %s 0x%x\n", + DPRINTF(MemCtrl, "recvAtomic: %s %x\n", pkt->cmdString(), pkt->getAddr()); panic_if(pkt->cacheResponding(), "Should not see packets where cache " @@ -221,23 +224,25 @@ MemCtrl::addToReadQueue(PacketPtr pkt, Addr burst_addr = burstAlign(addr, mem_intr); // if the burst address is not present then there is no need // looking any further - if (isInWriteQueue.find(burst_addr) != isInWriteQueue.end()) { - for (const auto& vec : writeQueue) { - for (const auto& p : vec) { - // check if the read is subsumed in the write queue - // packet we are looking at - if (p->addr <= addr && - ((addr + size) <= (p->addr + p->size))) { - - foundInWrQ = true; - stats.servicedByWrQ++; - pktsServicedByWrQ++; - DPRINTF(MemCtrl, - "Read to addr %#x with size %d serviced by " - "write queue\n", - addr, size); - stats.bytesReadWrQ += burst_size; - break; + if (!pkt->isTagCheck) { + if (isInWriteQueue.find(burst_addr) != isInWriteQueue.end()) { + for (const auto& vec : writeQueue) { + for (const auto& p : vec) { + // check if the read is subsumed in the write queue + // packet we are looking at + if (p->addr <= addr && + ((addr + size) <= (p->addr + p->size))) { + + foundInWrQ = true; + stats.servicedByWrQ++; + pktsServicedByWrQ++; + DPRINTF(MemCtrl, + "Read to addr %x with size %d serviced by " + "write queue\n", + addr, size); + stats.bytesReadWrQ += burst_size; + break; + } } } } @@ -249,7 +254,7 @@ MemCtrl::addToReadQueue(PacketPtr pkt, // Make the burst helper for split packets if (pkt_count > 1 && burst_helper == NULL) { - DPRINTF(MemCtrl, "Read to addr %#x translates to %d " + DPRINTF(MemCtrl, "Read to addr %x translates to %d " "memory requests\n", pkt->getAddr(), pkt_count); burst_helper = new BurstHelper(pkt_count); } @@ -257,6 +262,8 @@ MemCtrl::addToReadQueue(PacketPtr pkt, MemPacket* mem_pkt; mem_pkt = mem_intr->decodePacket(pkt, addr, size, true, mem_intr->pseudoChannel); + mem_pkt->isTagCheck = pkt->isTagCheck; + mem_pkt->isLocMem = pkt->isLocMem; // Increment read entries of the rank (dram) // Increment count to trigger issue of non-deterministic read (nvm) @@ -273,9 +280,13 @@ MemCtrl::addToReadQueue(PacketPtr pkt, readQueue[mem_pkt->qosValue()].push_back(mem_pkt); // log packet + DPRINTF(MemCtrl, "logRequest rd: %d %d %x\n", + pkt->requestorId(), + pkt->qosValue(), mem_pkt->addr); + logRequest(MemCtrl::READ, pkt->requestorId(), pkt->qosValue(), mem_pkt->addr, 1); - + mem_intr->readQueueSize++; // Update stats @@ -287,7 +298,7 @@ 
MemCtrl::addToReadQueue(PacketPtr pkt, } // If all packets are serviced by write queue, we send the repsonse back - if (pktsServicedByWrQ == pkt_count) { + if (pktsServicedByWrQ == pkt_count && !pkt->isTagCheck) { accessAndRespond(pkt, frontendLatency, mem_intr); return true; } @@ -323,8 +334,8 @@ MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, // see if we can merge with an existing item in the write // queue and keep track of whether we have merged or not - bool merged = isInWriteQueue.find(burstAlign(addr, mem_intr)) != - isInWriteQueue.end(); + bool merged = (isInWriteQueue.find(burstAlign(addr, mem_intr)) != + isInWriteQueue.end()) && !pkt->isTagCheck; // if the item was not merged we need to create a new write // and enqueue it @@ -332,6 +343,9 @@ MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, MemPacket* mem_pkt; mem_pkt = mem_intr->decodePacket(pkt, addr, size, false, mem_intr->pseudoChannel); + mem_pkt->isTagCheck = pkt->isTagCheck; + mem_pkt->isLocMem = pkt->isLocMem; + // Default readyTime to Max if nvm interface; //will be reset once read is issued mem_pkt->readyTime = MaxTick; @@ -345,14 +359,18 @@ MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, writeQueue[mem_pkt->qosValue()].push_back(mem_pkt); isInWriteQueue.insert(burstAlign(addr, mem_intr)); - + // log packet + DPRINTF(MemCtrl, "logRequest wr: %d %d %x\n", + pkt->requestorId(), + pkt->qosValue(), mem_pkt->addr); + logRequest(MemCtrl::WRITE, pkt->requestorId(), pkt->qosValue(), mem_pkt->addr, 1); - + mem_intr->writeQueueSize++; - assert(totalWriteQueueSize == isInWriteQueue.size()); + //assert(totalWriteQueueSize == isInWriteQueue.size()); // Update stats stats.avgWrQLen = totalWriteQueueSize; @@ -375,7 +393,9 @@ MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, // snoop the write queue for any upcoming reads // @todo, if a pkt size is larger than burst size, we might need a // different front end latency - accessAndRespond(pkt, frontendLatency, mem_intr); + if (!pkt->isTagCheck) { + accessAndRespond(pkt, frontendLatency, mem_intr); + } } void @@ -385,19 +405,19 @@ MemCtrl::printQs() const DPRINTF(MemCtrl, "===READ QUEUE===\n\n"); for (const auto& queue : readQueue) { for (const auto& packet : queue) { - DPRINTF(MemCtrl, "Read %#x\n", packet->addr); + DPRINTF(MemCtrl, "Read %d\n", packet->addr); } } DPRINTF(MemCtrl, "\n===RESP QUEUE===\n\n"); for (const auto& packet : respQueue) { - DPRINTF(MemCtrl, "Response %#x\n", packet->addr); + DPRINTF(MemCtrl, "Response %d\n", packet->addr); } DPRINTF(MemCtrl, "\n===WRITE QUEUE===\n\n"); for (const auto& queue : writeQueue) { for (const auto& packet : queue) { - DPRINTF(MemCtrl, "Write %#x\n", packet->addr); + DPRINTF(MemCtrl, "Write %d\n", packet->addr); } } #endif // TRACING_ON @@ -407,7 +427,7 @@ bool MemCtrl::recvTimingReq(PacketPtr pkt) { // This is where we enter from the outside world - DPRINTF(MemCtrl, "recvTimingReq: request %s addr %#x size %d\n", + DPRINTF(MemCtrl, "recvTimingReq: request %s addr %x size %d\n", pkt->cmdString(), pkt->getAddr(), pkt->getSize()); panic_if(pkt->cacheResponding(), "Should not see packets where cache " @@ -442,7 +462,7 @@ MemCtrl::recvTimingReq(PacketPtr pkt) if (pkt->isWrite()) { assert(size != 0); if (writeQueueFull(pkt_count)) { - DPRINTF(MemCtrl, "Write queue full, not accepting\n"); + DPRINTF(MemCtrl, "Write queue full, not accepting, readQ size: %d, writeQ size: %d\n", readQueue[pkt->qosValue()].size(), writeQueue[pkt->qosValue()].size()); // remember that we have to retry 
this port retryWrReq = true; stats.numWrRetry++; @@ -462,7 +482,7 @@ MemCtrl::recvTimingReq(PacketPtr pkt) assert(pkt->isRead()); assert(size != 0); if (readQueueFull(pkt_count)) { - DPRINTF(MemCtrl, "Read queue full, not accepting\n"); + DPRINTF(MemCtrl, "Read queue full, not accepting, readQ size: %d, writeQ size: %d\n", readQueue[pkt->qosValue()].size(), writeQueue[pkt->qosValue()].size()); // remember that we have to retry this port retryRdReq = true; stats.numRdRetry++; @@ -558,7 +578,7 @@ MemCtrl::chooseNext(MemPacketQueue& queue, Tick extra_col_delay, MemInterface* mem_intr) { // This method does the arbitration between requests. - + DPRINTF(MemCtrl, "in chooseNext func\n"); MemPacketQueue::iterator ret = queue.end(); if (!queue.empty()) { @@ -601,6 +621,7 @@ std::pair MemCtrl::chooseNextFRFCFS(MemPacketQueue& queue, Tick extra_col_delay, MemInterface* mem_intr) { + DPRINTF(MemCtrl, "in chooseNextFRFCFS func\n"); auto selected_pkt_it = queue.end(); Tick col_allowed_at = MaxTick; @@ -622,7 +643,7 @@ void MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency, MemInterface* mem_intr) { - DPRINTF(MemCtrl, "Responding to Address %#x.. \n", pkt->getAddr()); + DPRINTF(MemCtrl, "Responding to Address %x: %s.. \n", pkt->getAddr(), pkt->cmdString()); bool needsResponse = pkt->needsResponse(); // do the actual memory access which also turns the packet into a @@ -639,11 +660,35 @@ MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency, // with headerDelay that takes into account the delay provided by // the xbar and also the payloadDelay that takes into account the // number of data beats. - Tick response_time = curTick() + static_latency + pkt->headerDelay + - pkt->payloadDelay; + Tick response_time; + if (pkt->isTagCheck && pkt->isWrite()) { + assert(!pkt->owIsRead); + // Note: in this case static latency is TagCheckReady time actually! + response_time = static_latency + frontendLatencyTC + backendLatencyTC; + } else { + response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + } // Here we reset the timing of the packet before sending it out. pkt->headerDelay = pkt->payloadDelay = 0; + if (pkt->isTagCheck && pkt->owIsRead && !pkt->isHit) { + if (pkt->isDirty) { + assert(pkt->hasDirtyData); + } + else if (!pkt->isDirty && !pkt->hasDirtyData) { + // No response is needed. + // It was a just a bubble (null data). 
+ DPRINTF(MemCtrl, "Done, Rd Miss Clean No Dirty Data\n"); + delete pkt; + return; + } + } + + if (pkt->isTagCheck) { + pkt->isTagCheck = false; + } + // queue the packet in the response queue to be sent out after // the static latency has passed port.schedTimingResp(pkt, response_time); @@ -658,6 +703,76 @@ MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency, return; } +void +MemCtrl::sendTagCheckRespond(MemPacket* mem_pkt) +{ + DPRINTF(MemCtrl, "sendTagCheckRespond : %x \n", mem_pkt->addr); + assert(mem_pkt->isRead()); + assert(mem_pkt->pkt->isRead()); + assert(mem_pkt->tagCheckReady != MaxTick); + + PacketPtr tagCheckResPkt = getPacket(mem_pkt->addr, 8, MemCmd::ReadReq); + + tagCheckResPkt->isTagCheck = mem_pkt->pkt->isTagCheck; + tagCheckResPkt->isLocMem = mem_pkt->pkt->isLocMem; + tagCheckResPkt->owIsRead = mem_pkt->pkt->owIsRead; + tagCheckResPkt->isHit = mem_pkt->pkt->isHit; + tagCheckResPkt->isDirty = mem_pkt->pkt->isDirty; + tagCheckResPkt->hasDirtyData = mem_pkt->pkt->hasDirtyData; + tagCheckResPkt->dirtyLineAddr = mem_pkt->pkt->dirtyLineAddr; + + tagCheckResPkt->makeResponse(); + + // Tick response_time = curTick() + tagCheckResPkt->headerDelay; + // response_time += tagCheckResPkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. + tagCheckResPkt->headerDelay = tagCheckResPkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(tagCheckResPkt, mem_pkt->tagCheckReady + frontendLatencyTC + backendLatencyTC); +} + +void +MemCtrl::updateOldestWriteAge() +{ + // Assumption: writeQueue has only one priority = has just one vector in it. + for (const auto& vec : writeQueue) { + if (vec.empty()) { + oldestWriteAge = 0; + } else { + for (const auto& p : vec) { + oldestWriteAge = std::max(oldestWriteAge, curTick() - p->entryTime); + } + } + } +} + +PacketPtr +MemCtrl::getPacket(Addr addr, unsigned size, const MemCmd& cmd, + Request::FlagsType flags) +{ + // Create new request + RequestPtr req = std::make_shared(addr, size, flags, + 0); + // Dummy PC to have PC-based prefetchers latch on; get entropy into higher + // bits + req->setPC(((Addr)0) << 2); + + // Embed it in a packet + PacketPtr pkt = new Packet(req, cmd); + + uint8_t* pkt_data = new uint8_t[req->getSize()]; + + pkt->dataDynamic(pkt_data); + + if (cmd.isWrite()) { + std::fill_n(pkt_data, req->getSize(), (uint8_t)0); + } + + return pkt; +} + void MemCtrl::pruneBurstTick() { @@ -767,7 +882,7 @@ MemCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, } bool -MemCtrl::inReadBusState(bool next_state, const MemInterface* mem_intr) const +MemCtrl::inReadBusState(bool next_state, MemInterface* mem_intr) const { // check the bus state if (next_state) { @@ -780,7 +895,7 @@ MemCtrl::inReadBusState(bool next_state, const MemInterface* mem_intr) const } bool -MemCtrl::inWriteBusState(bool next_state, const MemInterface* mem_intr) const +MemCtrl::inWriteBusState(bool next_state, MemInterface* mem_intr) const { // check the bus state if (next_state) { @@ -792,6 +907,12 @@ MemCtrl::inWriteBusState(bool next_state, const MemInterface* mem_intr) const } } +uint32_t +MemCtrl::bytesPerBurst() const +{ + return dram->bytesPerBurst(); +} + Tick MemCtrl::doBurstAccess(MemPacket* mem_pkt, MemInterface* mem_intr) { @@ -808,13 +929,14 @@ MemCtrl::doBurstAccess(MemPacket* mem_pkt, MemInterface* mem_intr) std::tie(cmd_at, mem_intr->nextBurstAt) = mem_intr->doBurstAccess(mem_pkt, mem_intr->nextBurstAt, queue); - 
DPRINTF(MemCtrl, "Access to %#x, ready at %lld next burst at %lld.\n", + DPRINTF(MemCtrl, "Access to %x, ready at %lld next burst at %lld.\n", mem_pkt->addr, mem_pkt->readyTime, mem_intr->nextBurstAt); // Update the minimum timing between the requests, this is a // conservative estimate of when we have to schedule the next // request to not introduce any unecessary bubbles. In most cases // we will wake up sooner than we have to. + assert(mem_intr->nextBurstAt > mem_intr->commandOffset()); mem_intr->nextReqTime = mem_intr->nextBurstAt - mem_intr->commandOffset(); // Update the common bus stats @@ -882,7 +1004,15 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, MemPacketQueue& resp_queue, EventFunctionWrapper& resp_event, EventFunctionWrapper& next_req_event, - bool& retry_wr_req) { + bool& retry_wr_req, + bool& retry_rd_req) { + DPRINTF(MemCtrl, "processNextReqEvent: readQueueSize: %d, writeQueueSize:%d, readQ: %d, writeQ: %d, respQ: %d\n", + mem_intr->readQueueSize, mem_intr->writeQueueSize, readQueue[0].size(), writeQueue[0].size(), + respQueue.size()); + if (considerOldestWrite) { + updateOldestWriteAge(); + } + // transition is handled by QoS algorithm if enabled if (turnPolicy) { // select bus state - only done if QoS algorithms are in use @@ -901,14 +1031,14 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, if (switched_cmd_type) { if (mem_intr->busState == MemCtrl::READ) { DPRINTF(MemCtrl, - "Switching to writes after %d reads with %d reads " - "waiting\n", mem_intr->readsThisTime, mem_intr->readQueueSize); + "Switching to writes after %d reads with %d reads " + "waiting\n", mem_intr->readsThisTime, mem_intr->readQueueSize); stats.rdPerTurnAround.sample(mem_intr->readsThisTime); mem_intr->readsThisTime = 0; } else { DPRINTF(MemCtrl, - "Switching to reads after %d writes with %d writes " - "waiting\n", mem_intr->writesThisTime, mem_intr->writeQueueSize); + "Switching to reads after %d writes with %d writes " + "waiting\n", mem_intr->writesThisTime, mem_intr->writeQueueSize); stats.wrPerTurnAround.sample(mem_intr->writesThisTime); mem_intr->writesThisTime = 0; } @@ -942,7 +1072,8 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, // if we are draining) if (!(mem_intr->writeQueueSize == 0) && (drainState() == DrainState::Draining || - mem_intr->writeQueueSize > writeLowThreshold)) { + mem_intr->writeQueueSize > writeLowThreshold || + (considerOldestWrite && oldestWriteAge > oldestWriteAgeThreshold))) { DPRINTF(MemCtrl, "Switching to writes due to read queue empty\n"); @@ -974,7 +1105,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, prio--; - DPRINTF(QOS, + DPRINTF(MemCtrl, "Checking READ queue [%d] priority [%d elements]\n", prio, queue->size()); @@ -1003,20 +1134,60 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, auto mem_pkt = *to_read; + DPRINTF(MemCtrl, "Read pkt chosen before doburst: %x\n", mem_pkt->getAddr()); + Tick cmd_at = doBurstAccess(mem_pkt, mem_intr); + if (mem_pkt->isLocMem) { + if (dram->polMan->locMemPolicy == enums::RambusTagProbOpt) { + assert(mem_pkt->BSlotBusyUntil!=MaxTick); + assert(!mem_pkt->probedRdMC); + } + + if (mem_pkt->probedRdH) { + assert(mem_pkt->tagCheckReady != MaxTick); + assert(!mem_pkt->probedRdMD); + assert (mem_pkt->tagCheckReady > (dram->getTRCDFAST() + dram->getTRLFAST())); + assert(cmd_at > (mem_pkt->tagCheckReady - dram->getTRCDFAST() - dram->getTRLFAST())); + + stats.deltaAbSlotRdH += + (cmd_at - (mem_pkt->tagCheckReady - dram->getTRCDFAST() - dram->getTRLFAST())); + + } else if (mem_pkt->probedRdMD) { + 
assert(mem_pkt->tagCheckReady != MaxTick); + assert(!mem_pkt->probedRdH); + assert (mem_pkt->tagCheckReady > (dram->getTRCDFAST() + dram->getTRLFAST())); + assert(cmd_at > (mem_pkt->tagCheckReady - dram->getTRCDFAST() - dram->getTRLFAST())); + + stats.deltaAbSlotRdMD += + (cmd_at - (mem_pkt->tagCheckReady - dram->getTRCDFAST() - dram->getTRLFAST())); + } + } + + assert((*to_read)->getAddr() == mem_pkt->getAddr()); + + if (mem_pkt->isTagCheck) { + DPRINTF(MemCtrl, "read times: %x, %s: tag: %d data: %d \n", mem_pkt->addr, mem_pkt->pkt->cmdString(), mem_pkt->tagCheckReady, mem_pkt->readyTime); + sendTagCheckRespond(mem_pkt); + } + DPRINTF(MemCtrl, - "Command for %#x, issued at %lld.\n", mem_pkt->addr, cmd_at); + "Command for %x, issued at %lld.\n", mem_pkt->addr, cmd_at); // sanity check assert(pktSizeCheck(mem_pkt, mem_intr)); assert(mem_pkt->readyTime >= curTick()); // log the response + DPRINTF(MemCtrl, "logResponse rd1: %d %d %x %d\n", + (*to_read)->requestorId(), + mem_pkt->qosValue(), mem_pkt->getAddr(), + mem_pkt->readyTime - mem_pkt->entryTime); + logResponse(MemCtrl::READ, (*to_read)->requestorId(), mem_pkt->qosValue(), mem_pkt->getAddr(), 1, mem_pkt->readyTime - mem_pkt->entryTime); - + mem_intr->readQueueSize--; // Insert into response queue. It will be sent back to the @@ -1036,16 +1207,31 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, // there are no other writes that can issue // Also ensure that we've issued a minimum defined number // of reads before switching, or have emptied the readQ - if ((mem_intr->writeQueueSize > writeHighThreshold) && - (mem_intr->readsThisTime >= minReadsPerSwitch || - mem_intr->readQueueSize == 0) - && !(nvmWriteBlock(mem_intr))) { + if (((mem_intr->writeQueueSize > writeHighThreshold) && + (mem_intr->readsThisTime >= minReadsPerSwitch || mem_intr->readQueueSize == 0) && + !(nvmWriteBlock(mem_intr))) || (considerOldestWrite && oldestWriteAge > oldestWriteAgeThreshold)) { switch_to_writes = true; } // remove the request from the queue - // the iterator is no longer valid . + // the iterator is no longer valid . readQueue[mem_pkt->qosValue()].erase(to_read); + + // Tag probing B slot comes here. 
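+ // After issuing this read burst in the A slot, probe the read queue for a + // queued tag-check packet whose tag bank is available, so its tag check can + // be serviced in the otherwise idle B slot of this access.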
+ if (mem_pkt->isLocMem && dram->polMan->locMemPolicy == enums::RambusTagProbOpt) { + assert(mem_pkt->BSlotBusyUntil != MaxTick); + + DPRINTF(MemCtrl, "Rd--> Start probing for B slot: Aslot addr: %x , end of tag bank busy for B slot: %d\n", + mem_pkt->getAddr(), mem_pkt->BSlotBusyUntil); + bool found = findCandidateForBSlot(mem_pkt); + DPRINTF(MemCtrl, "Rd--> B slot result: found flag: %d\n",found); + + if (found) { + stats.foundCandidBSlot++; + } else { + stats.noCandidBSlot++; + } + } } // switching to writes, either because the read queue is empty @@ -1069,6 +1255,9 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, DPRINTF(QOS, "Checking WRITE queue [%d] priority [%d elements]\n", prio, queue->size()); + DPRINTF(MemCtrl, + "Checking WRITE queue of size [%d] : \n", + queue->size()); // If we are changing command type, incorporate the minimum // bus turnaround delay @@ -1096,22 +1285,52 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, // sanity check assert(pktSizeCheck(mem_pkt, mem_intr)); + DPRINTF(MemCtrl, "Write pkt chosen before doburst: %x\n", mem_pkt->getAddr()); + Tick cmd_at = doBurstAccess(mem_pkt, mem_intr); + DPRINTF(MemCtrl, - "Command for %#x, issued at %lld.\n", mem_pkt->addr, cmd_at); + "Command for %x, issued at %lld.\n", mem_pkt->addr, cmd_at); + + if (mem_pkt->isTagCheck) { + DPRINTF(MemCtrl, "write times: %x, %s: tag: %d data: %d \n", mem_pkt->addr, mem_pkt->pkt->cmdString(), mem_pkt->tagCheckReady, mem_pkt->readyTime); + // Note: the second argument in this function call is NOT delay! + accessAndRespond(mem_pkt->pkt, mem_pkt->tagCheckReady, mem_intr); + } isInWriteQueue.erase(burstAlign(mem_pkt->addr, mem_intr)); // log the response + DPRINTF(MemCtrl, "logResponse wr1: %d %d %x %d\n", + mem_pkt->requestorId(), + mem_pkt->qosValue(), mem_pkt->getAddr(), + mem_pkt->readyTime - mem_pkt->entryTime); + logResponse(MemCtrl::WRITE, mem_pkt->requestorId(), mem_pkt->qosValue(), mem_pkt->getAddr(), 1, mem_pkt->readyTime - mem_pkt->entryTime); - + mem_intr->writeQueueSize--; // remove the request from the queue - the iterator is no longer valid writeQueue[mem_pkt->qosValue()].erase(to_write); + // Tag probing B slot comes here. 
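+ // Same B-slot probing as on the read path: look for a queued tag-check + // packet that can have its tag checked while this write occupies the A slot.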
+ if (mem_pkt->isLocMem && dram->polMan->locMemPolicy == enums::RambusTagProbOpt) { + assert(mem_pkt->BSlotBusyUntil != MaxTick); + + DPRINTF(MemCtrl, "WR--> Start probing for B slot: Aslot addr: %x , end of tag bank busy for B slot: %d\n", + mem_pkt->getAddr(), mem_pkt->BSlotBusyUntil); + bool found = findCandidateForBSlot(mem_pkt); + DPRINTF(MemCtrl, "WR--> B slot result: found flag: %d\n",found); + + if (found) { + stats.foundCandidBSlot++; + } else { + stats.noCandidBSlot++; + } + } + delete mem_pkt; // If we emptied the write queue, or got sufficiently below the @@ -1135,6 +1354,12 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, // case, which eventually will check for any draining and // also pause any further scheduling if there is really // nothing to do + + + if (retry_rd_req) { + retry_rd_req = false; + port.sendRetryReq(); + } } } // It is possible that a refresh to another rank kicks things back into @@ -1146,6 +1371,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr, retry_wr_req = false; port.sendRetryReq(); } + } bool @@ -1178,6 +1404,151 @@ MemCtrl::pktSizeCheck(MemPacket* mem_pkt, MemInterface* mem_intr) const return (mem_pkt->size <= mem_intr->bytesPerBurst()); } +bool +MemCtrl::findCandidateForBSlot(MemPacket* AslotPkt) +{ + DPRINTF(MemCtrl, "findCandidateForBSlot: Aslot addr: %x, BSlotTagBankBusyUntil: %d, readQ size: %d\n", + AslotPkt->getAddr(), AslotPkt->BSlotBusyUntil, readQueue[AslotPkt->qosValue()].size()); + + assert(AslotPkt->BSlotBusyUntil != MaxTick); + + MemPacketQueue::iterator BslotPktIt; + + for (auto queue = readQueue.rbegin(); + queue != readQueue.rend(); ++queue) { + BslotPktIt = searchReadQueueForBSlot((*queue), AslotPkt); + + if (BslotPktIt != queue->end()) { + // A proper candidate for B slot is found! 
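+ // Reserve the tag bank for the chosen B-slot packet and complete its tag + // check relative to the A slot's tag-bank busy window.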
+ auto BslotPkt = *BslotPktIt; + assert(BslotPkt != AslotPkt); + dram->updateTagActAllowed(BslotPkt->rank, BslotPkt->bank, AslotPkt->BSlotBusyUntil); + + DPRINTF(MemCtrl, "B slot found: Addr A: %x, isRead: %d /// Addr B: %x, IsRead: %d, IsHit: %d: IsDirty: %d\n", + AslotPkt->getAddr(), AslotPkt->isRead(), BslotPkt->getAddr(), + BslotPkt->pkt->owIsRead, BslotPkt->pkt->isHit, BslotPkt->pkt->isDirty); + + handleTCforBSlotPkt(BslotPktIt, AslotPkt->BSlotBusyUntil); + + // if (BslotPkt->pkt->owIsRead && BslotPkt->pkt->isHit) { + // stats.foundCandidBSlotRH++; + // } else if (BslotPkt->pkt->owIsRead && !BslotPkt->pkt->isHit && !BslotPkt->pkt->isDirty) { + // stats.foundCandidBSlotRMC++; + // } else if (BslotPkt->pkt->owIsRead && !BslotPkt->pkt->isHit && BslotPkt->pkt->isDirty) { + // stats.foundCandidBSlotRMD++; + // } + return true; + } + } + return false; + +} + +MemPacketQueue::iterator +MemCtrl::searchReadQueueForBSlot(MemPacketQueue& queue, MemPacket* AslotPkt) +{ + DPRINTF(MemCtrl, "searchReadQueueForBSlot: Aslot addr: %x, BSlotTagBankBusyUntil: %d\n", + AslotPkt->getAddr(), AslotPkt->BSlotBusyUntil); + + MemPacketQueue::iterator youngest = queue.end(); + + for (auto i = queue.begin(); i != queue.end() ; ++i) { + MemPacket* BslotPkt = *i; + if (BslotPkt->isTagCheck && BslotPkt != AslotPkt) { + Tick tagActAllowedAt = dram->nextTagActAvailability(BslotPkt->rank, BslotPkt->bank); + if (AslotPkt->BSlotBusyUntil >= tagActAllowedAt + dram->getTRCFAST()) { + if (youngest == queue.end()) { + youngest = i; + } else if (BslotPkt->entryTime > (*youngest)->entryTime) { + youngest = i; + } + } + } + } + + return youngest; +} + +void +MemCtrl::handleTCforBSlotPkt(MemPacketQueue::iterator BslotPktIt, Tick BSlotTagBankBusyUntil) +{ + // assert(policy == rambustagprob); + auto BslotPkt = *BslotPktIt; + + // read hits + if (BslotPkt->pkt->isRead() && BslotPkt->pkt->isHit) { + BslotPkt->tagCheckReady = BSlotTagBankBusyUntil + dram->getTRCDFAST() + + dram->getTRLFAST() - dram->getTRCFAST(); + sendTagCheckRespond(BslotPkt); + BslotPkt->isTagCheck = false; + BslotPkt->pkt->isTagCheck = false; + DPRINTF(MemCtrl, "Rd Hit successfully probed for TC, curTick: %d, adr: %x, tagCheckReady: %d\n", curTick(), BslotPkt->pkt->getAddr(), BslotPkt->tagCheckReady); + + assert(!BslotPkt->probedRdH); + BslotPkt->probedRdH = true; + + stats.foundCandidBSlotRH++; + + return; + } + + // read miss cleans + else if (BslotPkt->pkt->isRead() && !BslotPkt->pkt->isHit && !BslotPkt->pkt->isDirty) { + BslotPkt->tagCheckReady = BSlotTagBankBusyUntil + dram->getTRCDFAST() + + dram->getTRLFAST() - dram->getTRCFAST(); + sendTagCheckRespond(BslotPkt); + BslotPkt->isTagCheck = false; + BslotPkt->pkt->isTagCheck = false; + + // log the response + DPRINTF(MemCtrl, "logResponse rd2: %d %d %x %d\n", + BslotPkt->pkt->requestorId(), + BslotPkt->pkt->qosValue(), BslotPkt->addr, + BslotPkt->tagCheckReady - BslotPkt->entryTime); + + logResponse(MemCtrl::READ, BslotPkt->pkt->requestorId(), + BslotPkt->pkt->qosValue(), BslotPkt->addr, 1, + BslotPkt->tagCheckReady - BslotPkt->entryTime); + + dram->readQueueSize--; + + // remove the packet from read queue + readQueue[BslotPkt->qosValue()].erase(BslotPktIt); + + DPRINTF(MemCtrl, "Rd Miss Clean successfully probed for TC, curTick: %d, adr: %x, tagCheckReady: %d, readQ size:%d\n", + curTick(), BslotPkt->pkt->getAddr(), BslotPkt->tagCheckReady, readQueue[BslotPkt->qosValue()].size()); + + assert(!BslotPkt->probedRdMC); + BslotPkt->probedRdMC = true; + +
stats.foundCandidBSlotRMC++; + + delete BslotPkt->pkt; + delete BslotPkt; + return; + } + // read miss dirty + else if (BslotPkt->pkt->isRead() && !BslotPkt->pkt->isHit && BslotPkt->pkt->isDirty) { + BslotPkt->tagCheckReady = BSlotTagBankBusyUntil + dram->getTRCDFAST() + + dram->getTRLFAST() - dram->getTRCFAST(); + sendTagCheckRespond(BslotPkt); + BslotPkt->isTagCheck = false; + BslotPkt->pkt->isTagCheck = false; + DPRINTF(MemCtrl, "Rd Miss Dirty successfully probed for TC, curTick: %d, adr: %x, tagCheckReady: %d\n", + curTick(), BslotPkt->pkt->getAddr(), BslotPkt->tagCheckReady); + + assert(!BslotPkt->probedRdMD); + BslotPkt->probedRdMD = true; + + stats.foundCandidBSlotRMD++; + + return; + } + + +} + MemCtrl::CtrlStats::CtrlStats(MemCtrl &_ctrl) : statistics::Group(&_ctrl), ctrl(_ctrl), @@ -1198,9 +1569,6 @@ MemCtrl::CtrlStats::CtrlStats(MemCtrl &_ctrl) ADD_STAT(mergedWrBursts, statistics::units::Count::get(), "Number of controller write bursts merged with an existing one"), - ADD_STAT(neitherReadNorWriteReqs, statistics::units::Count::get(), - "Number of requests that are neither read nor write"), - ADD_STAT(avgRdQLen, statistics::units::Rate< statistics::units::Count, statistics::units::Tick>::get(), "Average read queue length when enqueuing"), @@ -1227,6 +1595,17 @@ MemCtrl::CtrlStats::CtrlStats(MemCtrl &_ctrl) "Reads before turning the bus around for writes"), ADD_STAT(wrPerTurnAround, statistics::units::Count::get(), "Writes before turning the bus around for reads"), + + ADD_STAT(noCandidBSlot, statistics::units::Count::get(), + " "), + ADD_STAT(foundCandidBSlot, statistics::units::Count::get(), + " "), + ADD_STAT(foundCandidBSlotRH, statistics::units::Count::get(), + " "), + ADD_STAT(foundCandidBSlotRMC, statistics::units::Count::get(), + " "), + ADD_STAT(foundCandidBSlotRMD, statistics::units::Count::get(), + " "), ADD_STAT(bytesReadWrQ, statistics::units::Byte::get(), "Total number of bytes read from write queue"), @@ -1271,7 +1650,15 @@ MemCtrl::CtrlStats::CtrlStats(MemCtrl &_ctrl) "Per-requestor read average memory access latency"), ADD_STAT(requestorWriteAvgLat, statistics::units::Rate< statistics::units::Tick, statistics::units::Count>::get(), - "Per-requestor write average memory access latency") + "Per-requestor write average memory access latency"), + + ADD_STAT(deltaAbSlotRdH, statistics::units::Tick::get(), "stat"), + ADD_STAT(deltaAbSlotRdMD, statistics::units::Tick::get(), "stat"), + + ADD_STAT(avgDeltaAbSlotRdH, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgDeltaAbSlotRdMD, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat") { } @@ -1303,6 +1690,9 @@ MemCtrl::CtrlStats::regStats() avgWrBWSys.precision(8); avgGap.precision(2); + avgDeltaAbSlotRdH.precision(2); + avgDeltaAbSlotRdMD.precision(2); + // per-requestor bytes read and written to memory requestorReadBytes .init(max_requestors) @@ -1369,6 +1759,11 @@ MemCtrl::CtrlStats::regStats() requestorWriteRate = requestorWriteBytes / simSeconds; requestorReadAvgLat = requestorReadTotalLat / requestorReadAccesses; requestorWriteAvgLat = requestorWriteTotalLat / requestorWriteAccesses; + + avgDeltaAbSlotRdH = (deltaAbSlotRdH/foundCandidBSlotRH)/1000; + avgDeltaAbSlotRdMD = (deltaAbSlotRdMD/foundCandidBSlotRMD)/1000; + + } void @@ -1427,8 +1822,9 @@ MemCtrl::drain() { // if there is anything in any of our internal queues, keep track // of that as well - if (totalWriteQueueSize || totalReadQueueSize || !respQEmpty() || - 
!allIntfDrained()) { + if (!(!totalWriteQueueSize && !totalReadQueueSize && respQEmpty() && + allIntfDrained())) { + DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d," " resp: %d\n", totalWriteQueueSize, totalReadQueueSize, respQueue.size()); diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh index 917798ffa7..c65296ecb3 100644 --- a/src/mem/mem_ctrl.hh +++ b/src/mem/mem_ctrl.hh @@ -54,6 +54,7 @@ #include "base/callback.hh" #include "base/statistics.hh" +#include "debug/MemCtrl.hh" #include "enums/MemSched.hh" #include "mem/qos/mem_ctrl.hh" #include "mem/qport.hh" @@ -100,7 +101,7 @@ class MemPacket public: /** When did request enter the controller */ - const Tick entryTime; + Tick entryTime; /** When will request leave the controller */ Tick readyTime; @@ -156,6 +157,20 @@ class MemPacket */ uint8_t _qosValue; + /** + * DRAM cache specific flags + * + */ + bool isTagCheck = false; + Tick tagCheckReady = MaxTick; + bool isLocMem = false; + Tick BSlotBusyUntil = MaxTick; + bool probedRdH = false; + bool probedRdMC = false; + bool probedRdMD = false; + + + /** * Set the packet QoS value * (interface compatibility with Packet) @@ -213,6 +228,20 @@ class MemPacket burstHelper(NULL), _qosValue(_pkt->qosValue()) { } + /* + // MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _channel, + // uint8_t _rank, uint8_t _bank, uint32_t _row, uint16_t bank_id, + // Addr _addr, unsigned int _size, + // bool _isTagCheck, bool _isHit, bool _isDirty, Tick _tagCheckReady) + // : entryTime(curTick()), readyTime(curTick()), pkt(_pkt), + // _requestorId(pkt->requestorId()), + // read(is_read), dram(is_dram), pseudoChannel(_channel), rank(_rank), + // bank(_bank), row(_row), bankId(bank_id), addr(_addr), size(_size), + // burstHelper(NULL), _qosValue(_pkt->qosValue()), + // isTagCheck(_isTagCheck), isHit(_isHit), isDirty(_isDirty), tagCheckReady(_tagCheckReady) + // { } + */ + }; // The memory packets are store in a multiple dequeue structure, @@ -303,7 +332,8 @@ class MemCtrl : public qos::MemCtrl MemPacketQueue& resp_queue, EventFunctionWrapper& resp_event, EventFunctionWrapper& next_req_event, - bool& retry_wr_req); + bool& retry_wr_req, + bool& retry_rd_req); EventFunctionWrapper nextReqEvent; virtual void processRespondEvent(MemInterface* mem_intr, @@ -383,7 +413,9 @@ class MemCtrl : public qos::MemCtrl */ virtual void accessAndRespond(PacketPtr pkt, Tick static_latency, MemInterface* mem_intr); + void sendTagCheckRespond(MemPacket* pkt); + PacketPtr getPacket(Addr addr, unsigned size, const MemCmd& cmd, Request::FlagsType flags = 0); /** * Determine if there is a packet that can issue. * @@ -503,7 +535,7 @@ class MemCtrl : public qos::MemCtrl + */ MemInterface* dram; - virtual AddrRangeList getAddrRanges(); + // virtual AddrRangeList getAddrRanges(); /** * The following are basic design parameters of the memory @@ -515,6 +547,8 @@ class MemCtrl : public qos::MemCtrl uint32_t writeBufferSize; uint32_t writeHighThreshold; uint32_t writeLowThreshold; + uint32_t oldestWriteAgeThreshold; + Tick oldestWriteAge; const uint32_t minWritesPerSwitch; const uint32_t minReadsPerSwitch; @@ -538,12 +572,24 @@ class MemCtrl : public qos::MemCtrl */ const Tick backendLatency; + /** + * Pipeline latency of the controller frontend for tag Check (TC). + */ + const Tick frontendLatencyTC; + + /** + * Pipeline latency of the backend and PHY for tag Check (TC). 
+ */ + const Tick backendLatencyTC; + /** * Length of a command window, used to check * command bandwidth */ const Tick commandWindow; + bool considerOldestWrite; + /** * Till when must we wait before issuing next RD/WR burst? */ @@ -588,6 +634,12 @@ class MemCtrl : public qos::MemCtrl statistics::Histogram rdPerTurnAround; statistics::Histogram wrPerTurnAround; + statistics::Scalar noCandidBSlot; + statistics::Scalar foundCandidBSlot; + statistics::Scalar foundCandidBSlotRH; + statistics::Scalar foundCandidBSlotRMC; + statistics::Scalar foundCandidBSlotRMD; + statistics::Scalar bytesReadWrQ; statistics::Scalar bytesReadSys; statistics::Scalar bytesWrittenSys; @@ -617,6 +669,12 @@ class MemCtrl : public qos::MemCtrl // per-requestor raed and write average memory access latency statistics::Formula requestorReadAvgLat; statistics::Formula requestorWriteAvgLat; + + statistics::Scalar deltaAbSlotRdH; + statistics::Scalar deltaAbSlotRdMD; + + statistics::Formula avgDeltaAbSlotRdH; + statistics::Formula avgDeltaAbSlotRdMD; }; CtrlStats stats; @@ -677,6 +735,8 @@ class MemCtrl : public qos::MemCtrl MemCtrl(const MemCtrlParams &p); + virtual AddrRangeList getAddrRanges(); + /** * Ensure that all interfaced have drained commands * @@ -754,6 +814,7 @@ class MemCtrl : public qos::MemCtrl { assert(pseudo_channel == 0); schedule(nextReqEvent, tick); + DPRINTF(MemCtrl, "Scheduling next request after refreshing\n"); } /** @@ -762,7 +823,7 @@ class MemCtrl : public qos::MemCtrl * @param next_state Check either the current or next bus state * @return True when bus is currently in a read state */ - bool inReadBusState(bool next_state, const MemInterface* mem_intr) const; + bool inReadBusState(bool next_state, MemInterface* mem_intr) const; /** * Check the current direction of the memory channel @@ -770,7 +831,21 @@ class MemCtrl : public qos::MemCtrl * @param next_state Check either the current or next bus state * @return True when bus is currently in a write state */ - bool inWriteBusState(bool next_state, const MemInterface* mem_intr) const; + bool inWriteBusState(bool next_state, MemInterface* mem_intr) const; + + uint32_t bytesPerBurst() const; + + Addr burstAlign(Addr addr) const { return burstAlign(addr, dram); } + + void accessAndRespond(PacketPtr pkt, Tick static_latency) { accessAndRespond(pkt, static_latency, dram); } + + void updateOldestWriteAge(); + + bool findCandidateForBSlot(MemPacket* AslotPkt); + + void handleTCforBSlotPkt(MemPacketQueue::iterator BslotPktIt, Tick BSlotTagBankBusyUntil); + + MemPacketQueue::iterator searchReadQueueForBSlot(MemPacketQueue& queue, MemPacket* AslotPkt); Port &getPort(const std::string &if_name, PortID idx=InvalidPortID) override; diff --git a/src/mem/mem_interface.cc b/src/mem/mem_interface.cc index e97448f457..2e1f3f3bce 100644 --- a/src/mem/mem_interface.cc +++ b/src/mem/mem_interface.cc @@ -79,7 +79,7 @@ MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window, { ctrl = _ctrl; maxCommandsPerWindow = command_window / tCK; - // setting the pseudo channel number for this interface + // setting the pseudo channel number for this interfacez pseudoChannel = pseudo_channel; } diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh index b0f762fc80..8eede9246b 100644 --- a/src/mem/mem_interface.hh +++ b/src/mem/mem_interface.hh @@ -97,13 +97,14 @@ class MemInterface : public AbstractMemory Tick wrAllowedAt; Tick preAllowedAt; Tick actAllowedAt; + Tick tagActAllowedAt; uint32_t rowAccesses; uint32_t bytesAccessed; Bank() : openRow(NO_ROW), 
bank(0), bankgr(0), - rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0), + rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0), tagActAllowedAt(0), rowAccesses(0), bytesAccessed(0) { } }; @@ -167,6 +168,8 @@ class MemInterface : public AbstractMemory public: + AbstractMemory* polMan; + /** * Buffer sizes for read and write queues in the controller * These are passed to the controller on instantiation @@ -275,7 +278,7 @@ class MemInterface : public AbstractMemory /** * @return number of bytes in a burst for this interface */ - uint32_t bytesPerBurst() const { return burstSize; } + virtual uint32_t bytesPerBurst() const { return burstSize; } /* * @return time to offset next command @@ -412,6 +415,22 @@ class MemInterface : public AbstractMemory "should not be executed from here.\n"); } + virtual Tick nextTagActAvailability(unsigned rankNumber, unsigned bankNumber) + { panic("MemInterface nextTagActAvailability should not be executed from here.\n"); } + + virtual Tick getTRCFAST() + { panic("MemInterface getTRCFAST should not be executed from here.\n"); } + + virtual Tick getTRLFAST() + { panic("MemInterface getTRLFAST should not be executed from here.\n"); } + + virtual Tick getTRCDFAST() + { panic("MemInterface getTRCDFAST should not be executed from here.\n"); } + + virtual void updateTagActAllowed(unsigned rankNumber, unsigned bankNumber, Tick BSlotTagAllowedAt) + { panic("MemInterface updateTagActAllowed should not be executed from here.\n"); } + + typedef MemInterfaceParams Params; MemInterface(const Params &_p); }; diff --git a/src/mem/nvm_interface.cc b/src/mem/nvm_interface.cc index 366f71d56a..2c684edc19 100644 --- a/src/mem/nvm_interface.cc +++ b/src/mem/nvm_interface.cc @@ -402,9 +402,9 @@ NVMInterface::processReadReadyEvent() bool NVMInterface::burstReady(MemPacket* pkt) const { - bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true, this)) && + bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true, (MemInterface*)this)) && (pkt->readyTime <= curTick()) && (numReadDataReady > 0); - bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true, this) && + bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true, (MemInterface*)this) && !writeRespQueueFull(); return (read_rdy || write_rdy); } diff --git a/src/mem/packet.hh b/src/mem/packet.hh index df2a8165fc..162747d87a 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -297,6 +297,15 @@ class Packet : public Printable, public Extensible typedef uint32_t FlagsType; typedef gem5::Flags Flags; + bool isTagCheck = false; + bool isLocMem = false; + bool owIsRead = false; + bool isHit = false; + bool isDirty = false; + bool hasDirtyData = false; + Addr dirtyLineAddr = -1; + + private: enum : FlagsType { @@ -1233,7 +1242,7 @@ class Packet : public Printable, public Extensible const T* getConstPtr() const { - assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA)); + //assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA)); return (const T*)data; } diff --git a/src/mem/packet_queue.cc b/src/mem/packet_queue.cc index 535764fbc8..91f78a438b 100644 --- a/src/mem/packet_queue.cc +++ b/src/mem/packet_queue.cc @@ -118,10 +118,10 @@ PacketQueue::schedSendTiming(PacketPtr pkt, Tick when) // add a very basic sanity check on the port to ensure the // invisible buffer is not growing beyond reasonable limits - if (!_disableSanityCheck && transmitList.size() > 128) { - panic("Packet queue %s has grown beyond 128 packets\n", - name()); - } + // if (!_disableSanityCheck && transmitList.size() > 256) { + // 
panic("Packet queue %s has grown beyond 256 packets\n", + // name()); + // } // we should either have an outstanding retry, or a send event // scheduled, but there is an unfortunate corner case where the diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 06f2cdc003..737951b7c5 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -116,18 +116,21 @@ PhysicalMemory::PhysicalMemory(const std::string& _name, "Skipping memory %s that is not in global address map\n", m->name()); - // sanity check - fatal_if(m->getAddrRange().interleaved(), - "Memory %s that is not in the global address map cannot " - "be interleaved\n", m->name()); - - // simply do it independently, also note that this kind of - // memories are allowed to overlap in the logic address - // map - std::vector unmapped_mems{m}; - createBackingStore(m->getAddrRange(), unmapped_mems, - m->isConfReported(), m->isInAddrMap(), - m->isKvmMap()); + // Only create backing stores for non-null memories + if (!m->isNull()) { + // sanity check + fatal_if(m->getAddrRange().interleaved(), + "Memory %s that is not in the global address map cannot " + "be interleaved\n", m->name()); + + // simply do it independently, also note that this kind of + // memories are allowed to overlap in the logic address + // map + std::vector unmapped_mems{m}; + createBackingStore(m->getAddrRange(), unmapped_mems, + m->isConfReported(), m->isInAddrMap(), + m->isKvmMap()); + } } } diff --git a/src/mem/policy_manager.cc b/src/mem/policy_manager.cc new file mode 100644 index 0000000000..156e423fad --- /dev/null +++ b/src/mem/policy_manager.cc @@ -0,0 +1,3627 @@ +#include "mem/policy_manager.hh" + +#include "base/trace.hh" +#include "debug/ChkptRstrTest.hh" +#include "debug/PolicyManager.hh" +#include "debug/Drain.hh" +#include "mem/dram_interface.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" + +namespace gem5 +{ + +namespace memory +{ + +PolicyManager::PolicyManager(const PolicyManagerParams &p): + AbstractMemory(p), + port(name() + ".port", *this), + locReqPort(name() + ".loc_req_port", *this), + farReqPort(name() + ".far_req_port", *this), + locBurstSize(p.loc_burst_size), + farBurstSize(p.far_burst_size), + locMem(p.loc_mem), + replacementPolicy(p.replacement_policy), + dramCacheSize(p.dram_cache_size), + blockSize(p.block_size), + assoc(p.assoc), + addrSize(p.addr_size), + orbMaxSize(p.orb_max_size), orbSize(0), + crbMaxSize(p.crb_max_size), crbSize(0), + extreme(p.extreme), + alwaysHit(p.always_hit), alwaysDirty(p.always_dirty), + bypassDcache(p.bypass_dcache), + channelIndex(p.channel_index), + frontendLatency(p.static_frontend_latency), + backendLatency(p.static_backend_latency), + numColdMisses(0), + cacheWarmupRatio(p.cache_warmup_ratio), + infoCacheWarmupRatio(0.05), + resetStatsWarmup(false), + prevArrival(0), + blksInserted(0), + retryLLC(false), retryLLCRepetitive(false), retryLLCFarMemWr(false), + retryTagCheck(false), retryLocMemRead(false), retryFarMemRead(false), + retryLocMemWrite(false), retryFarMemWrite(false), + maxConf(0), + tagCheckEvent([this]{ processTagCheckEvent(); }, name()), + locMemReadEvent([this]{ processLocMemReadEvent(); }, name()), + locMemWriteEvent([this]{ processLocMemWriteEvent(); }, name()), + farMemReadEvent([this]{ processFarMemReadEvent(); }, name()), + farMemWriteEvent([this]{ processFarMemWriteEvent(); }, name()), + polManStats(*this) +{ + panic_if(orbMaxSize<8, "ORB maximum size must be at least 8.\n"); + + locMemPolicy = p.loc_mem_policy; + + locMem->setPolicyManager(this); + + unsigned 
numOfSets = dramCacheSize/(blockSize * assoc); + + for (int i = 0; i < numOfSets; i++) { + std::vector tempSet; + for (int j = 0; j < assoc; j++) { + ReplaceableEntry* tempWay = new ReplaceableEntry (-1, -1, false, false, -1); + tempWay->replacementData = replacementPolicy->instantiateEntry(); + tempSet.push_back(tempWay); + } + tagMetadataStore.push_back(tempSet); + } + DPRINTF(PolicyManager, "policy manager initialized\n"); +} + +Tick +PolicyManager::recvAtomic(PacketPtr pkt) +{ + DPRINTF(PolicyManager, "recvAtomic: %s %d %d %d\n", + pkt->cmdString(), pkt->getAddr(), pkt->getSize(), pkt->getBlockAddr(blockSize)); + + if (!getAddrRange().contains(pkt->getBlockAddr(blockSize))) { + panic("Can't handle address range for packet %s\n", pkt->print()); + } + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(pkt->getSize()==0, "Packet size should not be 0.\n"); + + // do the actual memory access and turn the packet into a response + // access(pkt); + + handleRequestorPktAtomic(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + // also this latency can't be 0 + // panic("Can't handle this process --> implement accessLatency() " + // "according to your interface. pkt: %s\n", pkt->print()); + return accessLatency(); + } + + return 0; +} + +Tick +PolicyManager::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) +{ + DPRINTF(PolicyManager, "recvAtomicBackdoor: %s %d\n", + pkt->cmdString(), pkt->getAddr()); + Tick latency = recvAtomic(pkt); + getBackdoor(backdoor); + return latency; +} + +void +PolicyManager::recvFunctional(PacketPtr pkt) +{ + bool found; + + if (getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + functionalAccess(pkt); + found = true; + } else { + found = false; + } + + panic_if(!found, "Can't handle address range for packet %s\n", + pkt->print()); + + DPRINTF(PolicyManager, "recvFunctional: %s %d\n", + pkt->cmdString(), pkt->getAddr()); + +} + +Tick +PolicyManager::accessLatency() +{ + // THIS IS FOR DRAM ONLY! 
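+ // Closed-page access estimate used for atomic accesses: + // precharge + activate-to-read delay + read latency.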
+ // return (tRP + tRCD_RD + tRL); + return (locMem->get_tRP() + locMem->get_tRCD_RD() + locMem->get_tRL()); +} + +bool +PolicyManager::findInORB(Addr addr) +{ + bool found = false; + for (const auto& e : ORB) { + if (e.second->owPkt->getAddr() == addr) { + found = true; + } + } + + return found; +} + +unsigned +PolicyManager::findDupInORB(Addr addr) +{ + unsigned count=0; + for (const auto& e : ORB) { + if (e.second->owPkt->getAddr() == addr) { + + count++; + } + } + return count; +} + +void +PolicyManager::init() +{ + if (!port.isConnected()) { + fatal("Policy Manager %s is unconnected!\n", name()); + } else if (!locReqPort.isConnected()) { + fatal("Policy Manager %s is unconnected!\n", name()); + } else if (!farReqPort.isConnected()) { + fatal("Policy Manager %s is unconnected!\n", name()); + } else { + port.sendRangeChange(); + //reqPort.recvRangeChange(); + } +} + +bool +PolicyManager::recvTimingReq(PacketPtr pkt) +{ + if (bypassDcache) { + return farReqPort.sendTimingReq(pkt); + } + + // This is where we enter from the outside world + DPRINTF(PolicyManager, "recvTimingReq: request %s addr 0x%x-> %d size %d\n", + pkt->cmdString(), pkt->getAddr(), pkt->getAddr(), pkt->getSize()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller\n"); + assert(pkt->getSize() != 0); + + // Calc avg gap between requests + if (prevArrival != 0) { + polManStats.totGap += curTick() - prevArrival; + } + prevArrival = curTick(); + + // Find out how many memory packets a pkt translates to + // If the burst size is equal or larger than the pkt size, then a pkt + // translates to only one memory packet. Otherwise, a pkt translates to + // multiple memory packets + + const Addr base_addr = pkt->getAddr(); + Addr addr = base_addr; + uint32_t burst_size = locBurstSize; + unsigned size = std::min((addr | (burst_size - 1)) + 1, + base_addr + pkt->getSize()) - addr; + + // check merging for writes + if (pkt->isWrite()) { + + // polManStats.writePktSize[ceilLog2(size)]++; + + bool merged = isInWriteQueue.find((addr & ~(Addr(locBurstSize - 1)))) != + isInWriteQueue.end(); + + bool mergedInLocMemFB = locMem->checkFwdMrgeInFB(pkt->getAddr()); + + assert(!(mergedInLocMemFB && merged)); + + if (merged) { + + polManStats.mergedWrBursts++; + polManStats.mergedWrPolManWB++; + + DPRINTF(PolicyManager, "merged in policy manager write back buffer: %lld\n", pkt->getAddr()); + + // farMemCtrl->accessInterface(pkt); + + // sendRespondToRequestor(pkt, frontendLatency); + accessAndRespond(pkt, frontendLatency); + return true; + } else if (mergedInLocMemFB) { + + polManStats.mergedWrBursts++; + polManStats.mergedWrLocMemFB++; + + DPRINTF(PolicyManager, "merged in DRAM cache flush buffer: %lld\n", pkt->getAddr()); + + // farMemCtrl->accessInterface(pkt); + + // sendRespondToRequestor(pkt, frontendLatency); + accessAndRespond(pkt, frontendLatency); + return true; + } + + } + + // check forwarding for reads + bool foundInORB = false; + bool foundInCRB = false; + bool foundInFarMemWrite = false; + bool foundInLocMemFB = false; + + if (pkt->isRead()) { + + if (isInWriteQueue.find(pkt->getAddr()) != isInWriteQueue.end()) { + + if (!ORB.empty()) { + for (const auto& e : ORB) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->validEntry && + e.second->owPkt->isWrite() && + e.second->owPkt->getAddr() <= addr && + ((addr + size) <= + 
(e.second->owPkt->getAddr() + + e.second->owPkt->getSize()))) { + + foundInORB = true; + + polManStats.servicedByWrQ++; + + polManStats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !CRB.empty()) { + for (const auto& e : CRB) { + + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->isWrite() && + e.second->getAddr() <= addr && + ((addr + size) <= + (e.second->getAddr() + e.second->getSize()))) { + + foundInCRB = true; + + polManStats.servicedByWrQ++; + + polManStats.bytesReadWrQ += burst_size; + + break; + } + } + } + + if (!foundInORB && !foundInCRB && !pktFarMemWrite.empty()) { + for (const auto& e : pktFarMemWrite) { + // check if the read is subsumed in the write queue + // packet we are looking at + if (e.second->getAddr() <= addr && + ((addr + size) <= + (e.second->getAddr() + + e.second->getSize()))) { + + foundInFarMemWrite = true; + + polManStats.servicedByWrQ++; + + polManStats.bytesReadWrQ += burst_size; + + break; + } + } + } + } + + if (locMem->checkFwdMrgeInFB(pkt->getAddr())) { + // This is not faithful to the real hardware + // for transferring the FB data to the policy manager + // since the case is very rare. + foundInLocMemFB = true; + + polManStats.servicedByFB++; + + polManStats.bytesReadWrQ += burst_size; + } + + if (foundInORB || foundInCRB || foundInFarMemWrite || foundInLocMemFB) { + DPRINTF(PolicyManager, "FW: %lld\n", pkt->getAddr()); + + polManStats.readPktSize[ceilLog2(size)]++; + + // farMemCtrl->accessInterface(pkt); + + // sendRespondToRequestor(pkt, frontendLatency); + accessAndRespond(pkt, frontendLatency); + return true; + } + } + + // process conflicting requests. + // conflicts are checked only based on Index of DRAM cache + if (checkConflictInORB(pkt)) { + + polManStats.totNumConf++; + + if (CRB.size()>=crbMaxSize) { + + DPRINTF(PolicyManager, "CRBfull: %lld\n", pkt->getAddr()); + + polManStats.totNumCRBFull++; + + retryLLC = true; + + if (pkt->isRead()) { + polManStats.numRdRetry++; + } + else { + polManStats.numWrRetry++; + } + return false; + } + + CRB.push_back(std::make_pair(curTick(), pkt)); + DPRINTF(PolicyManager, "CRB PB: %d: %s\n", pkt->getAddr(), pkt->cmdString()); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + if (CRB.size() > maxConf) { + maxConf = CRB.size(); + polManStats.maxNumConf = CRB.size(); + } + return true; + } + // check if ORB or FMWB is full and set retry + if (pktFarMemWrite.size() >= (orbMaxSize / 2)) { + + DPRINTF(PolicyManager, "FMWBfull: %lld\n", pkt->getAddr()); + + retryLLCFarMemWr = true; + + if (pkt->isRead()) { + polManStats.numRdRetry++; + } + else { + polManStats.numWrRetry++; + } + return false; + } + + if (ORB.size() >= orbMaxSize) { + + DPRINTF(PolicyManager, "ORBfull: addr %lld\n", pkt->getAddr()); + + polManStats.totNumORBFull++; + + retryLLC = true; + + if (pkt->isRead()) { + polManStats.numRdRetry++; + } + else { + polManStats.numWrRetry++; + } + return false; + } + + // This should only happen in traffic generator tests. 
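+ // A request whose address is already outstanding in the ORB is not accepted + // again; the entry is flagged and the requestor must retry (retryLLCRepetitive).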
+ if (findInORB(pkt->getAddr())) { + ORB.at(pkt->getAddr())->repetitiveReqRcvd = true; + retryLLCRepetitive = true; + return false; + } + + // if none of the above cases happens, + // add it to the ORB + handleRequestorPkt(pkt); + + if (pkt->isWrite()) { + isInWriteQueue.insert(pkt->getAddr()); + } + + setNextState(ORB.at(pkt->getAddr())); + + handleNextState(ORB.at(pkt->getAddr())); + + DPRINTF(PolicyManager, "Policy manager accepted packet 0x%x %d\n", pkt->getAddr(), pkt->getAddr()); + + return true; +} + +void +PolicyManager::processTagCheckEvent() +{ + // sanity check for the chosen packet + auto orbEntry = ORB.at(pktTagCheck.front()); + assert(orbEntry->pol == enums::Rambus || orbEntry->pol == enums::RambusTagProbOpt); + assert(orbEntry->validEntry); + assert(orbEntry->state == tagCheck); + assert(!orbEntry->issued); + + PacketPtr tagCheckPktPtr; + + if (orbEntry->owPkt->isRead()) { + tagCheckPktPtr = getPacket(pktTagCheck.front(), + blockSize, + MemCmd::ReadReq); + } else { + assert(orbEntry->owPkt->isWrite()); + tagCheckPktPtr = getPacket(pktTagCheck.front(), + blockSize, + MemCmd::WriteReq); + } + + tagCheckPktPtr->isTagCheck = true; + tagCheckPktPtr->isLocMem = true; + tagCheckPktPtr->owIsRead = orbEntry->owPkt->isRead(); + tagCheckPktPtr->isHit = orbEntry->isHit; + tagCheckPktPtr->isDirty = orbEntry->prevDirty; + assert(!tagCheckPktPtr->hasDirtyData); + tagCheckPktPtr->dirtyLineAddr = orbEntry->dirtyLineAddr; + + if (extreme) { + tagCheckPktPtr->isDirty = alwaysDirty; + tagCheckPktPtr->isHit = alwaysHit; + } + + if (tagCheckPktPtr->owIsRead && !tagCheckPktPtr->isHit) { + + if (!tagCheckPktPtr->isDirty) { + assert(tagCheckPktPtr->dirtyLineAddr == -1); + } else { + assert(tagCheckPktPtr->dirtyLineAddr != -1); + } + } + + if (locReqPort.sendTimingReq(tagCheckPktPtr)) { + DPRINTF(PolicyManager, "Tag check req sent for adr: %lld\n", tagCheckPktPtr->getAddr()); + orbEntry->state = waitingTCtag; + orbEntry->issued = true; + orbEntry->tagCheckIssued = curTick(); + pktTagCheck.pop_front(); + polManStats.sentTagCheckPort++; + } else { + DPRINTF(PolicyManager, "Sending tag check failed for adr: %lld\n", tagCheckPktPtr->getAddr()); + retryTagCheck = true; + delete tagCheckPktPtr; + polManStats.failedTagCheckPort++; + } + + if (!pktTagCheck.empty() && !tagCheckEvent.scheduled() && !retryTagCheck) { + schedule(tagCheckEvent, curTick()+1000); + } +} + +void +PolicyManager::processLocMemReadEvent() +{ + // sanity check for the chosen packet + auto orbEntry = ORB.at(pktLocMemRead.front()); + DPRINTF(PolicyManager, "loc mem read START : %lld--> %d, %d, %d, %d, %d, %d, %d:\n", orbEntry->owPkt->getAddr(), ORB.size(), pktLocMemRead.size(), + pktLocMemWrite.size(), pktFarMemRead.size(), pktFarMemWrite.size(), CRB.size(), orbEntry->state); + assert(orbEntry->validEntry); + assert(orbEntry->state == locMemRead); + assert(!orbEntry->issued); + + PacketPtr rdLocMemPkt = getPacket(pktLocMemRead.front(), + blockSize, + MemCmd::ReadReq); + rdLocMemPkt->isLocMem = true; + if (locReqPort.sendTimingReq(rdLocMemPkt)) { + DPRINTF(PolicyManager, "loc mem read is sent : %lld--> %d, %d, %d, %d, %d, %d\n", rdLocMemPkt->getAddr(), ORB.size(), pktLocMemRead.size(), + pktLocMemWrite.size(), pktFarMemRead.size(), pktFarMemWrite.size(), CRB.size()); + orbEntry->state = waitingLocMemReadResp; + orbEntry->issued = true; + orbEntry->locRdIssued = curTick(); + pktLocMemRead.pop_front(); + polManStats.sentLocRdPort++; + } else { + DPRINTF(PolicyManager, "loc mem read sending failed: %lld\n", rdLocMemPkt->getAddr()); + 
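+        // The local memory controller did not accept the read. The address
+        // stays at the head of pktLocMemRead; wait for recvReqRetry() from
+        // the local controller before reissuing.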
retryLocMemRead = true; + delete rdLocMemPkt; + polManStats.failedLocRdPort++; + } + + if (!pktLocMemRead.empty() && !locMemReadEvent.scheduled() && !retryLocMemRead) { + schedule(locMemReadEvent, curTick()+1000); + } +} + +void +PolicyManager::processLocMemWriteEvent() +{ + // sanity check for the chosen packet + auto orbEntry = ORB.at(pktLocMemWrite.front()); + DPRINTF(PolicyManager, "loc mem write START : %lld--> %d, %d, %d, %d, %d, %d, %d:\n", orbEntry->owPkt->getAddr(), ORB.size(), pktLocMemRead.size(), + pktLocMemWrite.size(), pktFarMemRead.size(), pktFarMemWrite.size(), CRB.size(), orbEntry->state); + assert(orbEntry->validEntry); + assert(orbEntry->state == locMemWrite); + assert(!orbEntry->issued); + + PacketPtr wrLocMemPkt = getPacket(pktLocMemWrite.front(), + blockSize, + MemCmd::WriteReq); + wrLocMemPkt->isLocMem = true; + assert(!wrLocMemPkt->isTagCheck); + + if (locReqPort.sendTimingReq(wrLocMemPkt)) { + DPRINTF(PolicyManager, "loc mem write is sent : %lld\n", wrLocMemPkt->getAddr()); + orbEntry->state = waitingLocMemWriteResp; + orbEntry->issued = true; + orbEntry->locWrIssued = curTick(); + pktLocMemWrite.pop_front(); + polManStats.sentLocWrPort++; + } else { + DPRINTF(PolicyManager, "loc mem write sending failed: %lld\n", wrLocMemPkt->getAddr()); + retryLocMemWrite = true; + delete wrLocMemPkt; + polManStats.failedLocWrPort++; + } + + if (!pktLocMemWrite.empty() && !locMemWriteEvent.scheduled() && !retryLocMemWrite) { + schedule(locMemWriteEvent, curTick()+1000); + } +} + +void +PolicyManager::processFarMemReadEvent() +{ + // sanity check for the chosen packet + auto orbEntry = ORB.at(pktFarMemRead.front()); + assert(orbEntry->validEntry); + assert(orbEntry->state == farMemRead); + assert(!orbEntry->issued); + + PacketPtr rdFarMemPkt = getPacket(pktFarMemRead.front(), + blockSize, + MemCmd::ReadReq); + + if (farReqPort.sendTimingReq(rdFarMemPkt)) { + DPRINTF(PolicyManager, "far mem read is sent : %lld\n", rdFarMemPkt->getAddr()); + orbEntry->state = waitingFarMemReadResp; + orbEntry->issued = true; + orbEntry->farRdIssued = curTick(); + pktFarMemRead.pop_front(); + polManStats.sentFarRdPort++; + } else { + DPRINTF(PolicyManager, "far mem read sending failed: %lld\n", rdFarMemPkt->getAddr()); + retryFarMemRead = true; + delete rdFarMemPkt; + polManStats.failedFarRdPort++; + } + + if (!pktFarMemRead.empty() && !farMemReadEvent.scheduled() && !retryFarMemRead) { + schedule(farMemReadEvent, curTick()+1000); + } +} + +void +PolicyManager::processFarMemWriteEvent() +{ + PacketPtr wrFarMemPkt = getPacket(pktFarMemWrite.front().second->getAddr(), + blockSize, + MemCmd::WriteReq); + DPRINTF(PolicyManager, "FarMemWriteEvent: request %s addr %#x\n", + wrFarMemPkt->cmdString(), wrFarMemPkt->getAddr()); + + if (farReqPort.sendTimingReq(wrFarMemPkt)) { + DPRINTF(PolicyManager, "far mem write is sent : %lld\n", wrFarMemPkt->getAddr()); + pktFarMemWrite.pop_front(); + polManStats.sentFarWrPort++; + } else { + DPRINTF(PolicyManager, "far mem write sending failed: %lld\n", wrFarMemPkt->getAddr()); + retryFarMemWrite = true; + delete wrFarMemPkt; + polManStats.failedFarWrPort++; + } + + if (!pktFarMemWrite.empty() && !farMemWriteEvent.scheduled() && !retryFarMemWrite) { + schedule(farMemWriteEvent, curTick()+1000); + } else { + if (drainState() == DrainState::Draining && pktFarMemWrite.empty() && + ORB.empty()) { + DPRINTF(Drain, "PolicyManager done draining in farMemWrite\n"); + signalDrainDone(); + } + } + + if (retryLLCFarMemWr && pktFarMemWrite.size()< (orbMaxSize / 2)) { + + 
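+        // The far-memory write buffer has drained below half of the ORB
+        // capacity, so the LLC that was nacked earlier in recvTimingReq()
+        // can be asked to resend its write.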
DPRINTF(PolicyManager, "retryLLCFarMemWr sent\n"); + + retryLLCFarMemWr = false; + + port.sendRetryReq(); + } +} + +bool +PolicyManager::locMemRecvTimingResp(PacketPtr pkt) +{ + DPRINTF(PolicyManager, "locMemRecvTimingResp : %d: %s\n", pkt->getAddr(), pkt->cmdString()); + + // either read miss dirty data, + // or read miss clean FB data, + // or stall and send from FB + if ((locMemPolicy == enums::Rambus || locMemPolicy == enums::RambusTagProbOpt) + && !pkt->isTagCheck && pkt->hasDirtyData) { + DPRINTF(PolicyManager, "locMemRecvTimingResp: rd miss data async %d:\n", pkt->getAddr()); + assert(pkt->owIsRead); + assert(!pkt->isHit); + handleDirtyCacheLine(pkt->dirtyLineAddr); + if (pkt->isDirty && locMemPolicy == enums::RambusTagProbOpt) { + auto orbEntry = ORB.at(pkt->getAddr()); + assert(!orbEntry->rcvdLocRdResp); + orbEntry->rcvdLocRdResp = true; + if (orbEntry->rcvdLocRdResp && orbEntry->rcvdFarRdResp) { + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + orbEntry->issued = false; + handleNextState(orbEntry); + } + } + delete pkt; + return true; + } + + if (!findInORB(pkt->getAddr())) { + std::cout << "!findInORB: " << pkt->getAddr() << " / " << pkt->cmdString() << "\n"; + std::cout << "+++++++++++++++++++++\n+++++++++++++++++++++\n+++++++++++++++++++++\n"; + } + + auto orbEntry = ORB.at(pkt->getAddr()); + + if(pkt->isTagCheck) { + + assert(orbEntry->pol == enums::Rambus || orbEntry->pol == enums::RambusTagProbOpt); + assert(orbEntry->state == waitingTCtag); + + if (pkt->hasDirtyData) { + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + if (!orbEntry->prevDirty) { // rd miss clean with FB dirty data + assert(orbEntry->dirtyLineAddr == -1); + assert(!orbEntry->handleDirtyLine); + orbEntry->handleDirtyLine = true; + orbEntry->dirtyLineAddr = pkt->dirtyLineAddr; + } else { // dirty + assert(orbEntry->dirtyLineAddr != -1); + assert(orbEntry->handleDirtyLine); + } + } + + // Rd Miss Dirty + if (orbEntry->owPkt->isRead() && !orbEntry->isHit && orbEntry->prevDirty) { + if (locMemPolicy == enums::Rambus) { + // This assert is true only for Rambus policy. + // for RambusTagProbOpt it can be either true or false, + // since a Rd MD TC packet may or may not be probed + // and will carry a dirty flag or not. If it is probed, + // this flag will be set later! not when TC is sent! 
+ assert(pkt->hasDirtyData); + } + assert(orbEntry->handleDirtyLine); + assert(orbEntry->dirtyLineAddr != -1); + } + + // if (!(orbEntry->owPkt->isRead() && orbEntry->isHit)) { + // orbEntry->tagCheckExit = curTick(); + // } + orbEntry->tagCheckExit = curTick(); + + if (orbEntry->owPkt->isRead() && orbEntry->isHit) { + orbEntry->state = waitingLocMemReadResp; + } + + } else { + if (pkt->isRead()) { + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs || + orbEntry->pol == enums::Oracle || + orbEntry->pol == enums::BearWriteOpt) + { + assert(orbEntry->state == waitingLocMemReadResp); + if (orbEntry->handleDirtyLine) { + assert(!orbEntry->isHit); + handleDirtyCacheLine(orbEntry->dirtyLineAddr); + } + orbEntry->locRdExit = curTick(); + } + + if (orbEntry->pol == enums::Rambus || orbEntry->pol == enums::RambusTagProbOpt) { + assert(orbEntry->state == waitingLocMemReadResp); + assert(orbEntry->isHit); + assert(!pkt->hasDirtyData); + assert(orbEntry->dirtyLineAddr == -1); + orbEntry->locRdExit = curTick(); + orbEntry->state = locRdRespReady; + + // else { + // // just a null data, which resembles the bubble on the data bus for this case in Rambus-baseline + // assert(pkt->owIsRead && !pkt->isHit && !pkt->isDirty && !pkt->hasDirtyData); + // return true; + // } + } + + } + else { + assert(pkt->isWrite()); + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs || + orbEntry->pol == enums::Oracle || + orbEntry->pol == enums::BearWriteOpt) + { + assert(orbEntry->state == waitingLocMemWriteResp); + orbEntry->locWrExit = curTick(); + } + + if (orbEntry->pol == enums::Rambus || orbEntry->pol == enums::RambusTagProbOpt) { + if (orbEntry->state == waitingLocMemWriteResp) { + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + orbEntry->locWrExit = curTick(); + } + if (orbEntry->state == waitingTCtag) { + assert(orbEntry->owPkt->isWrite()); + orbEntry->tagCheckExit = curTick(); + } + } + + + } + } + + // IMPORTANT: + // orbEntry should not be used as the passed argument in setNextState and + // handleNextState functions, reason: it's possible that orbEntry may be + // deleted and updated, which will not be reflected here in the scope of + // current lines since it's been read at line #475. + setNextState(ORB.at(pkt->getAddr())); + + handleNextState(ORB.at(pkt->getAddr())); + + delete pkt; + + return true; +} + +bool +PolicyManager::farMemRecvTimingResp(PacketPtr pkt) +{ + if (bypassDcache) { + port.schedTimingResp(pkt, curTick()); + return true; + } + + DPRINTF(PolicyManager, "farMemRecvTimingResp : %lld , %s \n", pkt->getAddr(), pkt->cmdString()); + + if (pkt->isRead()) { + auto orbEntry = ORB.at(pkt->getAddr()); + + DPRINTF(PolicyManager, "farMemRecvTimingResp : continuing to far read resp: %d\n", + orbEntry->owPkt->isRead()); + + assert(orbEntry->state == waitingFarMemReadResp); + + if (locMemPolicy == enums::RambusTagProbOpt && + !orbEntry->isHit && orbEntry->prevDirty) { + assert(!orbEntry->rcvdFarRdResp); + orbEntry->rcvdFarRdResp = true; + } + + orbEntry->farRdExit = curTick(); + + // IMPORTANT: + // orbEntry should not be used as the passed argument in setNextState and + // handleNextState functions, reason: it's possible that orbEntry may be + // deleted and updated, which will not be reflected here in the scope of + // current lines since it's been read at line #508. 
+ setNextState(ORB.at(pkt->getAddr())); + + // The next line is absolutely required since the orbEntry will + // be deleted and renewed within setNextState() + // orbEntry = ORB.at(pkt->getAddr()); + + handleNextState(ORB.at(pkt->getAddr())); + + delete pkt; + } + else { + assert(pkt->isWrite()); + delete pkt; + } + + return true; +} + +void +PolicyManager::locMemRecvReqRetry() +{ + // assert(retryLocMemRead || retryLocMemWrite); + DPRINTF(PolicyManager, "locMemRecvReqRetry start: %d, %d , %d \n", retryTagCheck, retryLocMemRead, retryLocMemWrite); + bool schedTC = false; + bool schedRd = false; + bool schedWr = false; + + if (retryTagCheck) { + + if (!tagCheckEvent.scheduled() && !pktTagCheck.empty()) { + assert(locMemPolicy == enums::Rambus || locMemPolicy == enums::RambusTagProbOpt); + schedule(tagCheckEvent, curTick()); + } + retryTagCheck = false; + schedTC = true; + } + + if (retryLocMemRead) { + + if (!locMemReadEvent.scheduled() && !pktLocMemRead.empty()) { + schedule(locMemReadEvent, curTick()); + } + retryLocMemRead = false; + schedRd = true; + } + + if (retryLocMemWrite) { + if (!locMemWriteEvent.scheduled() && !pktLocMemWrite.empty()) { + schedule(locMemWriteEvent, curTick()); + } + retryLocMemWrite = false; + schedWr = true; + } + if (!schedTC && !schedRd && !schedWr) { + // panic("Wrong local mem retry event happend.\n"); + + // TODO: there are cases where none of retryLocMemRead and retryLocMemWrite + // are true, yet locMemRecvReqRetry() is called. I should fix this later. + if ((locMemPolicy == enums::Rambus || locMemPolicy == enums::RambusTagProbOpt) && !tagCheckEvent.scheduled() && !pktTagCheck.empty()) { + schedule(tagCheckEvent, curTick()); + } + if (!locMemReadEvent.scheduled() && !pktLocMemRead.empty()) { + schedule(locMemReadEvent, curTick()); + } + if (!locMemWriteEvent.scheduled() && !pktLocMemWrite.empty()) { + schedule(locMemWriteEvent, curTick()); + } + } + + DPRINTF(PolicyManager, "locMemRecvReqRetry end: %d, %d , %d \n", schedTC, schedRd, schedWr); +} + +void +PolicyManager::farMemRecvReqRetry() +{ + if (bypassDcache) { + port.sendRetryReq(); + return; + } + + // assert(retryFarMemRead || retryFarMemWrite); + + bool schedRd = false; + bool schedWr = false; + + if (retryFarMemRead) { + if (!farMemReadEvent.scheduled() && !pktFarMemRead.empty()) { + schedule(farMemReadEvent, curTick()); + } + retryFarMemRead = false; + schedRd = true; + } + if (retryFarMemWrite) { + if (!farMemWriteEvent.scheduled() && !pktFarMemWrite.empty()) { + schedule(farMemWriteEvent, curTick()); + } + retryFarMemWrite = false; + schedWr = true; + } + // else { + // panic("Wrong far mem retry event happend.\n"); + // } + if (!schedRd && !schedWr) { + // panic("Wrong local mem retry event happend.\n"); + + // TODO: there are cases where none of retryFarMemRead and retryFarMemWrite + // are true, yet farMemRecvReqRetry() is called. I should fix this later. + if (!farMemReadEvent.scheduled() && !pktFarMemRead.empty()) { + schedule(farMemReadEvent, curTick()); + } + if (!farMemWriteEvent.scheduled() && !pktFarMemWrite.empty()) { + schedule(farMemWriteEvent, curTick()); + } + } + + DPRINTF(PolicyManager, "farMemRecvReqRetry: %d , %d \n", schedRd, schedWr); +} + +void +PolicyManager::setNextState(reqBufferEntry* orbEntry) +{ + orbEntry->issued = false; + enums::Policy pol = orbEntry->pol; + reqState state = orbEntry->state; + bool isRead = orbEntry->owPkt->isRead(); + bool isHit = orbEntry->isHit; + //This must be checked for the first 3 policies, later. 
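+    // (prevDirty was latched by checkDirty() in handleRequestorPkt() when
+    // the entry was inserted, so isDirty below reflects the line's state at
+    // arrival rather than a fresh tag-store lookup.)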
+ // bool isDirty = checkDirty(orbEntry->owPkt->getAddr()); + bool isDirty = orbEntry->prevDirty; + + /////////////////////////////////////////////////////////////////////////////////////// + // CascadeLakeNoPartWrs + + // start --> read tag + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->state == start) { + orbEntry->state = locMemRead; + orbEntry->locRdEntered = curTick(); + return; + } + + // tag ready && read && hit --> DONE + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + orbEntry->isHit) { + // done + // do nothing + return; + } + + // tag ready && write --> loc write + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->owPkt->isWrite() && + orbEntry->state == waitingLocMemReadResp) { + // write it to the DRAM cache + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + return; + } + + // loc read resp ready && read && miss --> far read + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + !orbEntry->isHit) { + + orbEntry->state = farMemRead; + orbEntry->farRdEntered = curTick(); + return; + } + + // far read resp ready && read && miss --> loc write + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingFarMemReadResp && + !orbEntry->isHit) { + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + orbEntry->state = locMemWrite; + + orbEntry->locWrEntered = curTick(); + + return; + } + + // loc write received + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + // orbEntry->owPkt->isRead() && + // !orbEntry->isHit && + orbEntry->state == waitingLocMemWriteResp) { + // done + // do nothing + return; + } + + //////////////////////////////////////////////////////////////////////// + /// Oracle + + // RD Hit Dirty & Clean, RD Miss Dirty, WR Miss Dirty + // start --> read loc + if (pol == enums::Oracle && state == start && + ((isRead && isHit) || (isRead && !isHit && isDirty) || (!isRead && !isHit && isDirty)) + ) { + orbEntry->state = locMemRead; + orbEntry->locRdEntered = curTick(); + return; + } + // RD Miss Clean + // start --> read far + if (pol == enums::Oracle && state == start && + (isRead && !isHit && !isDirty) + ) { + orbEntry->state = farMemRead; + orbEntry->farRdEntered = curTick(); + return; + } + // WR Hit Dirty & Clean, WR Miss Clean + // start --> write loc + 
if (pol == enums::Oracle && state == start && + ((!isRead && isHit)|| (!isRead && !isHit && !isDirty)) + ) { + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + return; + } + + // RD Hit Dirty & Clean + // start --> read loc --> done + if (pol == enums::Oracle && + isRead && isHit && + state == waitingLocMemReadResp ) { + // done + // do nothing + return; + } + + // RD Miss Dirty: + // start --> read loc --> read far + if (pol == enums::Oracle && + isRead && !isHit && isDirty && + state == waitingLocMemReadResp ) { + orbEntry->state = farMemRead; + orbEntry->farRdEntered = curTick(); + return; + } + + // WR Miss Dirty: + // start --> read loc --> loc write + if (pol == enums::Oracle && + !isRead && !isHit && isDirty && + state == waitingLocMemReadResp) { + // write it to the DRAM cache + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + return; + } + + // RD Miss Clean & Dirty + // start --> ... --> far read -> loc write + if (pol == enums::Oracle && + (isRead && !isHit) && + state == waitingFarMemReadResp + ) { + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + orbEntry->state = locMemWrite; + + orbEntry->locWrEntered = curTick(); + + return; + } + + // loc write received + if (pol == enums::Oracle && + state == waitingLocMemWriteResp) { + assert (!(isRead && isHit)); + // done + // do nothing + return; + } + + //////////////////////////////////////////////////////////////////////// + // BEAR Write optimized + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->state == start && !(orbEntry->owPkt->isWrite() && orbEntry->isHit)) { + orbEntry->state = locMemRead; + orbEntry->locRdEntered = curTick(); + DPRINTF(PolicyManager, "set: start -> locMemRead : %d\n", orbEntry->owPkt->getAddr()); + return; + } + + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->state == start && orbEntry->owPkt->isWrite() && orbEntry->isHit) { + orbEntry->state = locMemWrite; + orbEntry->locRdEntered = curTick(); + DPRINTF(PolicyManager, "set: start -> locMemWrite : %d\n", orbEntry->owPkt->getAddr()); + return; + } + + // tag ready && read && hit --> DONE + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + orbEntry->isHit) { + DPRINTF(PolicyManager, "set: waitingLocMemReadResp -> NONE : %d\n", orbEntry->owPkt->getAddr()); + + // done + // do nothing + return; + } + + // tag ready && write --> loc write + if (orbEntry->pol == 
enums::BearWriteOpt && + orbEntry->owPkt->isWrite() && + orbEntry->state == waitingLocMemReadResp) { + assert(!orbEntry->isHit); + // write it to the DRAM cache + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + DPRINTF(PolicyManager, "set: waitingLocMemReadResp -> locMemWrite : %d\n", orbEntry->owPkt->getAddr()); + return; + } + + // loc read resp ready && read && miss --> far read + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + !orbEntry->isHit) { + + orbEntry->state = farMemRead; + orbEntry->farRdEntered = curTick(); + DPRINTF(PolicyManager, "set: waitingLocMemReadResp -> farMemRead : %d\n", orbEntry->owPkt->getAddr()); + return; + } + + // far read resp ready && read && miss --> loc write + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingFarMemReadResp && + !orbEntry->isHit) { + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + orbEntry->state = locMemWrite; + + orbEntry->locWrEntered = curTick(); + + DPRINTF(PolicyManager, "set: waitingFarMemReadResp -> locMemWrite : %d\n", orbEntry->owPkt->getAddr()); + + return; + } + + // loc write received + if (orbEntry->pol == enums::BearWriteOpt && + // orbEntry->owPkt->isRead() && + // !orbEntry->isHit && + orbEntry->state == waitingLocMemWriteResp) { + DPRINTF(PolicyManager, "set: waitingLocMemWriteResp -> NONE : %d\n", orbEntry->owPkt->getAddr()); + + // done + // do nothing + + return; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Rambus + // start --> read tag + if (orbEntry->pol == enums::Rambus && + orbEntry->state == start) { + orbEntry->state = tagCheck; + orbEntry->tagCheckEntered = curTick(); + return; + } + + // tag ready + // read && hit --> wait for data + if (orbEntry->pol == enums::Rambus && + orbEntry->state == waitingTCtag && + orbEntry->owPkt->isRead() && orbEntry->isHit) { + // orbEntry->state = waitingLocMemReadResp; + // do nothing + return; + } + + // tag ready + // read && miss --> don't wait for dirty data (MC with FB>0/MD), transition to far read + if (orbEntry->pol == enums::Rambus && + orbEntry->state == waitingTCtag && + orbEntry->owPkt->isRead() && + !orbEntry->isHit) { + orbEntry->state = farMemRead; + orbEntry->farRdEntered = curTick(); + return; + } + + // tag ready + // write --> done + if (orbEntry->pol == enums::Rambus && + 
orbEntry->state == waitingTCtag && + orbEntry->owPkt->isWrite()) { + // done, do nothing and return; + return; + } + + // tag ready && read && hit --> DONE + if (orbEntry->pol == enums::Rambus && + orbEntry->state == waitingLocMemReadResp) { + assert(orbEntry->isHit); + assert(orbEntry->owPkt->isRead()); + // done + // do nothing + return; + } + + // tag ready && read && hit --> DONE + if (orbEntry->pol == enums::Rambus && + orbEntry->state == locRdRespReady) { + assert(orbEntry->isHit); + assert(orbEntry->owPkt->isRead()); + // done + // do nothing + return; + } + + // far read resp ready && read && miss --> loc write + if (orbEntry->pol == enums::Rambus && + orbEntry->state == waitingFarMemReadResp) { + + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + orbEntry->state = locMemWrite; + + orbEntry->locWrEntered = curTick(); + + return; + } + + // loc write received + if (orbEntry->pol == enums::Rambus && + orbEntry->state == waitingLocMemWriteResp) { + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + // done + // do nothing + return; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // RambusTagProbOpt + // start --> read tag + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == start) { + orbEntry->state = tagCheck; + orbEntry->tagCheckEntered = curTick(); + return; + } + + // tag ready + // read && hit --> wait for data + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingTCtag && + orbEntry->owPkt->isRead() && + orbEntry->isHit) { + // do nothing + return; + } + + // tag ready + // read && miss --> don't wait for dirty data (MC with FB>0/MD), transition to far read + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingTCtag && + orbEntry->owPkt->isRead() && + !orbEntry->isHit) { + orbEntry->state = farMemRead; + orbEntry->farRdEntered = curTick(); + return; + } + + // tag ready + // write --> done + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingTCtag && + orbEntry->owPkt->isWrite()) { + // done, do nothing and return; + return; + } + + // tag ready && read && hit --> DONE + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingLocMemReadResp) { + assert(orbEntry->isHit); + assert(orbEntry->owPkt->isRead()); + // done + // do nothing + return; + } + + // tag ready && 
read && hit --> DONE + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == locRdRespReady) { + assert(orbEntry->isHit); + assert(orbEntry->owPkt->isRead()); + // done + // do nothing + return; + } + + // far read resp ready && read && miss --> loc write + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingFarMemReadResp) { + + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency + backendLatency); + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + if (orbEntry->prevDirty && orbEntry->rcvdLocRdResp && orbEntry->rcvdFarRdResp) { + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + } else if (!orbEntry->prevDirty) { + orbEntry->state = locMemWrite; + orbEntry->locWrEntered = curTick(); + } + + return; + } + + // loc write received + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingLocMemWriteResp) { + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + // done + // do nothing + return; + } +} + +void +PolicyManager::handleNextState(reqBufferEntry* orbEntry) +{ + //////////////////////////////////////////////////////////////////////// + // CascadeLakeNoPartWrs + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->state == locMemRead) { + + // assert(!pktLocMemRead.empty()); + + pktLocMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocRdQLenEnq = pktLocMemRead.size(); + + if (!locMemReadEvent.scheduled() && !retryLocMemRead) { + schedule(locMemReadEvent, curTick()); + } + return; + } + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + orbEntry->isHit) { + // DONE + // send the respond to the requestor + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + 
orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->owPkt->isRead() && + orbEntry->state == farMemRead) { + + assert(!orbEntry->isHit); + + // do a read from far mem + pktFarMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgFarRdQLenEnq = pktFarMemRead.size(); + + if (!farMemReadEvent.scheduled() && !retryFarMemRead) { + schedule(farMemReadEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + orbEntry->state == locMemWrite) { + + if (orbEntry->owPkt->isRead()) { + assert(!orbEntry->isHit); + } + + // do a read from far mem + pktLocMemWrite.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocWrQLenEnq = pktLocMemWrite.size(); + + + if (!locMemWriteEvent.scheduled() && !retryLocMemWrite) { + schedule(locMemWriteEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::CascadeLakeNoPartWrs && + // orbEntry->owPkt->isRead() && + // !orbEntry->isHit && + orbEntry->state == waitingLocMemWriteResp) { + // DONE + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + //////////////////////////////////////////////////////////////////////// + // Oracle + if (orbEntry->pol == enums::Oracle && + orbEntry->state == locMemRead) { + + pktLocMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocRdQLenEnq = pktLocMemRead.size(); + + if (!locMemReadEvent.scheduled() && !retryLocMemRead) { + schedule(locMemReadEvent, curTick()); + } + return; + } + + if (orbEntry->pol == enums::Oracle && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + orbEntry->isHit) { + // DONE + // send the respond to the requestor + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::Oracle && + orbEntry->state == farMemRead) { + + assert(orbEntry->owPkt->isRead() && !orbEntry->isHit); + + // do a read from far mem + pktFarMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgFarRdQLenEnq = pktFarMemRead.size(); + + if 
(!farMemReadEvent.scheduled() && !retryFarMemRead) { + schedule(farMemReadEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::Oracle && + orbEntry->state == locMemWrite) { + + if (orbEntry->owPkt->isRead()) { + assert(!orbEntry->isHit); + } + + // do a read from far mem + pktLocMemWrite.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocWrQLenEnq = pktLocMemWrite.size(); + + + if (!locMemWriteEvent.scheduled() && !retryLocMemWrite) { + schedule(locMemWriteEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::Oracle && + orbEntry->state == waitingLocMemWriteResp) { + // DONE + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + //////////////////////////////////////////////////////////////////////// + // BEAR Write Optmized + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->state == locMemRead) { + + pktLocMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocRdQLenEnq = pktLocMemRead.size(); + + if (!locMemReadEvent.scheduled() && !retryLocMemRead) { + schedule(locMemReadEvent, curTick()); + } + return; + } + + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->owPkt->isRead() && + orbEntry->state == waitingLocMemReadResp && + orbEntry->isHit) { + // DONE + // send the respond to the requestor + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->owPkt->isRead() && + orbEntry->state == farMemRead) { + + assert(!orbEntry->isHit); + + // do a read from far mem + pktFarMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgFarRdQLenEnq = pktFarMemRead.size(); + + if (!farMemReadEvent.scheduled() && !retryFarMemRead) { + schedule(farMemReadEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::BearWriteOpt && + orbEntry->state == locMemWrite) { + + if (orbEntry->owPkt->isRead()) { + assert(!orbEntry->isHit); + } + + // do a read from far mem + pktLocMemWrite.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocWrQLenEnq = pktLocMemWrite.size(); + + + if (!locMemWriteEvent.scheduled() && !retryLocMemWrite) { + schedule(locMemWriteEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::BearWriteOpt && + // orbEntry->owPkt->isRead() && + // !orbEntry->isHit && + orbEntry->state == waitingLocMemWriteResp) { + // DONE + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + 
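+    // The two Rambus policies below add a combined tag-check step in front
+    // of the data path. Roughly, the per-request flow implemented by
+    // setNextState() above is:
+    //
+    //   start -> tagCheck -> waitingTCtag
+    //   waitingTCtag, write      -> done (response was sent on arrival)
+    //   waitingTCtag, read hit   -> waitingLocMemReadResp -> locRdRespReady -> done
+    //   waitingTCtag, read miss  -> farMemRead -> waitingFarMemReadResp
+    //                               -> locMemWrite -> waitingLocMemWriteResp -> done
+    //
+    // Dirty victims discovered during the tag check are written back
+    // separately through handleDirtyCacheLine().
+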
//////////////////////////////////////////////////////////////////////// + // Rambus + + if (orbEntry->pol == enums::Rambus && + orbEntry->state == tagCheck) { + + pktTagCheck.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgTagCheckQLenEnq = pktTagCheck.size(); + + if (!tagCheckEvent.scheduled() && !retryTagCheck) { + schedule(tagCheckEvent, curTick()); + } + return; + } + + if (orbEntry->pol == enums::Rambus && + orbEntry->state == waitingLocMemReadResp) { + return; + } + + if (orbEntry->pol == enums::Rambus && + orbEntry->state == locRdRespReady) { + assert(orbEntry->owPkt->isRead()); + assert(orbEntry->isHit); + // DONE + // send the respond to the requestor + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::Rambus && + orbEntry->owPkt->isWrite()) { + // DONE + // respond for writes is already sent to the requestor. 
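+        // (accessAndRespond() already ran in handleRequestorPkt() when the
+        // write entered the ORB, so only the entry clean-up and any
+        // conflicting requests remain.)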
+ // clear ORB + assert(orbEntry->state == waitingTCtag); + + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::Rambus && + orbEntry->state == farMemRead) { + + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + // do a read from far mem + pktFarMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgFarRdQLenEnq = pktFarMemRead.size(); + + if (!farMemReadEvent.scheduled() && !retryFarMemRead) { + schedule(farMemReadEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::Rambus && + orbEntry->state == locMemWrite) { + + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + pktLocMemWrite.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocWrQLenEnq = pktLocMemWrite.size(); + + + if (!locMemWriteEvent.scheduled() && !retryLocMemWrite) { + schedule(locMemWriteEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::Rambus && + // orbEntry->owPkt->isRead() && + // !orbEntry->isHit && + orbEntry->state == waitingLocMemWriteResp) { + // DONE + // clear ORB + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + resumeConflictingReq(orbEntry); + + return; + } + + //////////////////////////////////////////////////////////////////////// + // RambusTagProbOpt + + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == tagCheck) { + + pktTagCheck.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgTagCheckQLenEnq = pktTagCheck.size(); + + if (!tagCheckEvent.scheduled() && !retryTagCheck) { + schedule(tagCheckEvent, curTick()); + } + return; + } + + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == waitingLocMemReadResp) { + return; + } + + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == locRdRespReady) { + assert(orbEntry->owPkt->isRead()); + assert(orbEntry->isHit); + // DONE + // send the respond to the requestor + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + + // clear ORB + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->owPkt->isWrite()) { + // DONE + // respond for writes is already sent to the requestor. 
+ // clear ORB + assert(orbEntry->state == waitingTCtag); + + resumeConflictingReq(orbEntry); + + return; + } + + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == farMemRead) { + + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + // do a read from far mem + pktFarMemRead.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgFarRdQLenEnq = pktFarMemRead.size(); + + if (!farMemReadEvent.scheduled() && !retryFarMemRead) { + schedule(farMemReadEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::RambusTagProbOpt && + orbEntry->state == locMemWrite) { + + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + + pktLocMemWrite.push_back(orbEntry->owPkt->getAddr()); + + polManStats.avgLocWrQLenEnq = pktLocMemWrite.size(); + + + if (!locMemWriteEvent.scheduled() && !retryLocMemWrite) { + schedule(locMemWriteEvent, curTick()); + } + return; + + } + + if (orbEntry->pol == enums::RambusTagProbOpt && + // orbEntry->owPkt->isRead() && + // !orbEntry->isHit && + orbEntry->state == waitingLocMemWriteResp) { + // DONE + // clear ORB + assert(orbEntry->owPkt->isRead()); + assert(!orbEntry->isHit); + resumeConflictingReq(orbEntry); + + return; + } +} + +void +PolicyManager::handleRequestorPktAtomic(PacketPtr pkt) +{ + Addr tag = returnTagDC(pkt->getBlockAddr(blockSize), blockSize); + Addr index = returnIndexDC(pkt->getBlockAddr(blockSize), blockSize); + Addr way = findMatchingWay(index, tag); + + if (way == noMatchingWay) { + // MISSED! Candidate = Either there's an empty way to + // fill in or a victim will be selected. + way = getCandidateWay(index); + + // This is the current resident that is about to leave. + if (tagMetadataStore.at(index).at(way)->validLine) { + capacityTracker[tagMetadataStore.at(index).at(way)->farMemAddr] = blksInserted; + polManStats.blkReuse.sample(tagMetadataStore.at(index).at(way)->counter); + assert(tagMetadataStore.at(index).at(way)->tickEntered != MaxTick); + assert(curTick() >= tagMetadataStore.at(index).at(way)->tickEntered); + polManStats.ticksBeforeEviction.sample( + curTick() - tagMetadataStore.at(index).at(way)->tickEntered + ); + } + } + + assert(way < assoc); + + polManStats.avgORBLen = ORB.size(); + polManStats.avgTagCheckQLenStrt = countTagCheckInORB(); + polManStats.avgLocRdQLenStrt = countLocRdInORB(); + polManStats.avgFarRdQLenStrt = countFarRdInORB(); + polManStats.avgLocWrQLenStrt = countLocWrInORB(); + polManStats.avgFarWrQLenStrt = countFarWr(); + + Addr addr = pkt->getAddr(); + unsigned burst_size = locBurstSize; + unsigned size = std::min((addr | (burst_size - 1)) + 1, + addr + pkt->getSize()) - addr; + + if(pkt->isRead()) { + polManStats.bytesReadSys += size; + polManStats.readPktSize[ceilLog2(size)]++; + polManStats.readReqs++; + } else { + polManStats.bytesWrittenSys += size; + polManStats.writePktSize[ceilLog2(size)]++; + polManStats.writeReqs++; + } + + bool isHit = checkHitOrMissAtomic(index, way, pkt); + bool wasDirty = tagMetadataStore.at(index).at(way)->validLine && + tagMetadataStore.at(index).at(way)->dirtyLine; + + // Updating Tag & Metadata + tagMetadataStore.at(index).at(way)->tagDC = tag; + tagMetadataStore.at(index).at(way)->indexDC = index; + tagMetadataStore.at(index).at(way)->validLine = true; + tagMetadataStore.at(index).at(way)->farMemAddr = pkt->getBlockAddr(blockSize); + replacementPolicy->touch(tagMetadataStore.at(index).at(way)->replacementData, pkt); + + if (pkt->isRead() && !isHit) { + tagMetadataStore.at(index).at(way)->dirtyLine = false; + } + if 
(!pkt->isRead()) { // write + tagMetadataStore.at(index).at(way)->dirtyLine = true; + } + + if (isHit) { + tagMetadataStore.at(index).at(way)->counter++; + } else { + tagMetadataStore.at(index).at(way)->counter = 0; + tagMetadataStore.at(index).at(way)->tickEntered = curTick(); + + if (capacityTracker.find(pkt->getBlockAddr(blockSize)) != capacityTracker.end()) { + polManStats.missDistance.sample(blksInserted - capacityTracker[pkt->getBlockAddr(blockSize)]); + capacityTracker.erase(pkt->getBlockAddr(blockSize)); + } + + blksInserted++; + } + + DPRINTF(PolicyManager, "ORB+: adr= %d -> %d, index= %d, tag= %d, cmd= %s, isHit= %d, wasDirty= %d\n", + pkt->getAddr(), pkt->getBlockAddr(blockSize), index, tag, pkt->cmdString(), + isHit, wasDirty); + +} + +void +PolicyManager::handleRequestorPkt(PacketPtr pkt) +{ + Addr tag = returnTagDC(pkt->getAddr(), pkt->getSize()); + Addr index = returnIndexDC(pkt->getAddr(), pkt->getSize()); + Addr way = findMatchingWay(index, tag); + + if (way == noMatchingWay) { // MISSED! Candidate = Either there's an empty way to fill in or a victim will be selected. + way = getCandidateWay(index); + + // This is the current resident that is about to leave. + if (tagMetadataStore.at(index).at(way)->validLine) { + capacityTracker[tagMetadataStore.at(index).at(way)->farMemAddr] = blksInserted; + if (tagMetadataStore.at(index).at(way)->tickEntered != MaxTick) { + polManStats.blkReuse.sample(tagMetadataStore.at(index).at(way)->counter); + assert(curTick() >= tagMetadataStore.at(index).at(way)->tickEntered); + polManStats.ticksBeforeEviction.sample(curTick() - tagMetadataStore.at(index).at(way)->tickEntered); + } + } + } + + assert(way < assoc); + + reqBufferEntry* orbEntry = new reqBufferEntry( + true, curTick(), + tag, index, way, + pkt, + locMemPolicy, start, + false, false, false, + false, false, false, + -1, false, + MaxTick, MaxTick, MaxTick, + MaxTick, MaxTick, MaxTick, + MaxTick, MaxTick, MaxTick, + MaxTick, MaxTick, MaxTick + ); + + ORB.emplace(pkt->getAddr(), orbEntry); + + DPRINTF(PolicyManager, "handleRequestorPkt added to ORB: adr= %d, index= %d, tag= %d, %s\n", orbEntry->owPkt->getAddr(), orbEntry->indexDC, orbEntry->tagDC, orbEntry->owPkt->cmdString()); + + polManStats.avgORBLen = ORB.size(); + polManStats.avgTagCheckQLenStrt = countTagCheckInORB(); + polManStats.avgLocRdQLenStrt = countLocRdInORB(); + polManStats.avgFarRdQLenStrt = countFarRdInORB(); + polManStats.avgLocWrQLenStrt = countLocWrInORB(); + polManStats.avgFarWrQLenStrt = countFarWr(); + + Addr addr = pkt->getAddr(); + unsigned burst_size = locBurstSize; + unsigned size = std::min((addr | (burst_size - 1)) + 1, + addr + pkt->getSize()) - addr; + + if(pkt->isRead()) { + polManStats.bytesReadSys += size; + polManStats.readPktSize[ceilLog2(size)]++; + polManStats.readReqs++; + } else { + polManStats.bytesWrittenSys += size; + polManStats.writePktSize[ceilLog2(size)]++; + polManStats.writeReqs++; + } + + if (pkt->isWrite()) { + + PacketPtr copyOwPkt = new Packet(orbEntry->owPkt, + false, + orbEntry->owPkt->isRead()); + + accessAndRespond(orbEntry->owPkt, + frontendLatency + backendLatency); + + ORB.at(copyOwPkt->getAddr()) = new reqBufferEntry( + orbEntry->validEntry, + orbEntry->arrivalTick, + orbEntry->tagDC, + orbEntry->indexDC, + orbEntry->wayNum, + copyOwPkt, + orbEntry->pol, + orbEntry->state, + orbEntry->issued, + orbEntry->isHit, + orbEntry->conflict, + orbEntry->prevDirty, + orbEntry->rcvdLocRdResp, + orbEntry->rcvdFarRdResp, + orbEntry->dirtyLineAddr, + orbEntry->handleDirtyLine, + 
orbEntry->tagCheckEntered, + orbEntry->tagCheckIssued, + orbEntry->tagCheckExit, + orbEntry->locRdEntered, + orbEntry->locRdIssued, + orbEntry->locRdExit, + orbEntry->locWrEntered, + orbEntry->locWrIssued, + orbEntry->locWrExit, + orbEntry->farRdEntered, + orbEntry->farRdIssued, + orbEntry->farRdExit); + delete orbEntry; + + orbEntry = ORB.at(copyOwPkt->getAddr()); + } + + checkHitOrMiss(orbEntry); + if (checkDirty(orbEntry->indexDC, orbEntry->wayNum) && !orbEntry->isHit) { + if (extreme && tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->farMemAddr == -1) { + // extreme cases are faked. So, this address for cold misses are -1, so we fake it to a random address like 1024. + orbEntry->dirtyLineAddr = orbEntry->owPkt->getAddr() == 0 ? 64 : (orbEntry->owPkt->getAddr()-64); + } else { + orbEntry->dirtyLineAddr = tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->farMemAddr; + + } + orbEntry->handleDirtyLine = true; + } + + if (extreme) { + orbEntry->prevDirty = alwaysDirty; + } else { + orbEntry->prevDirty = checkDirty(orbEntry->indexDC, orbEntry->wayNum); + } + + // Updating Tag & Metadata + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->tagDC = orbEntry->tagDC; + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->indexDC = orbEntry->indexDC; + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->validLine = true; + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->farMemAddr = orbEntry->owPkt->getAddr(); + replacementPolicy->touch(tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->replacementData, pkt); + + if (orbEntry->owPkt->isRead() && !orbEntry->isHit) { + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->dirtyLine = false; + } + if (!orbEntry->owPkt->isRead()) { // write + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->dirtyLine = true; + } + + if (orbEntry->isHit) { + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->counter++; + } else { + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->counter = 1; + tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->tickEntered = curTick(); + + blksInserted++; + + if (capacityTracker.find(orbEntry->owPkt->getAddr()) != capacityTracker.end()) { + polManStats.missDistance.sample(blksInserted - capacityTracker[orbEntry->owPkt->getAddr()]); + capacityTracker.erase(orbEntry->owPkt->getAddr()); + } + } + + DPRINTF(PolicyManager, "ORB+: adr= %d, index= %d, tag= %d, cmd= %s, isHit= %d, wasDirty= %d, dirtyAddr= %d\n", orbEntry->owPkt->getAddr(), orbEntry->indexDC, orbEntry->tagDC, orbEntry->owPkt->cmdString(), orbEntry->isHit, orbEntry->prevDirty, orbEntry->dirtyLineAddr); +} + +bool +PolicyManager::checkConflictInORB(PacketPtr pkt) +{ + Addr indexDC = returnIndexDC(pkt->getAddr(), pkt->getSize()); + //Addr tagDC = returnTagDC(pkt->getAddr(), pkt->getSize()); + + std::vector sameIndex; + + for (auto e = ORB.begin(); e != ORB.end(); ++e) { + if (e->second->validEntry && indexDC == e->second->indexDC /*&& tagDC != e->second->tagDC*/) { + sameIndex.push_back(e->first); + } + } + if (sameIndex.size() == assoc) { + for (int i=0; iconflict = true; + } + return true; + } + return false; +} + +bool +PolicyManager::checkHitOrMissAtomic(unsigned index, unsigned way, PacketPtr pkt) +{ + // look up the tagMetadataStore data structure to + // check if it's hit or miss + + bool currValid = tagMetadataStore.at(index).at(way)->validLine; + bool currDirty = tagMetadataStore.at(index).at(way)->dirtyLine; + + Addr tag = 
returnTagDC(pkt->getBlockAddr(blockSize), blockSize); + + bool isHit = currValid && (tag == tagMetadataStore.at(index).at(way)->tagDC); + + if (isHit) { + + polManStats.numTotHits++; + + if (pkt->isRead()) { + polManStats.numRdHit++; + if (currDirty) { + polManStats.numRdHitDirty++; + } else { + polManStats.numRdHitClean++; + } + } else { + polManStats.numWrHit++; + if (currDirty) { + polManStats.numWrHitDirty++; + } else { + polManStats.numWrHitClean++; + } + } + + } else { + + polManStats.numTotMisses++; + + unsigned invalidBlocks = 0; + for (int i = 0; i < assoc; i++) { + if (!tagMetadataStore.at(index).at(i)->validLine) { + invalidBlocks++; + } + } + + if (invalidBlocks == assoc) { + polManStats.numColdMissesSet++; + } + + if (currValid) { + polManStats.numHotMisses++; + } else { + polManStats.numColdMisses++; + numColdMisses++; + } + + if (pkt->isRead()) { + if (currDirty && currValid) { + polManStats.numRdMissDirty++; + } else { + polManStats.numRdMissClean++; + } + } else { + if (currDirty && currValid) { + polManStats.numWrMissDirty++; + } else { + polManStats.numWrMissClean++; + } + + } + } + + if ((numColdMisses >= (unsigned)(infoCacheWarmupRatio * dramCacheSize/blockSize)) && !resetStatsWarmup) { + inform("DRAM cache warm up percentage : %f, @ %d .. \n", infoCacheWarmupRatio*100.0, curTick()); + infoCacheWarmupRatio = infoCacheWarmupRatio + 0.05; + } + + if ((numColdMisses >= (unsigned)(cacheWarmupRatio * dramCacheSize/blockSize)) && !resetStatsWarmup) { + inform("DRAM cache fully warmed up @ %d .. \n", curTick()); + // exitSimLoop("cacheIsWarmedup",0); + resetStatsWarmup = true; + } + + return isHit; +} + +void +PolicyManager::checkHitOrMiss(reqBufferEntry* orbEntry) +{ + // look up the tagMetadataStore data structure to + // check if it's hit or miss + + bool currValid = tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->validLine; + bool currDirty = tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->dirtyLine; + + if (extreme) { + orbEntry->isHit = alwaysHit; + currValid = true; + currDirty = alwaysDirty; + } else { + orbEntry->isHit = currValid && (orbEntry->tagDC == tagMetadataStore.at(orbEntry->indexDC).at(orbEntry->wayNum)->tagDC); + } + + if (orbEntry->isHit) { + + polManStats.numTotHits++; + + if (orbEntry->owPkt->isRead()) { + polManStats.numRdHit++; + if (currDirty) { + polManStats.numRdHitDirty++; + } else { + polManStats.numRdHitClean++; + } + } else { + polManStats.numWrHit++; + if (currDirty) { + polManStats.numWrHitDirty++; + } else { + polManStats.numWrHitClean++; + } + } + + } else { + + polManStats.numTotMisses++; + + unsigned invalidBlocks = 0; + for (int i = 0; i < assoc; i++) { + if (!tagMetadataStore.at(orbEntry->indexDC).at(i)->validLine) { + invalidBlocks++; + } + } + + if (invalidBlocks == assoc) { + polManStats.numColdMissesSet++; + } + + if (currValid) { + polManStats.numHotMisses++; + } else { + polManStats.numColdMisses++; + numColdMisses++; + } + + if (orbEntry->owPkt->isRead()) { + if (currDirty && currValid) { + polManStats.numRdMissDirty++; + } else { + polManStats.numRdMissClean++; + } + } else { + if (currDirty && currValid) { + polManStats.numWrMissDirty++; + } else { + polManStats.numWrMissClean++; + } + + } + } + + if ((numColdMisses >= (unsigned)(infoCacheWarmupRatio * dramCacheSize/blockSize)) && !resetStatsWarmup) { + inform("DRAM cache warm up percentage : %f, @ %d .. 
\n", infoCacheWarmupRatio*100.0, curTick()); + infoCacheWarmupRatio = infoCacheWarmupRatio + 0.05; + } + + if ((numColdMisses >= (unsigned)(cacheWarmupRatio * dramCacheSize/blockSize)) && !resetStatsWarmup) { + inform("DRAM cache fully warmed up @ %d .. \n", curTick()); + // exitSimLoop("cacheIsWarmedup",0); + resetStatsWarmup = true; + } +} + +bool +PolicyManager::checkDirty(Addr index, int way) +{ + assert(way >= 0); + if (extreme) { + return alwaysDirty; + } else { + return (tagMetadataStore.at(index).at(way)->validLine && + tagMetadataStore.at(index).at(way)->dirtyLine); + } +} + +void +PolicyManager::accessAndRespond(PacketPtr pkt, Tick static_latency) +{ + DPRINTF(PolicyManager, "Responding to Address %d: %s\n", pkt->getAddr(), pkt->cmdString()); + + bool needsResponse = pkt->needsResponse(); + // do the actual memory access which also turns the packet into a + // response + panic_if(!getAddrRange().contains(pkt->getAddr()), + "Can't handle address range for packet %s\n", pkt->print()); + access(pkt); + + // turn packet around to go back to requestor if response expected + assert(needsResponse); + //if (needsResponse) { + // access already turned the packet into a response + assert(pkt->isResponse()); + // response_time consumes the static latency and is charged also + // with headerDelay that takes into account the delay provided by + // the xbar and also the payloadDelay that takes into account the + // number of data beats. + Tick response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. + pkt->headerDelay = pkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(pkt, response_time); + //} + // else { + // // @todo the packet is going to be deleted, and the MemPacket + // // is still having a pointer to it + // pendingDelete.reset(pkt); + // } + + DPRINTF(PolicyManager, "Done\n"); + + return; +} + +PacketPtr +PolicyManager::getPacket(Addr addr, unsigned size, const MemCmd& cmd, + Request::FlagsType flags) +{ + // Create new request + RequestPtr req = std::make_shared(addr, size, flags, + 0); + // Dummy PC to have PC-based prefetchers latch on; get entropy into higher + // bits + req->setPC(((Addr)0) << 2); + + // Embed it in a packet + PacketPtr pkt = new Packet(req, cmd); + + uint8_t* pkt_data = new uint8_t[req->getSize()]; + + pkt->dataDynamic(pkt_data); + + if (cmd.isWrite()) { + std::fill_n(pkt_data, req->getSize(), (uint8_t)0); + } + + return pkt; +} + +void +PolicyManager::sendRespondToRequestor(PacketPtr pkt, Tick static_latency) +{ + PacketPtr copyOwPkt = new Packet(pkt, + false, + pkt->isRead()); + copyOwPkt->makeResponse(); + + Tick response_time = curTick() + static_latency + copyOwPkt->headerDelay; + response_time += copyOwPkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. 
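Both accessAndRespond() and sendRespondToRequestor() above schedule the response at the controller's static latency plus whatever headerDelay/payloadDelay the crossbar has already charged to the packet, then clear those fields so the same delay is not counted twice. A minimal Python sketch of that bookkeeping, with made-up tick values (this is not gem5 API, only the arithmetic):

```python
def schedule_response(cur_tick, static_latency, header_delay, payload_delay):
    """Sketch of the response-time computation used above."""
    response_time = cur_tick + static_latency + header_delay + payload_delay
    # The packet's delays are consumed here, so they are reset to zero
    # before the response is queued on the port.
    return response_time, 0, 0

# Hypothetical example: 20,000 ticks of static latency plus xbar delays.
resp_at, hdr, payload = schedule_response(
    cur_tick=1_000_000, static_latency=20_000,
    header_delay=500, payload_delay=1_500)
print(resp_at)  # 1022000
```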
+ copyOwPkt->headerDelay = copyOwPkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(copyOwPkt, response_time); + +} + +bool +PolicyManager::resumeConflictingReq(reqBufferEntry* orbEntry) +{ + DPRINTF(PolicyManager, "resumeConflictingReq: %d: %d \n", curTick(), orbEntry->owPkt->getAddr()); + + bool conflictFound = false; + + if (orbEntry->owPkt->isWrite()) { + isInWriteQueue.erase(orbEntry->owPkt->getAddr()); + } + + logStatsPolMan(orbEntry); + + for (auto e = CRB.begin(); e != CRB.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(), entry.second->getSize()) + == orbEntry->indexDC) { + + DPRINTF(PolicyManager, "conf found: %d\n", entry.second->getAddr()); + + conflictFound = true; + + Addr confAddr = entry.second->getAddr(); + + ORB.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry; + + handleRequestorPkt(entry.second); + + ORB.at(confAddr)->arrivalTick = entry.first; + + CRB.erase(e); + + checkConflictInCRB(ORB.at(confAddr)); + + setNextState(ORB.at(confAddr)); + + handleNextState(ORB.at(confAddr)); + + break; + } + + } + + if (!conflictFound) { + DPRINTF(PolicyManager, "no conf for: %d\n", orbEntry->owPkt->getAddr()); + + ORB.erase(orbEntry->owPkt->getAddr()); + + delete orbEntry->owPkt; + + delete orbEntry; + + if (retryLLC) { + DPRINTF(PolicyManager, "retryLLC: sent\n"); + retryLLC = false; + port.sendRetryReq(); + } else { + if (drainState() == DrainState::Draining && ORB.empty() && + pktFarMemWrite.empty()) { + DPRINTF(Drain, "PolicyManager done draining\n"); + signalDrainDone(); + } + } + } + + if (retryLLCRepetitive) { + DPRINTF(PolicyManager, "retryLLCRepetitive: sent\n"); + retryLLCRepetitive = false; + port.sendRetryReq(); + } + + return conflictFound; +} + +void +PolicyManager::checkConflictInCRB(reqBufferEntry* orbEntry) +{ + for (auto e = CRB.begin(); e != CRB.end(); ++e) { + + auto entry = *e; + + if (returnIndexDC(entry.second->getAddr(),entry.second->getSize()) + == orbEntry->indexDC) { + orbEntry->conflict = true; + break; + } + } +} + +unsigned +PolicyManager::countTagCheckInORB() +{ + unsigned count =0; + for (auto i : ORB) { + if (i.second->state == tagCheck) { + count++; + } + } + return count; +} + +unsigned +PolicyManager::countLocRdInORB() +{ + unsigned count =0; + for (auto i : ORB) { + if (i.second->state == locMemRead) { + count++; + } + } + return count; +} + +unsigned +PolicyManager::countFarRdInORB() +{ + unsigned count =0; + for (auto i : ORB) { + if (i.second->state == farMemRead) { + count++; + } + } + return count; +} + +unsigned +PolicyManager::countLocWrInORB() +{ + unsigned count =0; + for (auto i : ORB) { + if (i.second->state == locMemWrite) { + count++; + } + } + return count; +} + +unsigned +PolicyManager::countFarWr() +{ + return pktFarMemWrite.size(); +} + +AddrRangeList +PolicyManager::getAddrRanges() +{ + return farReqPort.getAddrRanges(); +} + +Addr +PolicyManager::returnIndexDC(Addr request_addr, unsigned size) +{ + int index_bits = ceilLog2(dramCacheSize/(blockSize*assoc)); + int block_bits = ceilLog2(size); + return bits(request_addr, block_bits + index_bits-1, block_bits); +} + +Addr +PolicyManager::returnTagDC(Addr request_addr, unsigned size) +{ + int index_bits = ceilLog2(dramCacheSize/(blockSize*assoc)); + int block_bits = ceilLog2(size); + return bits(request_addr, addrSize-1, (index_bits+block_bits)); +} + +int +PolicyManager::findMatchingWay(Addr index, Addr tag) +{ + for (int 
i = 0; i < assoc; i++) { + if (tagMetadataStore.at(index).at(i)->validLine && tagMetadataStore.at(index).at(i)->tagDC == tag) { + return i; + } + } + + return noMatchingWay; +} + +int +PolicyManager::getCandidateWay(Addr index) +{ + if (assoc == 1) { + // equal to direct mapped cache + return 0; + } else { + // first find an empty way + for (int i = 0; i < assoc; i++) { + if (!tagMetadataStore.at(index).at(i)->validLine) { + return i; + } + } + + // if no empty way is found (= all the ways are filled), pick a victim to evcuate + std::vector entries; + + for (int i = 0; i < assoc; i++) { + if (tagMetadataStore.at(index).at(i)->validLine) { + entries.push_back(tagMetadataStore.at(index).at(i)); + } + } + + ReplaceableEntry* victim = replacementPolicy->getVictim(entries); + + for (int i = 0; i < assoc; i++) { + if (entries.at(i)->tagDC == victim->tagDC) { + assert(entries.at(i)->validLine); + return i; + } + } + } + + return -1; +} + +void +PolicyManager::handleDirtyCacheLine(Addr dirtyLineAddr) +{ + DPRINTF(PolicyManager, "handleDirtyCacheLine: %d\n", dirtyLineAddr); + assert(dirtyLineAddr != -1); + + // create a new request packet + PacketPtr wbPkt = getPacket(dirtyLineAddr, + blockSize, + MemCmd::WriteReq); + + pktFarMemWrite.push_back(std::make_pair(curTick(), wbPkt)); + + polManStats.avgFarWrQLenEnq = pktFarMemWrite.size(); + + if (!farMemWriteEvent.scheduled() && !retryFarMemWrite) { + schedule(farMemWriteEvent, curTick()); + } + + polManStats.numWrBacks++; +} + +void +PolicyManager::logStatsPolMan(reqBufferEntry* orbEntry) +{ + if (locMemPolicy == enums::Rambus || locMemPolicy == enums::RambusTagProbOpt) { + assert(orbEntry->arrivalTick != MaxTick); + assert(orbEntry->tagCheckEntered != MaxTick); + assert(orbEntry->tagCheckExit != MaxTick); + + polManStats.totPktLifeTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktORBTime += ((curTick() - orbEntry->tagCheckEntered)/1000); + polManStats.totTimeTagCheckRes += ((orbEntry->tagCheckExit - orbEntry->tagCheckEntered)/1000); + + if (orbEntry->owPkt->isRead()) { + polManStats.totPktLifeTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktORBTimeRd += ((curTick() - orbEntry->tagCheckEntered)/1000); + polManStats.totTimeTagCheckResRd += ((orbEntry->tagCheckExit - orbEntry->tagCheckEntered)/1000); + + if (orbEntry->isHit) { + polManStats.totTimeTagCheckResRdH += ((orbEntry->tagCheckExit - orbEntry->tagCheckEntered)/1000); + } else if (!orbEntry->isHit && !orbEntry->prevDirty) { + polManStats.totTimeTagCheckResRdMC += ((orbEntry->tagCheckExit - orbEntry->tagCheckEntered)/1000); + } else if (!orbEntry->isHit && orbEntry->prevDirty) { + polManStats.totTimeTagCheckResRdMD += ((orbEntry->tagCheckExit - orbEntry->tagCheckEntered)/1000); + } + + } else { + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeWr += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktLifeTimeWr += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktORBTimeWr += ((curTick() - orbEntry->tagCheckEntered)/1000); + polManStats.totTimeTagCheckResWr += ((orbEntry->tagCheckExit - orbEntry->tagCheckEntered)/1000); + } + + if (orbEntry->owPkt->isRead() && orbEntry->isHit) { + assert(orbEntry->locRdExit != MaxTick); + polManStats.totTimeInLocRead += ((orbEntry->locRdExit - orbEntry->tagCheckEntered)/1000); + } + + if (orbEntry->owPkt->isRead() && !orbEntry->isHit) { + assert(orbEntry->farRdExit != MaxTick); + assert(orbEntry->farRdEntered != MaxTick); + 
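returnIndexDC() and returnTagDC() above slice the set index and tag out of the request address using the cache geometry (dramCacheSize, blockSize, assoc, addrSize), and findMatchingWay()/getCandidateWay() then choose a way: a valid tag match first, otherwise an empty way, otherwise the replacement policy's victim. A rough Python sketch of the address split, assuming power-of-two sizes; the 128 MiB cache, 64 B blocks, 8 ways and 48 address bits are illustrative numbers, not values taken from this patch:

```python
from math import log2

def addr_split(addr, cache_size, block_size, assoc, addr_size):
    """Sketch of returnIndexDC/returnTagDC: block offset | set index | tag."""
    index_bits = int(log2(cache_size // (block_size * assoc)))
    block_bits = int(log2(block_size))
    index = (addr >> block_bits) & ((1 << index_bits) - 1)
    tag_width = addr_size - block_bits - index_bits
    tag = (addr >> (block_bits + index_bits)) & ((1 << tag_width) - 1)
    return index, tag

index, tag = addr_split(0x123456789A40,
                        cache_size=128 * 2**20, block_size=64,
                        assoc=8, addr_size=48)
```

Requests that land in the same set this way are what checkConflictInORB() treats as a conflict once all ways of that set are already occupied by entries in the ORB.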
assert(orbEntry->farRdIssued != MaxTick); + assert(orbEntry->tagCheckExit != MaxTick); + assert(orbEntry->locWrExit != MaxTick); + assert(orbEntry->locWrEntered != MaxTick); + + polManStats.totTimeInFarRead += ((orbEntry->farRdExit - orbEntry->farRdEntered)/1000); + polManStats.totTimeFarRdtoSend += ((orbEntry->farRdIssued - orbEntry->farRdEntered)/1000); + polManStats.totTimeFarRdtoRecv += ((orbEntry->farRdExit - orbEntry->farRdIssued)/1000); + polManStats.totTimeInLocWrite += ((orbEntry->locWrExit - orbEntry->locWrEntered)/1000); + } + } + else { + // MUST be updated since they are average, they should be per case + if (locMemPolicy == enums::Oracle ) { + if ((orbEntry->owPkt->isRead() && orbEntry->isHit) || + (orbEntry->owPkt->isRead() && !orbEntry->isHit && orbEntry->prevDirty) || + (!orbEntry->owPkt->isRead() && !orbEntry->isHit && orbEntry->prevDirty)) { + polManStats.totPktORBTime += ((curTick() - orbEntry->locRdEntered)/1000); + if (orbEntry->owPkt->isRead()) { + polManStats.totPktORBTimeRd += ((curTick() - orbEntry->locRdEntered)/1000); + polManStats.totTimeTagCheckResRd += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + } else { + polManStats.totPktORBTimeWr += ((curTick() - orbEntry->locRdEntered)/1000); + polManStats.totTimeTagCheckResWr += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + } + } + else if (!orbEntry->owPkt->isRead() && (orbEntry->isHit || (!orbEntry->isHit && !orbEntry->prevDirty))) { + polManStats.totPktORBTime += ((curTick() - orbEntry->locWrEntered)/1000); + polManStats.totPktORBTimeWr += ((curTick() - orbEntry->locWrEntered)/1000); + polManStats.totTimeTagCheckResWr += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + } + else if (orbEntry->owPkt->isRead() && !orbEntry->isHit && !orbEntry->prevDirty) { + polManStats.totPktORBTime += ((curTick() - orbEntry->farRdEntered)/1000); + polManStats.totPktORBTimeRd += ((curTick() - orbEntry->farRdEntered)/1000); + polManStats.totTimeTagCheckResRd += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + } + + } else { // locMemPolicy == enums::CascadeLakeNoPartWrs + // This is incorrect for locMemPolicy == enums::BearWriteOpt + polManStats.totPktORBTime += ((curTick() - orbEntry->locRdEntered)/1000); + + if (orbEntry->owPkt->isRead()) { + polManStats.totPktORBTimeRd += ((curTick() - orbEntry->locRdEntered)/1000); + polManStats.totTimeTagCheckResRd += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + } else { + polManStats.totPktORBTimeWr += ((curTick() - orbEntry->locRdEntered)/1000); + polManStats.totTimeTagCheckResWr += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + } + } + + polManStats.totPktLifeTime += ((curTick() - orbEntry->arrivalTick)/1000); + + polManStats.totTimeFarRdtoSend += ((orbEntry->farRdIssued - orbEntry->farRdEntered)/1000); + polManStats.totTimeFarRdtoRecv += ((orbEntry->farRdExit - orbEntry->farRdIssued)/1000); + polManStats.totTimeInLocRead += ((orbEntry->locRdExit - orbEntry->locRdEntered)/1000); + polManStats.totTimeInLocWrite += ((orbEntry->locWrExit - orbEntry->locWrEntered)/1000); + polManStats.totTimeInFarRead += ((orbEntry->farRdExit - orbEntry->farRdEntered)/1000); + + if (orbEntry->owPkt->isRead()) { + polManStats.totPktLifeTimeRd += ((curTick() - orbEntry->arrivalTick)/1000); + } else { + polManStats.totPktLifeTimeWr += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTime += ((curTick() - orbEntry->arrivalTick)/1000); + polManStats.totPktRespTimeWr += ((curTick() - orbEntry->arrivalTick)/1000); + } + + } +} + + +void 
+PolicyManager::ReqPortPolManager::recvReqRetry() +{ + if (this->name().find("loc_req_port") != std::string::npos) { + polMan.locMemRecvReqRetry(); + } + if (this->name().find("far_req_port") != std::string::npos) { + polMan.farMemRecvReqRetry(); + } +} + +bool +PolicyManager::ReqPortPolManager::recvTimingResp(PacketPtr pkt) +{ + // since in the constructor we are appending loc_req_port + // to the loc mem port, the name should always have the substring + // irrespective of the configuration names + if (this->name().find("loc_req_port") != std::string::npos) { + return polMan.locMemRecvTimingResp(pkt); + } else if (this->name().find("far_req_port") != std::string::npos) { + return polMan.farMemRecvTimingResp(pkt); + } else { + std::cout << "Port name error, fix it!\n"; + return false; + } +} + +PolicyManager::PolicyManagerStats::PolicyManagerStats(PolicyManager &_polMan) + : statistics::Group(&_polMan), + polMan(_polMan), + +///// + ADD_STAT(readReqs, statistics::units::Count::get(), + "Number of read requests accepted"), + ADD_STAT(writeReqs, statistics::units::Count::get(), + "Number of write requests accepted"), + + ADD_STAT(servicedByWrQ, statistics::units::Count::get(), + "Number of controller read bursts serviced by the write queue"), + ADD_STAT(servicedByFB, statistics::units::Count::get(), + "Number of controller read bursts serviced by the flush buffer"), + ADD_STAT(mergedWrBursts, statistics::units::Count::get(), + "Number of controller write bursts merged with an existing one"), + ADD_STAT(mergedWrPolManWB, statistics::units::Count::get(), + "Number of controller write bursts merged with an existing one in write back buffer"), + ADD_STAT(mergedWrLocMemFB, statistics::units::Count::get(), + "Number of controller write bursts merged with an existing one in flush buffer"), + + ADD_STAT(numRdRetry, statistics::units::Count::get(), + "Number of times read queue was full causing retry"), + ADD_STAT(numWrRetry, statistics::units::Count::get(), + "Number of times write queue was full causing retry"), + + ADD_STAT(readPktSize, statistics::units::Count::get(), + "Read request sizes (log2)"), + ADD_STAT(writePktSize, statistics::units::Count::get(), + "Write request sizes (log2)"), + + ADD_STAT(bytesReadWrQ, statistics::units::Byte::get(), + "Total number of bytes read from write queue"), + ADD_STAT(bytesReadSys, statistics::units::Byte::get(), + "Total read bytes from the system interface side"), + ADD_STAT(bytesWrittenSys, statistics::units::Byte::get(), + "Total written bytes from the system interface side"), + + ADD_STAT(avgRdBWSys, statistics::units::Rate< + statistics::units::Byte, statistics::units::Second>::get(), + "Average system read bandwidth in Byte/s"), + ADD_STAT(avgWrBWSys, statistics::units::Rate< + statistics::units::Byte, statistics::units::Second>::get(), + "Average system write bandwidth in Byte/s"), + + ADD_STAT(totGap, statistics::units::Tick::get(), + "Total gap between requests"), + ADD_STAT(avgGap, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), + "Average gap between requests"), + + ADD_STAT(avgORBLen, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average ORB length"), + ADD_STAT(avgTagCheckQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local read queue length"), + ADD_STAT(avgLocRdQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local read queue length"), + 
ADD_STAT(avgLocWrQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local write queue length"), + ADD_STAT(avgFarRdQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far read queue length"), + ADD_STAT(avgFarWrQLenStrt, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far write queue length"), + + ADD_STAT(avgTagCheckQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local read queue length when enqueuing"), + ADD_STAT(avgLocRdQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local read queue length when enqueuing"), + ADD_STAT(avgLocWrQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average local write queue length when enqueuing"), + ADD_STAT(avgFarRdQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far read queue length when enqueuing"), + ADD_STAT(avgFarWrQLenEnq, statistics::units::Rate< + statistics::units::Count, statistics::units::Tick>::get(), + "Average far write queue length when enqueuing"), + + ADD_STAT(numWrBacks, statistics::units::Count::get(), + "Total number of write backs from DRAM cache to main memory"), + ADD_STAT(totNumConf, statistics::units::Count::get(), + "Total number of packets conflicted on DRAM cache"), + ADD_STAT(totNumORBFull, statistics::units::Count::get(), + "Total number of packets ORB full"), + ADD_STAT(totNumCRBFull, statistics::units::Count::get(), + "Total number of packets conflicted yet couldn't " + "enter confBuffer"), + + ADD_STAT(maxNumConf, statistics::units::Count::get(), + "Maximum number of packets conflicted on DRAM cache"), + + ADD_STAT(sentTagCheckPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(failedTagCheckPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(sentLocRdPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(sentLocWrPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(failedLocRdPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(failedLocWrPort, statistics::units::Count::get(), + "stat"), + // ADD_STAT(recvdRdPort, statistics::units::Count::get(), + // "stat"), + ADD_STAT(sentFarRdPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(sentFarWrPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(failedFarRdPort, statistics::units::Count::get(), + "stat"), + ADD_STAT(failedFarWrPort, statistics::units::Count::get(), + "stat"), + + ADD_STAT(totPktLifeTime, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktLifeTimeRd, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktLifeTimeWr, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktORBTime, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktORBTimeRd, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktORBTimeWr, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktRespTime, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktRespTimeRd, statistics::units::Tick::get(), "stat"), + ADD_STAT(totPktRespTimeWr, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeTagCheckRes, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeTagCheckResRd, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeTagCheckResWr, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeTagCheckResRdH, 
statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeTagCheckResRdMC, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeTagCheckResRdMD, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeInLocRead, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeInLocWrite, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeInFarRead, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeFarRdtoSend, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeFarRdtoRecv, statistics::units::Tick::get(), "stat"), + ADD_STAT(totTimeFarWrtoSend, statistics::units::Tick::get(), "stat"), + + ADD_STAT(avgPktLifeTime, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktLifeTimeRd, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktLifeTimeWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktORBTime, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktORBTimeRd, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktORBTimeWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktRespTime, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktRespTimeRd, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgPktRespTimeWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeTagCheckRes, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeTagCheckResRd, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeTagCheckResWr, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeTagCheckResRdH, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeTagCheckResRdMC, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeTagCheckResRdMD, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeInLocRead, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeInLocWrite, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeInFarRead, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeFarRdtoSend, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeFarRdtoRecv, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + ADD_STAT(avgTimeFarWrtoSend, statistics::units::Rate< + statistics::units::Tick, statistics::units::Count>::get(), "stat"), + + ADD_STAT(numTotHits, statistics::units::Count::get(), "stat"), + ADD_STAT(numTotMisses, statistics::units::Count::get(), "stat"), + ADD_STAT(numColdMisses, statistics::units::Count::get(), "stat"), + ADD_STAT(numColdMissesSet, statistics::units::Count::get(), "stat"), + ADD_STAT(numHotMisses, 
statistics::units::Count::get(), "stat"), + ADD_STAT(numRdMissClean, statistics::units::Count::get(), "stat"), + ADD_STAT(numRdMissDirty, statistics::units::Count::get(), "stat"), + ADD_STAT(numRdHit, statistics::units::Count::get(), "stat"), + ADD_STAT(numWrMissClean, statistics::units::Count::get(), "stat"), + ADD_STAT(numWrMissDirty, statistics::units::Count::get(), "stat"), + ADD_STAT(numWrHit, statistics::units::Count::get(), "stat"), + ADD_STAT(numRdHitDirty, statistics::units::Count::get(), "stat"), + ADD_STAT(numRdHitClean, statistics::units::Count::get(), "stat"), + ADD_STAT(numWrHitDirty, statistics::units::Count::get(), "stat"), + ADD_STAT(numWrHitClean, statistics::units::Count::get(), "stat"), + + ADD_STAT(missRatio, statistics::units::Rate< + statistics::units::Count, statistics::units::Count>::get(), "stat"), + ADD_STAT(dirtyRatio, statistics::units::Rate< + statistics::units::Count, statistics::units::Count>::get(), "stat"), + ADD_STAT(missDistance, statistics::units::Count::get(), + "Miss distance, to track capacity misses"), + ADD_STAT(blkReuse, statistics::units::Count::get(), + "cache line block reuse before eviction"), + ADD_STAT(ticksBeforeEviction, statistics::units::Count::get(), + "how long the blk was in the cache") + +{ +} + +void +PolicyManager::PolicyManagerStats::regStats() +{ + using namespace statistics; + + avgORBLen.precision(4); + avgLocRdQLenStrt.precision(2); + avgLocWrQLenStrt.precision(2); + avgFarRdQLenStrt.precision(2); + avgFarWrQLenStrt.precision(2); + + avgLocRdQLenEnq.precision(2); + avgLocWrQLenEnq.precision(2); + avgFarRdQLenEnq.precision(2); + avgFarWrQLenEnq.precision(2); + + avgPktLifeTime.precision(2); + avgPktLifeTimeRd.precision(2); + avgPktLifeTimeWr.precision(2); + avgPktORBTime.precision(2); + avgPktORBTimeRd.precision(2); + avgPktORBTimeWr.precision(2); + avgPktRespTime.precision(2); + avgPktRespTimeRd.precision(2); + avgPktRespTimeWr.precision(2); + avgTimeTagCheckRes.precision(2); + avgTimeTagCheckResRd.precision(2); + avgTimeTagCheckResWr.precision(2); + avgTimeTagCheckResRdH.precision(2); + avgTimeTagCheckResRdMC.precision(2); + avgTimeTagCheckResRdMD.precision(2); + avgTimeInLocRead.precision(2); + avgTimeInLocWrite.precision(2); + avgTimeInFarRead.precision(2); + avgTimeFarRdtoSend.precision(2); + avgTimeFarRdtoRecv.precision(2); + avgTimeFarWrtoSend.precision(2); + + readPktSize.init(ceilLog2(polMan.blockSize) + 1); + writePktSize.init(ceilLog2(polMan.blockSize) + 1); + + avgRdBWSys.precision(8); + avgWrBWSys.precision(8); + avgGap.precision(2); + + missRatio.precision(2); + dirtyRatio.precision(2); + + missDistance + .init(2048) + .flags(pdf | nozero); + + blkReuse + .init(128) + .flags(pdf | nozero); + + ticksBeforeEviction + .init(1024) + .flags(pdf | nozero); + + // Formula stats + avgRdBWSys = (bytesReadSys) / simSeconds; + avgWrBWSys = (bytesWrittenSys) / simSeconds; + + avgPktLifeTime = (totPktLifeTime) / (readReqs + writeReqs); + avgPktLifeTimeRd = (totPktLifeTimeRd) / (readReqs); + avgPktLifeTimeWr = (totPktLifeTimeWr) / (writeReqs); + + avgPktORBTime = (totPktORBTime) / (readReqs + writeReqs); + avgPktORBTimeRd = (totPktORBTimeRd) / (readReqs); + avgPktORBTimeWr = (totPktORBTimeWr) / (writeReqs); + + avgPktRespTime = (totPktRespTime) / (readReqs + writeReqs); + avgPktRespTimeRd = (totPktRespTimeRd) / (readReqs); + avgPktRespTimeWr = (totPktRespTimeWr) / (writeReqs); + + if (polMan.locMemPolicy == enums::Rambus || polMan.locMemPolicy == enums::RambusTagProbOpt) { + avgTimeTagCheckRes = (totTimeTagCheckRes) / (readReqs 
+ writeReqs); + avgTimeInLocRead = (totTimeInLocRead) / (numRdHit); + } else { + avgTimeTagCheckRes = (totTimeInLocRead) / (readReqs + writeReqs); + avgTimeInLocRead = (totTimeInLocRead) / (readReqs + writeReqs); + } + + avgTimeTagCheckResRd = (totTimeTagCheckResRd) / (readReqs); + avgTimeTagCheckResWr = (totTimeTagCheckResWr) / (writeReqs); + + avgTimeTagCheckResRdH = (totTimeTagCheckResRdH) / (numRdHit); + avgTimeTagCheckResRdMC = (totTimeTagCheckResRdMC) / (numRdMissClean); + avgTimeTagCheckResRdMD = (totTimeTagCheckResRdMD) / (numRdMissDirty); + + + avgTimeInLocWrite = (totTimeInLocWrite) / (numRdMissClean + numRdMissDirty + writeReqs); + avgTimeInFarRead = (totTimeInFarRead) / (numRdMissClean + numRdMissDirty); + + avgTimeFarRdtoSend = (totTimeFarRdtoSend) / (sentFarRdPort); + avgTimeFarRdtoRecv = (totTimeFarRdtoRecv) / (sentFarRdPort); + avgTimeFarWrtoSend = (totTimeFarWrtoSend) / (sentFarWrPort); + + missRatio = (numTotMisses / (readReqs + writeReqs)) * 100; + dirtyRatio = ((numRdMissDirty + numWrMissDirty) / (readReqs + writeReqs)) * 100; + + avgGap = totGap / (readReqs + writeReqs); + +} + +Port & +PolicyManager::getPort(const std::string &if_name, PortID idx) +{ + panic_if(idx != InvalidPortID, "This object doesn't support vector ports"); + + // This is the name from the Python SimObject declaration (SimpleMemobj.py) + if (if_name == "port") { + return port; + } else if (if_name == "loc_req_port") { + return locReqPort; + } else if (if_name == "far_req_port") { + return farReqPort; + } else { + // pass it along to our super class + panic("PORT NAME ERROR !!!!\n"); + } +} + +DrainState +PolicyManager::drain() +{ + if (!ORB.empty() || !pktFarMemWrite.empty()) { + DPRINTF(Drain, "DRAM cache is not drained! Have %d in ORB and %d in " + "writeback queue.\n", ORB.size(), pktFarMemWrite.size()); + return DrainState::Draining; + } else { + return DrainState::Drained; + } +} + +void +PolicyManager::serialize(CheckpointOut &cp) const +{ + warn_if(numColdMisses > tagMetadataStore.size()*assoc, + "numColdMisses is more than the total blocks!"); + DPRINTF(ChkptRstrTest, "name: %s\n", "tagMetadataStore"+channelIndex); + + ScopedCheckpointSection sec(cp, "tagMetadataStore"+channelIndex); + paramOut(cp, "numValidEntries", numColdMisses); + int count = 0; + int invalids = 0; + for (auto const &set : tagMetadataStore) { + for (auto const way : set) { + if (way->validLine) { + ScopedCheckpointSection sec_entry(cp,csprintf("Entry%d", count++)); + paramOut(cp, "dirtyLine", way->dirtyLine); + paramOut(cp, "farMemAddr", way->farMemAddr); + paramOut(cp, "tag", way->tagDC); + paramOut(cp, "index", way->indexDC); + paramOut(cp, "counter", way->counter); + paramOut(cp, "tickEntered", way->tickEntered); + Tick lastTouchTick = replacementPolicy->getLastTouchTick(way->replacementData); + assert(lastTouchTick != MaxTick); + paramOut(cp, "lastTouchTick", lastTouchTick); + } else { + invalids++; + } + } + } + warn_if((tagMetadataStore.size()*assoc - numColdMisses) != invalids, + "Number of invalids did not match\n"); + DPRINTF(ChkptRstrTest, "invalids: %d\n", invalids); +} + +void +PolicyManager::unserialize(CheckpointIn &cp) +{ + DPRINTF(ChkptRstrTest, "name: %s\n", "tagMetadataStore"+channelIndex); + + ScopedCheckpointSection sec(cp, "tagMetadataStore"+channelIndex); + int num_entries = 0; + int countValid = 0; + paramIn(cp, "numValidEntries", num_entries); + for (int i = 0; i < num_entries; i++) { + ScopedCheckpointSection sec_entry(cp,csprintf("Entry%d", i)); + bool dirty; + Addr farAddr; + Addr tag; + Addr 
index; + unsigned counter; + uint64_t tickEntered; + Tick lastTouchTick; + + paramIn(cp, "dirtyLine", dirty); + paramIn(cp, "farMemAddr", farAddr); + paramIn(cp, "tag", tag); + paramIn(cp, "index", index); + paramIn(cp, "counter", counter); + paramIn(cp, "tickEntered", tickEntered); + paramIn(cp, "lastTouchTick", lastTouchTick); + + assert(getAddrRange().contains(farAddr)); + countValid++; + int way = -1; + + if (assoc == 1) { + way = findEmptyWay(index); + // once you stored LRU, come back here and call it instead of putting 0; + if (way ==-1) { + way = 0; // so it always works for direct-mapped + } + } + if (assoc > 1) { + Addr indexNew = returnIndexDC(farAddr, blockSize); + Addr tagNew = returnTagDC(farAddr, blockSize); + way = findMatchingWay(indexNew, tagNew); + if (way == noMatchingWay) { + way = getCandidateWay(indexNew); + } + assert(way != -1); + assert(way < assoc); + index = indexNew; + tag = tagNew; + } + + tagMetadataStore.at(index).at(way)->tagDC = tag; + tagMetadataStore.at(index).at(way)->indexDC = index; + tagMetadataStore.at(index).at(way)->validLine = true; + tagMetadataStore.at(index).at(way)->dirtyLine = dirty; + tagMetadataStore.at(index).at(way)->farMemAddr = farAddr; + tagMetadataStore.at(index).at(way)->counter = counter; + tagMetadataStore.at(index).at(way)->tickEntered = tickEntered; + replacementPolicy->setLastTouchTick( + tagMetadataStore.at(index).at(way)->replacementData, + lastTouchTick); + + DPRINTF(ChkptRstrTest, "%d, %d, %d, %d, %d, %d, %d, %d\n", + tagMetadataStore.at(index).at(way)->tagDC, + tagMetadataStore.at(index).at(way)->indexDC, + tagMetadataStore.at(index).at(way)->validLine, + tagMetadataStore.at(index).at(way)->dirtyLine, + tagMetadataStore.at(index).at(way)->farMemAddr, + tagMetadataStore.at(index).at(way)->counter, + tagMetadataStore.at(index).at(way)->tickEntered, + replacementPolicy->getLastTouchTick( + tagMetadataStore.at(index).at(way)->replacementData)); + + } +} + +int +PolicyManager::findEmptyWay(Addr index) +{ + for (int i=0; ivalidLine) { + return i; + } + } + + return -1; +} + +bool +PolicyManager::recvReadFlushBuffer(Addr addr) +{ + if (pktFarMemWrite.size() < (orbMaxSize / 2)) { + handleDirtyCacheLine(addr); + return true; + } + return false; +} + +} // namespace memory +} // namespace gem5 diff --git a/src/mem/policy_manager.hh b/src/mem/policy_manager.hh new file mode 100644 index 0000000000..a00ee3f5d8 --- /dev/null +++ b/src/mem/policy_manager.hh @@ -0,0 +1,556 @@ +/** + * @file + * DCacheCtrl declaration + */ + +#ifndef __POLICY_MANAGER_HH__ +#define __POLICY_MANAGER_HH__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base/callback.hh" +#include "base/compiler.hh" +#include "base/logging.hh" +#include "base/statistics.hh" +#include "base/trace.hh" +#include "base/types.hh" +#include "enums/Policy.hh" +#include "enums/ReplPolicySetAssoc.hh" +#include "mem/cache/replacement_policies/base.hh" +#include "mem/cache/replacement_policies/replaceable_entry.hh" +#include "mem/mem_ctrl.hh" +#include "mem/mem_interface.hh" +#include "mem/packet.hh" +#include "mem/qport.hh" +#include "mem/request.hh" +#include "params/PolicyManager.hh" +#include "sim/clocked_object.hh" +#include "sim/cur_tick.hh" +#include "sim/eventq.hh" +#include "sim/system.hh" + +#define noMatchingWay 1000000 + +namespace gem5 +{ + +namespace memory +{ + +// class DRAMInterface; + +class PolicyManager : public AbstractMemory +{ + protected: + + class RespPortPolManager : public QueuedResponsePort + { + 
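recvReadFlushBuffer() above only converts a flush-buffer address into a far-memory writeback while pktFarMemWrite holds fewer than half of orbMaxSize entries, so evicted dirty lines cannot crowd out demand traffic. A toy Python sketch of that admission check (the queue contents and sizes are hypothetical):

```python
def accept_flush(far_wr_queue, orb_max_size, addr):
    """Sketch of the recvReadFlushBuffer admission policy."""
    if len(far_wr_queue) < orb_max_size // 2:
        far_wr_queue.append(addr)   # stands in for handleDirtyCacheLine(addr)
        return True
    return False                    # caller must retry later

queue = []
assert accept_flush(queue, orb_max_size=128, addr=0x4000)
```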
private: + + RespPacketQueue queue; + PolicyManager& polMan; + + public: + + RespPortPolManager(const std::string& name, PolicyManager& _polMan) + : QueuedResponsePort(name, queue), + queue(_polMan, *this, true), + polMan(_polMan) + { } + + protected: + + Tick recvAtomic(PacketPtr pkt) override + {return polMan.recvAtomic(pkt);} + + Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) override + {return polMan.recvAtomicBackdoor(pkt, backdoor);} + + void recvFunctional(PacketPtr pkt) override + {polMan.recvFunctional(pkt);} + + bool recvTimingReq(PacketPtr pkt) override + {return polMan.recvTimingReq(pkt);} + + AddrRangeList getAddrRanges() const override + {return polMan.getAddrRanges();} + + }; + + class ReqPortPolManager : public RequestPort + { + public: + + ReqPortPolManager(const std::string& name, PolicyManager& _polMan) + : RequestPort(name, &_polMan), polMan(_polMan) + { } + + protected: + + void recvReqRetry(); + + bool recvTimingResp(PacketPtr pkt); + + private: + + PolicyManager& polMan; + + }; + + RespPortPolManager port; + ReqPortPolManager locReqPort; + ReqPortPolManager farReqPort; + + unsigned locBurstSize; + unsigned farBurstSize; + + // enums::Policy locMemPolicy; + + /** + * The following are basic design parameters of the unified + * DRAM cache controller, and are initialized based on parameter values. + * The rowsPerBank is determined based on the capacity, number of + * ranks and banks, the burst size, and the row buffer size. + */ + + // MemInterface* locMem; + AbstractMemory* locMem; + + /** Replacement policy */ + replacement_policy::Base* replacementPolicy; + + unsigned long long dramCacheSize; + unsigned blockSize; + unsigned assoc; + unsigned addrSize; + unsigned orbMaxSize; + unsigned orbSize; + unsigned crbMaxSize; + unsigned crbSize; + bool extreme; + bool alwaysHit; + bool alwaysDirty; + bool bypassDcache; + std::string channelIndex; + + /** + * Pipeline latency of the controller frontend. The frontend + * contribution is added to writes (that complete when they are in + * the write buffer) and reads that are serviced the write buffer. + */ + const Tick frontendLatency; + + /** + * Pipeline latency of the backend and PHY. Along with the + * frontend contribution, this latency is added to reads serviced + * by the memory. + */ + const Tick backendLatency; + + unsigned numColdMisses; + float cacheWarmupRatio; + // used to print an update + // whenever cache is warmed up + // by an additional 5% (hard-coded value) + float infoCacheWarmupRatio; + bool resetStatsWarmup; + + Tick prevArrival; + + std::unordered_set isInWriteQueue; + + /** A storage to keep the tag and metadata for the + * DRAM Cache entries. + */ + std::vector> tagMetadataStore; + + /** Different states a packet can transition from one + * to the other while it's process in the DRAM Cache Controller. + */ + enum reqState + { + start, + tagCheck, waitingTCtag, //WaitingTCdata, + locMemRead, waitingLocMemReadResp, locRdRespReady, + locMemWrite, waitingLocMemWriteResp, + farMemRead, waitingFarMemReadResp, + farMemWrite, waitingFarMemWriteResp + }; + + /** + * A class for the entries of the + * outstanding request buffer (ORB). 
+ */ + class reqBufferEntry + { + public: + + bool validEntry; + Tick arrivalTick; + + // DRAM cache related metadata + Addr tagDC; + Addr indexDC; + int wayNum; + + // pointer to the outside world (ow) packet received from llc + const PacketPtr owPkt; + + enums::Policy pol; + reqState state; + + bool issued; + bool isHit; + bool conflict; + bool prevDirty; + // rcvdRdResp is only used for read misses, + // since the data response from a tag check + // may be received too late + // (after rd from far mem & write to loc). + // Note: writes responds are very quick, + // just an ack with a frontend latency only. + bool rcvdLocRdResp; + bool rcvdFarRdResp; + Addr dirtyLineAddr; + bool handleDirtyLine; + bool repetitiveReqRcvd; + + + // recording the tick when the req transitions into a new stats. + // The subtract between each two consecutive states entrance ticks, + // is the number of ticks the req spent in the proceeded state. + // The subtract between entrance and issuance ticks for each state, + // is the number of ticks for waiting time in that state. + Tick tagCheckEntered; + Tick tagCheckIssued; + Tick tagCheckExit; + Tick locRdEntered; + Tick locRdIssued; + Tick locRdExit; + Tick locWrEntered; + Tick locWrIssued; + Tick locWrExit; + Tick farRdEntered; + Tick farRdIssued; + Tick farRdExit; + + reqBufferEntry( + bool _validEntry, Tick _arrivalTick, + Addr _tagDC, Addr _indexDC, int _wayNum, + PacketPtr _owPkt, + enums::Policy _pol, reqState _state, + bool _issued, bool _isHit, bool _conflict, + bool _prevDirty, bool _rcvdLocRdResp, bool _rcvdFarRdResp, + Addr _dirtyLineAddr, bool _handleDirtyLine, + Tick _tagCheckEntered, Tick _tagCheckIssued, Tick _tagCheckExit, + Tick _locRdEntered, Tick _locRdIssued, Tick _locRdExit, + Tick _locWrEntered, Tick _locWrIssued, Tick _locWrExit, + Tick _farRdEntered, Tick _farRdIssued, Tick _farRdExit) + : + validEntry(_validEntry), arrivalTick(_arrivalTick), + tagDC(_tagDC), indexDC(_indexDC), wayNum(_wayNum), + owPkt( _owPkt), + pol(_pol), state(_state), + issued(_issued), isHit(_isHit), conflict(_conflict), + prevDirty(_prevDirty), rcvdLocRdResp(_rcvdLocRdResp), rcvdFarRdResp(_rcvdFarRdResp), + dirtyLineAddr(_dirtyLineAddr), handleDirtyLine(_handleDirtyLine), repetitiveReqRcvd(false), + tagCheckEntered(_tagCheckEntered), tagCheckIssued(_tagCheckIssued), tagCheckExit(_tagCheckExit), + locRdEntered(_locRdEntered), locRdIssued(_locRdIssued), locRdExit(_locRdExit), + locWrEntered(_locWrEntered), locWrIssued(_locWrIssued), locWrExit(_locWrExit), + farRdEntered(_farRdEntered), farRdIssued(_farRdIssued), farRdExit(_farRdExit) + { } + }; + + /** + * This is the outstanding request buffer (ORB) data + * structure, the main DS within the DRAM Cache + * Controller. The key is the address, for each key + * the map returns a reqBufferEntry which maintains + * the entire info related to that address while it's + * been processed in the DRAM Cache controller. + */ + std::map ORB; + + typedef std::pair timeReqPair; + /** + * This is the second important data structure + * within the DRAM cache controller which holds + * received packets that had conflict with some + * other address(s) in the DRAM Cache that they + * are still under process in the controller. + * Once thoes addresses are finished processing, + * Conflicting Requets Buffre (CRB) is consulted + * to see if any packet can be moved into the + * outstanding request buffer and start being + * processed in the DRAM cache controller. 
+ */ + std::vector CRB; + + std::unordered_map capacityTracker; + uint64_t blksInserted; + + + /** + * This is a unified retry flag for both reads and writes. + * It helps remember if we have to retry a request when available. + */ + bool retryLLC; + bool retryLLCRepetitive; + bool retryLLCFarMemWr; + bool retryTagCheck; + bool retryLocMemRead; + bool retryFarMemRead; + bool retryLocMemWrite; + bool retryFarMemWrite; + + /** + * A queue for evicted dirty lines of DRAM cache, + * to be written back to the backing memory. + * These packets are not maintained in the ORB. + */ + std::deque pktFarMemWrite; + + // Maintenance Queues + std::deque pktTagCheck; // Used only for Rambus policy + std::deque pktLocMemRead; + std::deque pktLocMemWrite; + std::deque pktFarMemRead; + + // Maintenance variables + unsigned maxConf; + + AddrRangeList getAddrRanges(); + + // events + + // Used only for Rambus policy + void processTagCheckEvent(); + EventFunctionWrapper tagCheckEvent; + + void processLocMemReadEvent(); + EventFunctionWrapper locMemReadEvent; + + void processLocMemWriteEvent(); + EventFunctionWrapper locMemWriteEvent; + + void processFarMemReadEvent(); + EventFunctionWrapper farMemReadEvent; + + void processFarMemWriteEvent(); + EventFunctionWrapper farMemWriteEvent; + + // management functions + void setNextState(reqBufferEntry* orbEntry); + void handleNextState(reqBufferEntry* orbEntry); + void sendRespondToRequestor(PacketPtr pkt, Tick static_latency); + void printQSizes() {} + void handleRequestorPkt(PacketPtr pkt); + void handleRequestorPktAtomic(PacketPtr pkt); + void checkHitOrMiss(reqBufferEntry* orbEntry); + bool checkHitOrMissAtomic(unsigned index, unsigned way, PacketPtr pkt); + bool checkDirty(Addr index, int way); + void handleDirtyCacheLine(Addr dirtyLineAddr); + bool checkConflictInORB(PacketPtr pkt); + void checkConflictInCRB(reqBufferEntry* orbEntry); + bool resumeConflictingReq(reqBufferEntry* orbEntry); + void logStatsPolMan(reqBufferEntry* orbEntry); + void accessAndRespond(PacketPtr pkt, Tick static_latency); + PacketPtr getPacket(Addr addr, unsigned size, const MemCmd& cmd, Request::FlagsType flags = 0); + Tick accessLatency(); + bool findInORB(Addr addr); + unsigned findDupInORB(Addr addr); + + unsigned countTagCheckInORB(); + unsigned countLocRdInORB(); + unsigned countFarRdInORB(); + unsigned countLocWrInORB(); + unsigned countFarWr(); + + Addr returnIndexDC(Addr pkt_addr, unsigned size); + Addr returnTagDC(Addr pkt_addr, unsigned size); + int returnWayDC(Addr index, Addr tag); + int findMatchingWay(Addr index, Addr tag); + int getCandidateWay(Addr index); + int findEmptyWay(Addr index); + + // port management + void locMemRecvReqRetry(); + void farMemRecvReqRetry(); + + bool locMemRecvTimingResp(PacketPtr pkt); + bool farMemRecvTimingResp(PacketPtr pkt); + struct PolicyManagerStats : public statistics::Group + { + PolicyManagerStats(PolicyManager &polMan); + + void regStats() override; + + const PolicyManager &polMan; + + // All statistics that the model needs to capture + statistics::Scalar readReqs; + statistics::Scalar writeReqs; + + statistics::Scalar servicedByWrQ; + statistics::Scalar servicedByFB; + statistics::Scalar mergedWrBursts; + statistics::Scalar mergedWrPolManWB; + statistics::Scalar mergedWrLocMemFB; + + statistics::Scalar numRdRetry; + statistics::Scalar numWrRetry; + + statistics::Vector readPktSize; + statistics::Vector writePktSize; + + statistics::Scalar bytesReadWrQ; + statistics::Scalar bytesReadSys; + statistics::Scalar bytesWrittenSys; + + // 
Average bandwidth + statistics::Formula avgRdBWSys; + statistics::Formula avgWrBWSys; + + statistics::Scalar totGap; + statistics::Formula avgGap; + + // DRAM Cache Specific Stats + statistics::Average avgORBLen; + statistics::Average avgTagCheckQLenStrt; + statistics::Average avgLocRdQLenStrt; + statistics::Average avgLocWrQLenStrt; + statistics::Average avgFarRdQLenStrt; + statistics::Average avgFarWrQLenStrt; + + statistics::Average avgTagCheckQLenEnq; + statistics::Average avgLocRdQLenEnq; + statistics::Average avgLocWrQLenEnq; + statistics::Average avgFarRdQLenEnq; + statistics::Average avgFarWrQLenEnq; + + statistics::Scalar numWrBacks; + statistics::Scalar totNumConf; + statistics::Scalar totNumORBFull; + statistics::Scalar totNumCRBFull; + + statistics::Scalar maxNumConf; + + statistics::Scalar sentTagCheckPort; + statistics::Scalar failedTagCheckPort; + statistics::Scalar sentLocRdPort; + statistics::Scalar sentLocWrPort; + statistics::Scalar failedLocRdPort; + statistics::Scalar failedLocWrPort; + statistics::Scalar sentFarRdPort; + statistics::Scalar sentFarWrPort; + statistics::Scalar failedFarRdPort; + statistics::Scalar failedFarWrPort; + + statistics::Scalar totPktLifeTime; + statistics::Scalar totPktLifeTimeRd; + statistics::Scalar totPktLifeTimeWr; + statistics::Scalar totPktORBTime; + statistics::Scalar totPktORBTimeRd; + statistics::Scalar totPktORBTimeWr; + statistics::Scalar totPktRespTime; + statistics::Scalar totPktRespTimeRd; + statistics::Scalar totPktRespTimeWr; + statistics::Scalar totTimeTagCheckRes; + statistics::Scalar totTimeTagCheckResRd; + statistics::Scalar totTimeTagCheckResWr; + statistics::Scalar totTimeTagCheckResRdH; + statistics::Scalar totTimeTagCheckResRdMC; + statistics::Scalar totTimeTagCheckResRdMD; + statistics::Scalar totTimeInLocRead; + statistics::Scalar totTimeInLocWrite; + statistics::Scalar totTimeInFarRead; + statistics::Scalar totTimeFarRdtoSend; + statistics::Scalar totTimeFarRdtoRecv; + statistics::Scalar totTimeFarWrtoSend; + + statistics::Formula avgPktLifeTime; + statistics::Formula avgPktLifeTimeRd; + statistics::Formula avgPktLifeTimeWr; + statistics::Formula avgPktORBTime; + statistics::Formula avgPktORBTimeRd; + statistics::Formula avgPktORBTimeWr; + statistics::Formula avgPktRespTime; + statistics::Formula avgPktRespTimeRd; + statistics::Formula avgPktRespTimeWr; + statistics::Formula avgTimeTagCheckRes; + statistics::Formula avgTimeTagCheckResRd; + statistics::Formula avgTimeTagCheckResWr; + statistics::Formula avgTimeTagCheckResRdH; + statistics::Formula avgTimeTagCheckResRdMC; + statistics::Formula avgTimeTagCheckResRdMD; + statistics::Formula avgTimeInLocRead; + statistics::Formula avgTimeInLocWrite; + statistics::Formula avgTimeInFarRead; + statistics::Formula avgTimeFarRdtoSend; + statistics::Formula avgTimeFarRdtoRecv; + statistics::Formula avgTimeFarWrtoSend; + + statistics::Scalar numTotHits; + statistics::Scalar numTotMisses; + statistics::Scalar numColdMisses; + statistics::Scalar numColdMissesSet; + statistics::Scalar numHotMisses; + statistics::Scalar numRdMissClean; + statistics::Scalar numRdMissDirty; + statistics::Scalar numRdHit; + statistics::Scalar numWrMissClean; + statistics::Scalar numWrMissDirty; + statistics::Scalar numWrHit; + statistics::Scalar numRdHitDirty; + statistics::Scalar numRdHitClean; + statistics::Scalar numWrHitDirty; + statistics::Scalar numWrHitClean; + + statistics::Formula missRatio; + statistics::Formula dirtyRatio; + statistics::Histogram missDistance; + statistics::Histogram 
blkReuse; + statistics::Histogram ticksBeforeEviction; + + }; + + PolicyManagerStats polManStats; + + public: + + PolicyManager(const PolicyManagerParams &p); + + void init(); + + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID); + + // For preparing for checkpoints + DrainState drain() override; + + // Serializes the tag state so that we don't have to warm up each time. + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + + bool recvReadFlushBuffer(Addr addr) override; + + protected: + + Tick recvAtomic(PacketPtr pkt); + Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); + void recvFunctional(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt); +}; + +} // namespace memory +} // namespace gem5 + +#endif //__POLICY_MANAGER_HH__ diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 5a5eaffa02..ea0ecc2e02 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -52,6 +52,7 @@ #include "mem/cache/replacement_policies/weighted_lru_rp.hh" #include "mem/ruby/protocol/AccessPermission.hh" #include "mem/ruby/system/RubySystem.hh" +#include "mem/simple_mem.hh" namespace gem5 { @@ -436,9 +437,20 @@ CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const if (request_type != RubyRequestType_NULL) { Tick lastAccessTick; lastAccessTick = m_cache[i][j]->getLastAccess(); - tr->addRecord(cntrl, m_cache[i][j]->m_Address, + // I want to get the data from the backing store if using + // access backing store + DataBlock data; + Addr addr = m_cache[i][j]->m_Address; + auto rs = params().ruby_system; + if (rs->getAccessBackingStore()) { + uint8_t *ptr = rs->getPhysMem(addr)->toHostAddr(addr); + data.setData(ptr, 0, rs->getBlockSizeBytes()); + } else { + data = m_cache[i][j]->getDataBlk(); + } + tr->addRecord(cntrl, addr, 0, request_type, lastAccessTick, - m_cache[i][j]->getDataBlk()); + data); warmedUpBlocks++; } } diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index a63bb02748..001d3cb152 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -69,7 +69,7 @@ namespace ruby class CacheMemory : public SimObject { public: - typedef RubyCacheParams Params; + PARAMS(RubyCache); typedef std::shared_ptr ReplData; CacheMemory(const Params &p); ~CacheMemory(); diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index ae21dc95ad..69177c0e08 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -367,9 +367,18 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) pkt->getAddr(), (MachineType)mem_interface_type); AbstractController *mem_interface = rs->m_abstract_controls[mem_interface_type][id.getNum()]; + // Tick latency; + // if (access_backing_store) { + // rs->getPhysMem(pkt->getAddr())->access(pkt); + // latency = 1000; + // } else { + // latency = mem_interface->recvAtomic(pkt); + // } + Tick latency = mem_interface->recvAtomic(pkt); if (access_backing_store) - rs->getPhysMem()->access(pkt); + rs->getPhysMem(pkt->getAddr())->access(pkt); + return latency; } @@ -411,7 +420,7 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt) // The following command performs the real functional access. // This line should be removed once Ruby supplies the official version // of data. 
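The Ruby changes in this patch replace the single phys_mem SimpleMemory with a set of memories kept in an AddrRangeMap, so every backing-store access first resolves the memory that owns the address via getPhysMem(pkt->getAddr()). A hedged configuration sketch of how the new VectorParam might be populated; the two region names and sizes are invented for illustration, and system.ruby is assumed to be an already-constructed RubySystem:

```python
# Assumed snippet: two backing SimpleMemory regions handed to RubySystem.phys_mem,
# which this patch turns into a VectorParam of SimpleMemory.
from m5.objects import AddrRange, SimpleMemory

near_backing = SimpleMemory(range=AddrRange(0, size="2GiB"), in_addr_map=False)
far_backing = SimpleMemory(range=AddrRange("2GiB", size="6GiB"), in_addr_map=False)

# system.ruby is assumed to exist in the surrounding configuration:
# system.ruby.phys_mem = [near_backing, far_backing]
```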
- rs->getPhysMem()->functionalAccess(pkt); + rs->getPhysMem(pkt->getAddr())->functionalAccess(pkt); } else { bool accessSucceeded = false; bool needsResponse = pkt->needsResponse(); @@ -632,7 +641,7 @@ RubyPort::MemResponsePort::hitCallback(PacketPtr pkt) auto dmem = owner.system->getDeviceMemory(pkt); dmem->access(pkt); } else if (owner.system->isMemAddr(pkt->getAddr())) { - rs->getPhysMem()->access(pkt); + rs->getPhysMem(pkt->getAddr())->access(pkt); } else { panic("Packet is in neither device nor system memory!"); } diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index b38c903b09..e30804df35 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -94,7 +94,15 @@ RubySystem::RubySystem(const Params &p) statistics::registerDumpCallback([this]() { collateStats(); }); // Create the profiler m_profiler = new Profiler(p, this); - m_phys_mem = p.phys_mem; + + // Set up the physical memories for the backing store + for (auto &mem : p.phys_mem) { + m_phys_mem.insert(mem->getAddrRange(), mem); + } + + fatal_if(m_access_backing_store && m_phys_mem.empty(), + "If using access backing store, a phys_mem must be provided to the " + "Ruby system."); } void @@ -218,10 +226,8 @@ RubySystem::memWriteback() } DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); - // If there is no dirty block, we don't need to flush the cache - if (m_cache_recorder->getNumRecords() == 0) - { - m_cooldown_enabled = false; + if (m_access_backing_store) { + // Nothing to flush if we're using access backing store. return; } diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index e16d699204..7e51d60d6c 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -37,6 +37,7 @@ #include +#include "base/addr_range_map.hh" #include "base/callback.hh" #include "base/output.hh" #include "mem/packet.hh" @@ -75,7 +76,13 @@ class RubySystem : public ClockedObject static bool getWarmupEnabled() { return m_warmup_enabled; } static bool getCooldownEnabled() { return m_cooldown_enabled; } - memory::SimpleMemory *getPhysMem() { return m_phys_mem; } + memory::SimpleMemory *getPhysMem(Addr addr) { + auto it = m_phys_mem.contains(addr); + panic_if (it == m_phys_mem.end(), + "Cannot find physical memory for address %#x", addr); + return it->second; + } + Cycles getStartCycle() { return m_start_cycle; } bool getAccessBackingStore() { return m_access_backing_store; } @@ -142,7 +149,7 @@ class RubySystem : public ClockedObject static bool m_warmup_enabled; static unsigned m_systems_to_warmup; static bool m_cooldown_enabled; - memory::SimpleMemory *m_phys_mem; + AddrRangeMap m_phys_mem; const bool m_access_backing_store; //std::vector m_networks; diff --git a/src/mem/ruby/system/RubySystem.py b/src/mem/ruby/system/RubySystem.py index 64e39bda4c..36b5d0dcf5 100644 --- a/src/mem/ruby/system/RubySystem.py +++ b/src/mem/ruby/system/RubySystem.py @@ -48,7 +48,7 @@ class RubySystem(ClockedObject): 64, "number of bits that a memory address requires" ) - phys_mem = Param.SimpleMemory(NULL, "") + phys_mem = VectorParam.SimpleMemory([], "Memories for backing store") system = Param.System(Parent.any, "system object") access_backing_store = Param.Bool( diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc index 0d4b2fca97..ce1a294ae6 100644 --- a/src/mem/xbar.cc +++ b/src/mem/xbar.cc @@ -123,8 +123,8 @@ BaseXBar::calcPacketTiming(PacketPtr pkt, Tick header_delay) // do a quick sanity check to ensure the timings are not being // ignored, 
note that this specific value may cause problems for // slower interconnects - panic_if(pkt->headerDelay > sim_clock::as_int::us, - "Encountered header delay exceeding 1 us\n"); + // panic_if(pkt->headerDelay > sim_clock::as_int::us, + // "Encountered header delay exceeding 1 us\n"); if (pkt->hasData()) { // the payloadDelay takes into account the relative time to @@ -196,8 +196,8 @@ BaseXBar::Layer::tryTiming(SrcType* src_port) // for a retry from the peer if (state == BUSY || waitingForPeer != NULL) { // the port should not be waiting already - assert(std::find(waitingForLayer.begin(), waitingForLayer.end(), - src_port) == waitingForLayer.end()); + // assert(std::find(waitingForLayer.begin(), waitingForLayer.end(), + // src_port) == waitingForLayer.end()); // put the port at the end of the retry list waiting for the // layer to be freed up (and in the case of a busy peer, for diff --git a/src/python/SConscript b/src/python/SConscript index eaaea203f0..766b6edd62 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -191,6 +191,7 @@ PySource('gem5.components.memory', 'gem5/components/memory/memory.py') PySource('gem5.components.memory', 'gem5/components/memory/single_channel.py') PySource('gem5.components.memory', 'gem5/components/memory/multi_channel.py') PySource('gem5.components.memory', 'gem5/components/memory/hbm.py') +PySource('gem5.components.memory', 'gem5/components/memory/dcache.py') PySource('gem5.components.memory.dram_interfaces', 'gem5/components/memory/dram_interfaces/__init__.py') PySource('gem5.components.memory.dram_interfaces', diff --git a/src/python/gem5/components/memory/__init__.py b/src/python/gem5/components/memory/__init__.py index 546d5d98ed..2ea8c0651d 100644 --- a/src/python/gem5/components/memory/__init__.py +++ b/src/python/gem5/components/memory/__init__.py @@ -37,6 +37,9 @@ from .multi_channel import DualChannelDDR4_2400 from .multi_channel import DualChannelLPDDR3_1600 from .hbm import HBM2Stack +from .dcache import CascadeLakeCache +from .dcache import OracleCache +from .dcache import RambusCache try: from .dramsys import DRAMSysMem diff --git a/src/python/gem5/components/memory/dcache.py b/src/python/gem5/components/memory/dcache.py new file mode 100644 index 0000000000..56915f04a0 --- /dev/null +++ b/src/python/gem5/components/memory/dcache.py @@ -0,0 +1,190 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" DRAM cache based memory system. + Uses a PolicyManager together with two other memory systems. +""" + +from .memory import ChanneledMemory +from .abstract_memory_system import AbstractMemorySystem +from ..boards.abstract_board import AbstractBoard +from math import log +from ...utils.override import overrides +from m5.objects import AddrRange, DRAMInterface, Port, PolicyManager, L2XBar, IOXBar +from typing import Type, Optional, Union, Sequence, Tuple, List +from .memory import _try_convert +from .dram_interfaces.hbm import TDRAM +from .dram_interfaces.ddr4 import DDR4_2400_8x8 +from .multi_channel import DualChannelDDR4_2400 +from .single_channel import SingleChannelDDR4_2400 + +class DCacheSystem(AbstractMemorySystem): + """ + This class creates a DRAM cache based memory system. + It connects a near (DRAM cache) memory system and a far (backing) + memory system through a DRAM cache policy manager. + """ + + def __init__( + self, + loc_mem: Type[ChanneledMemory], + far_mem: Type[ChanneledMemory], + loc_mem_policy: Optional[str] = None, + size: Optional[str] = None, + cache_size: Optional[str] = None, + ) -> None: + """ + :param loc_mem_policy: DRAM cache policy to be used + :param size: Optionally specify the size of the DRAM controller's + address space. By default, it starts at 0 and ends at the size of + the DRAM device specified + :param cache_size: capacity of the DRAM cache, forwarded to the + policy manager's dram_cache_size parameter + """ + super().__init__() + + self._size = size + + self.policy_manager = PolicyManager() + self.policy_manager.static_frontend_latency = "10ns" + self.policy_manager.static_backend_latency = "10ns" + + + self.policy_manager.loc_mem_policy = loc_mem_policy + self.policy_manager.bypass_dcache = False + + self.policy_manager.dram_cache_size = cache_size + self.policy_manager.cache_warmup_ratio = 0.95 + + self.loc_mem = loc_mem() + self.far_mem = far_mem() + + for dram in self.loc_mem._dram: + dram.in_addr_map = False + dram.null = True + #dram.range = AddrRange('1GiB') + + # TODO: the policy manager's loc_mem is currently a single DRAM + # interface; it needs to be updated to support a multi-channel local + # memory, after which this stdlib component can be updated as well.
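+ # The policy manager sits between the rest of the system and the two + # memories: its loc_req_port drives the near (DRAM cache) memory + # controller and its far_req_port drives the far (backing) memory + # controller, as wired up below.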
+ self.policy_manager.loc_mem = self.loc_mem._dram[0] + + self.policy_manager.orb_max_size = 128 + + for dram in self.far_mem._dram: + dram.in_addr_map = False + dram.null = True + # The DDR4_2400 DRAM interfaces default to a write buffer size + # of 128; limit the read and write buffers to 64 entries here. + dram.read_buffer_size = 64 + dram.write_buffer_size = 64 + + self._loc_mem_controller = self.loc_mem.get_memory_controllers()[0] + self._far_mem_controller = self.far_mem.get_memory_controllers()[0] + + self._loc_mem_controller.consider_oldest_write = True + self._loc_mem_controller.static_frontend_latency = "1ns" + self._loc_mem_controller.static_backend_latency = "1ns" + + self._far_mem_controller.static_frontend_latency = "1ns" + self._far_mem_controller.static_backend_latency = "1ns" + + self._loc_mem_controller.port = self.policy_manager.loc_req_port + self._far_mem_controller.port = self.policy_manager.far_req_port + + # Alternative wiring through crossbars, if needed: + #self.farMemXBar = L2XBar(width=64) + #self.nearMemXBar = L2XBar(width=64) + + #self.policy_manager.far_req_port = self.farMemXBar.cpu_side_ports + #self.policy_manager.loc_req_port = self.nearMemXBar.cpu_side_ports + + #for ctrl in self.loc_mem.get_memory_controllers(): + # self.nearMemXBar.mem_side_ports = ctrl.port + + #for ctrl in self.far_mem.get_memory_controllers(): + # self.farMemXBar.mem_side_ports = ctrl.port + + @overrides(AbstractMemorySystem) + def get_size(self) -> int: + return self._size + + @overrides(AbstractMemorySystem) + def set_memory_range(self, ranges: List[AddrRange]) -> None: + + self.policy_manager.range = ranges[0] + for dram in self.far_mem._dram: + dram.range = ranges[0] + for dram in self.loc_mem._dram: + dram.range = ranges[0] + + @overrides(AbstractMemorySystem) + def incorporate_memory(self, board: AbstractBoard) -> None: + pass + + @overrides(AbstractMemorySystem) + def get_memory_controllers(self): + return [self.policy_manager] + + @overrides(AbstractMemorySystem) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return [(self.policy_manager.range, self.policy_manager.port)] + +def SingleChannelTDRAM( + size: Optional[str] = None, +) -> AbstractMemorySystem: + if not size: + size = "256MiB" + return ChanneledMemory( + TDRAM, + 1, + 64, + size=size + ) + + +def CascadeLakeCache(cache_size) -> AbstractMemorySystem: + return DCacheSystem( + SingleChannelTDRAM, + SingleChannelDDR4_2400, + 'CascadeLakeNoPartWrs', + size='64GiB', + cache_size=cache_size) + +def OracleCache(cache_size) -> AbstractMemorySystem: + return DCacheSystem( + SingleChannelTDRAM, + SingleChannelDDR4_2400, + 'Oracle', + size='64GiB', + cache_size=cache_size) + +def RambusCache(cache_size) -> AbstractMemorySystem: + return DCacheSystem( + SingleChannelTDRAM, + SingleChannelDDR4_2400, + 'Rambus', + size='64GiB', + cache_size=cache_size) \ No newline at end of file diff --git a/src/python/gem5/components/memory/dram_interfaces/hbm.py b/src/python/gem5/components/memory/dram_interfaces/hbm.py index 5063c4d9e1..daece11fad 100644 --- a/src/python/gem5/components/memory/dram_interfaces/hbm.py +++ b/src/python/gem5/components/memory/dram_interfaces/hbm.py @@ -276,3 +276,91 @@ class HBM_2000_4H_1x64(DRAMInterface): write_buffer_size = 64 two_cycle_activate = True + +class TDRAM(DRAMInterface): + + # 32-bit interface for a single pseudo channel + device_bus_width = 32 + + # burst length of 16 + burst_length = 16 + + # capacity of a single pseudo channel (one 1GiB device per rank, + # one rank per channel) + device_size = "1GiB" + +
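+ # With a 32-bit device bus and a burst length of 16, one burst + # transfers 32 * 16 / 8 = 64 bytes, i.e. one cache line per access.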
device_rowbuffer_size = "2KiB" + + # a single 32-bit device per rank (1x32 configuration) + devices_per_rank = 1 + + ranks_per_channel = 1 + + banks_per_rank = 32 + + bank_groups_per_rank = 8 + + # 2000 MHz for a 4Gbps DDR data rate + tCK = "0.5ns" + + # TDRAM-specific tag and read-flush-buffer timing parameters + tTAGBURST = "0.5ns" + tRLFAST = "7.5ns" + tHM2DQ = "0ns" + tRTW_int = "1ns" + tRFBD = "1ns" + tRCD_FAST = "7.5ns" + enable_read_flush_buffer = True + flushBuffer_high_thresh_perc = 80 + + tRP = "14ns" + + tCCD_L = "2ns" + + tRCD = "12ns" + tRCD_WR = "6ns" + tCL = "18ns" + tCWL = "7ns" + tRAS = "28ns" + + tBURST = "2ns" + + # value for 2Gb device from JEDEC spec + tRFC = "220ns" + + # value for 2Gb device from JEDEC spec + tREFI = "3.9us" + + tWR = "14ns" + tRTP = "5ns" + tWTR = "4ns" + tWTR_L = "9ns" + tRTW = "18ns" + + # tAAD from Rambus + tAAD = "1ns" + + # single rank device, set to 0 + tCS = "0ns" + + tRRD = "2ns" + tRRD_L = "2ns" + + # for a single pseudo channel + tXAW = "16ns" + activation_limit = 8 + + # power-down exit delay (16 tCK) + tXP = "8ns" + + # self-refresh exit latency, on the order of tRFC + tXP + tXS = "216ns" + + page_policy = 'close' + + read_buffer_size = 64 + write_buffer_size = 64 + + two_cycle_activate = True + + addr_mapping = 'RoCoRaBaCh' diff --git a/src/python/gem5/components/memory/multi_channel.py b/src/python/gem5/components/memory/multi_channel.py index 1f14190c97..732dc1d49a 100644 --- a/src/python/gem5/components/memory/multi_channel.py +++ b/src/python/gem5/components/memory/multi_channel.py @@ -60,7 +60,6 @@ def DualChannelDDR4_2400( """ return ChanneledMemory(DDR4_2400_8x8, 2, 64, size=size) - def DualChannelLPDDR3_1600( size: Optional[str] = None, ) -> AbstractMemorySystem: diff --git a/src/python/gem5/components/processors/simple_switchable_processor.py b/src/python/gem5/components/processors/simple_switchable_processor.py index e3978412c3..fa8990d0c8 100644 --- a/src/python/gem5/components/processors/simple_switchable_processor.py +++ b/src/python/gem5/components/processors/simple_switchable_processor.py @@ -123,3 +123,39 @@ def switch(self): self.switch_to_processor(self._start_key) self._current_is_start = not self._current_is_start + +class MySimpleProcessor(SimpleSwitchableProcessor): + def __init__( + self, + starting_core_type: CPUTypes, + switch_core_type: CPUTypes, + num_cores: int, + isa = None, + ) -> None: + if not isa: + warn( + "An ISA for the SimpleSwitchableProcessor was not set. This " + "will result in usage of `runtime.get_runtime_isa` to obtain " + "the ISA. This function is deprecated and will be removed in " + "future releases of gem5. Please explicitly state the ISA " + "via the processor constructor." + ) + if num_cores <= 0: + raise AssertionError("Number of cores must be a positive integer!") + self._start_key = "switch" + self._switch_key = "start" + self._current_is_start = True + self._mem_mode = get_mem_mode(starting_core_type) + switchable_cores = { + self._start_key: [ + SimpleCore(cpu_type=starting_core_type, core_id=i, isa=isa) + for i in range(num_cores) + ], + self._switch_key: [ + SimpleCore(cpu_type=switch_core_type, core_id=i, isa=isa) + for i in range(num_cores) + ], + } + SwitchableProcessor.__init__(self, + switchable_cores=switchable_cores, starting_cores=self._start_key + ) \ No newline at end of file diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py index cffe864f06..e150440979 100644 --- a/src/python/gem5/simulate/exit_event.py +++ b/src/python/gem5/simulate/exit_event.py @@ -43,6 +43,7 @@ class ExitEvent(Enum): FAIL = "fail" # An exit because the simulation has failed.
CHECKPOINT = "checkpoint" # An exit to load a checkpoint. SCHEDULED_TICK = "scheduled tick exit" + SCHEDULED_TICK_PROGRESS = "progress update" MAX_TICK = "max tick" # An exit due to a maximum tick value being met. USER_INTERRUPT = ( # An exit due to a user interrupt (e.g., cntr + c) "user interupt" @@ -53,6 +54,7 @@ class ExitEvent(Enum): PERF_COUNTER_DISABLE = "performance counter disabled" PERF_COUNTER_RESET = "performance counter reset" PERF_COUNTER_INTERRUPT = "performance counter interrupt" + CACHE_WARMUP = "dram cache is warmed up" @classmethod def translate_exit_status(cls, exit_string: str) -> "ExitEvent": @@ -82,6 +84,8 @@ def translate_exit_status(cls, exit_string: str) -> "ExitEvent": return ExitEvent.MAX_TICK elif exit_string == "Tick exit reached": return ExitEvent.SCHEDULED_TICK + elif exit_string == "progress_update": + return ExitEvent.SCHEDULED_TICK_PROGRESS elif exit_string == "switchcpu": return ExitEvent.SWITCHCPU elif exit_string == "m5_fail instruction encountered": @@ -102,6 +106,9 @@ def translate_exit_status(cls, exit_string: str) -> "ExitEvent": return ExitEvent.PERF_COUNTER_RESET elif exit_string == "performance counter interrupt": return ExitEvent.PERF_COUNTER_INTERRUPT + elif exit_string == "cacheIsWarmedup": + # This is for the DRAM cache warmup + return ExitEvent.CACHE_WARMUP elif exit_string.endswith("will terminate the simulation.\n"): # This is for the traffic generator exit event return ExitEvent.EXIT diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py index e355d200ad..9b5c7f552a 100644 --- a/src/python/gem5/simulate/simulator.py +++ b/src/python/gem5/simulate/simulator.py @@ -277,6 +277,7 @@ def print_hello() -> bool: ExitEvent.USER_INTERRUPT: exit_generator(), ExitEvent.MAX_TICK: exit_generator(), ExitEvent.SCHEDULED_TICK: exit_generator(), + ExitEvent.SCHEDULED_TICK_PROGRESS: exit_generator(), ExitEvent.SIMPOINT_BEGIN: warn_default_decorator( reset_stats_generator, "simpoint begin", diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index f2c1b30def..96695ec1e6 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -38,6 +38,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from builtins import print import sys from types import FunctionType, MethodType, ModuleType from functools import wraps @@ -1075,7 +1076,11 @@ def find_all(self, ptype): if issubclass(pdesc.ptype, ptype): match_obj = self._values[pname] if not isproxy(match_obj) and not isNullPointer(match_obj): - all[match_obj] = True + if isinstance(match_obj, SimObjectVector): + for obj in match_obj: + all[obj] = True + else: + all[match_obj] = True # Also make sure to sort the keys based on the objects' path to # ensure that the order is the same on all hosts return sorted(all.keys(), key=lambda o: o.path()), True @@ -1231,7 +1236,9 @@ def getCCParams(self): # necessary to construct it. Does *not* recursively create # children. 
def getCCObject(self): + # print(self) if not self._ccObject: + # print(f"{self}: Doing initial creation") # Make sure this object is in the configuration hierarchy if not self._parent and not isRoot(self): raise RuntimeError("Attempt to instantiate orphan node") @@ -1241,10 +1248,14 @@ def getCCObject(self): if not self.abstract: params = self.getCCParams() self._ccObject = params.create() + # print(f"{self}: Actually created") + # else: + # print("I am abstract?") elif self._ccObject == -1: raise RuntimeError( - f"{self.path()}: Cycle found in configuration hierarchy." + "%s: Cycle found in configuration hierarchy." % self.path() ) + # print(f"retuning {self._ccObject}") return self._ccObject def descendants(self): diff --git a/state_machine b/state_machine new file mode 100644 index 0000000000..3c8e45e42b --- /dev/null +++ b/state_machine @@ -0,0 +1,32 @@ +STATE 0 0 LINEAR 70 0 33554368 64 500 500 33554368 +STATE 1 1000000 IDLE +STATE 2 0 EXIT +STATE 3 0 LINEAR 70 0 67108800 64 500 500 67108800 +STATE 4 1000000 IDLE +STATE 5 0 EXIT +STATE 6 1000000 IDLE +INIT 0 +TRANSITION 0 1 1 +TRANSITION 1 2 1 +TRANSITION 2 3 1 +TRANSITION 3 4 1 +TRANSITION 4 5 1 +TRANSITION 5 6 1 +TRANSITION 6 6 1 + +# STATE 0 0 LINEAR 50 0 960 64 500 500 1024 +# STATE 1 1000000 IDLE +# STATE 2 0 EXIT +# STATE 3 0 RANDOM 50 0 33554432 64 500 500 16777216 +# STATE 4 1000000 IDLE +# STATE 5 0 EXIT +# STATE 6 1000000 IDLE +# INIT 0 +# TRANSITION 0 1 1 +# TRANSITION 1 2 1 +# TRANSITION 2 3 1 +# TRANSITION 3 4 1 +# TRANSITION 4 5 1 +# TRANSITION 5 6 1 +# TRANSITION 6 6 1 + diff --git a/test.sh b/test.sh new file mode 100755 index 0000000000..73ac2bc2fc --- /dev/null +++ b/test.sh @@ -0,0 +1,937 @@ + + + +######### no-dc + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 0 0 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/lu 
configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/qdel/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 0 0 1 & + + +######### no-dc link 750 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 1875 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 1875 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 1875 1 & + + +######### tdram link 750 + +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 1 1 1875 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/tdram/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 1 1 1875 0 & + +######### cascade link 750 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 1875 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 1875 0 & 
+# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 1875 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/750/cascade/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 1875 0 & + + +######### tdram 32 way + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D 
RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 32 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 32 0 0 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 32 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/32/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 32 0 0 0 & + +######### tdram 4 way + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt 
-re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 4 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 4 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/4/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 4 0 0 0 & + +######### tdram 2 way + +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 2 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 2 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/2/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 2 0 0 0 & + + +######### tdram 16 way + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 16 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 16 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/16/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 16 0 0 0 & + + + +######### tdram 8 way + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 8 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 8 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/set-assoc/tdram/8/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 8 0 0 0 & + +######### no-dc link 500 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 1250 1 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 1250 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 1250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 1250 1 & + + 
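+######### NOTE: positional-argument pattern (editor's sketch, assumptions flagged)
+# Every invocation in this script passes the same positional arguments to
+# configs-npb-gapbs-chkpt-restore/restore_both.py:
+#   <workload> <class-or-scale> <dcache_policy> <assoc> <is_link> <link_lat> <trailing-flag>
+# This reading of the four trailing numbers is inferred from the output-directory
+# names only (restore_both.py itself is not part of this patch), so treat it as an
+# assumption. A minimal bash sketch that would regenerate the "no-dc link 500"
+# NPB class-C batch directly above, assuming a shell variable OUT for the
+# hard-coded output prefix, and kept commented out like the runs themselves:
+#
+# OUT=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/no-dc/npb/C
+# for wl in bt cg ft is lu mg sp ua; do
+#     build/X86_MESI_Two_Level/gem5.opt -re \
+#         --outdir=$OUT/$wl \
+#         configs-npb-gapbs-chkpt-restore/restore_both.py $wl C CascadeLakeNoPartWrs 1 1 1250 1 &
+# done
+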
+######### tdram link 500 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 1 1 1250 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 1 1250 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/tdram/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 1 1 1250 0 & + +######### cascade link 500 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 1250 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 1250 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py 
tc 22 CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 1250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/500/cascade/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 1250 0 & + + +######### no-dc link 250 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/cg 
configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 625 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/is 
configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 625 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 625 1 & + + + +######### tdram link 250 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/mg 
configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 1 1 625 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/tdram/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 1 1 625 0 & 
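+######### NOTE: link-latency sweep pattern (editor's sketch)
+# The link sweeps in this script repeat the same command set three times: the
+# directory label 500/250/100 is paired with link_lat values 1250/625/250, and
+# the policy alternates between RambusTagProbOpt (tdram) and CascadeLakeNoPartWrs
+# (cascade, or no-dc when the trailing flag is 1). A minimal bash sketch for one
+# workload across the three latencies, using the same hard-coded output prefix as
+# the runs above and kept commented out like them:
+#
+# for pair in 500:1250 250:625 100:250; do
+#     label=${pair%%:*}; lat=${pair##*:}
+#     build/X86_MESI_Two_Level/gem5.opt -re \
+#         --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/$label/tdram/gapbs/22/bfs \
+#         configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 1 $lat 0 &
+# done
+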
+ + + +######### cascade link 250 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 625 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/22/bfs 
configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 625 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/250/cascade/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 625 0 & + + + +######### no-dc link 100 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 250 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 250 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 250 1 & + + +######### tdram link 100 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 1 1 250 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/tdram/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 1 1 250 0 & + + +######### cascade link 100 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 250 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 250 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 
1 250 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/100/cascade/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 250 0 & + + + + +######### no-dc link 50 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 125 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 
CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 125 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 125 1 & + + +######### tdram link 50 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 
RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 1 1 125 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/tdram/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 1 1 125 0 & + + + +######### cascade link 50 + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 1 125 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 1 125 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/link/50/cascade/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 1 125 0 & + + +######### no-dc + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 0 0 1 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 0 0 1 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/no-dc/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 0 0 1 & + + + +######### cascade + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 0 0 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 0 0 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/cascade/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 0 0 0 & + + + +######### TDRAM-NP + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 Rambus 1 0 0 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D Rambus 1 0 0 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 Rambus 1 0 0 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C Rambus 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram-np/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C Rambus 1 0 0 0 & + + +######### oracle + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D Oracle 1 0 0 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D Oracle 1 0 0 0 & + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C Oracle 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/oracle/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C Oracle 1 0 0 0 & + + + +####### tdram + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D RambusTagProbOpt 1 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/22/bc 
configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C RambusTagProbOpt 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/result-resub/8-ch/baseline/tdram/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C RambusTagProbOpt 1 0 0 0 & + +######## count pc ######## + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/25/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/25/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 25 CascadeLakeNoPartWrs 1 0 0 0 & +# 
build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/25/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/25/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 25 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/is configs-npb-gapbs-chkpt-restore/restore_both.py is D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp D CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/D/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua D CascadeLakeNoPartWrs 1 0 0 0 & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/22/bc configs-npb-gapbs-chkpt-restore/restore_both.py bc 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/restore_both.py bfs 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/22/cc configs-npb-gapbs-chkpt-restore/restore_both.py cc 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/22/pr configs-npb-gapbs-chkpt-restore/restore_both.py pr 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/restore_both.py sssp 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/gapbs/22/tc configs-npb-gapbs-chkpt-restore/restore_both.py tc 22 CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/bt configs-npb-gapbs-chkpt-restore/restore_both.py bt C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/cg configs-npb-gapbs-chkpt-restore/restore_both.py cg C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/ft configs-npb-gapbs-chkpt-restore/restore_both.py ft C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/is configs-npb-gapbs-chkpt-restore/restore_both.py is C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/lu configs-npb-gapbs-chkpt-restore/restore_both.py lu C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/mg configs-npb-gapbs-chkpt-restore/restore_both.py mg C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/sp configs-npb-gapbs-chkpt-restore/restore_both.py sp C CascadeLakeNoPartWrs 1 0 0 0 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/pcCount-resub/8-ch/npb/C/ua configs-npb-gapbs-chkpt-restore/restore_both.py ua C CascadeLakeNoPartWrs 1 0 0 0 & + + +######## checkpoints ######## + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/25/bc configs-npb-gapbs-chkpt-restore/checkpoint_both.py bc 25 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/25/bfs configs-npb-gapbs-chkpt-restore/checkpoint_both.py bfs 25 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/25/cc configs-npb-gapbs-chkpt-restore/checkpoint_both.py cc 25 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/25/pr configs-npb-gapbs-chkpt-restore/checkpoint_both.py pr 25 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/25/sssp configs-npb-gapbs-chkpt-restore/checkpoint_both.py sssp 25 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/25/tc configs-npb-gapbs-chkpt-restore/checkpoint_both.py tc 25 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/bt configs-npb-gapbs-chkpt-restore/checkpoint_both.py bt D & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/cg configs-npb-gapbs-chkpt-restore/checkpoint_both.py cg D & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/ft configs-npb-gapbs-chkpt-restore/checkpoint_both.py ft D & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/is configs-npb-gapbs-chkpt-restore/checkpoint_both.py is D & +# build/X86_MESI_Two_Level/gem5.opt -re 
--outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/lu configs-npb-gapbs-chkpt-restore/checkpoint_both.py lu D & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/mg configs-npb-gapbs-chkpt-restore/checkpoint_both.py mg D & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/sp configs-npb-gapbs-chkpt-restore/checkpoint_both.py sp D & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/D/ua configs-npb-gapbs-chkpt-restore/checkpoint_both.py ua D & + + +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/22/bc configs-npb-gapbs-chkpt-restore/checkpoint_both.py bc 22 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/22/bfs configs-npb-gapbs-chkpt-restore/checkpoint_both.py bfs 22 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/22/cc configs-npb-gapbs-chkpt-restore/checkpoint_both.py cc 22 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/22/pr configs-npb-gapbs-chkpt-restore/checkpoint_both.py pr 22 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/22/sssp configs-npb-gapbs-chkpt-restore/checkpoint_both.py sssp 22 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/gapbs/22/tc configs-npb-gapbs-chkpt-restore/checkpoint_both.py tc 22 & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/bt configs-npb-gapbs-chkpt-restore/checkpoint_both.py bt C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/cg configs-npb-gapbs-chkpt-restore/checkpoint_both.py cg C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/ft configs-npb-gapbs-chkpt-restore/checkpoint_both.py ft C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/is configs-npb-gapbs-chkpt-restore/checkpoint_both.py is C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/lu configs-npb-gapbs-chkpt-restore/checkpoint_both.py lu C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/mg configs-npb-gapbs-chkpt-restore/checkpoint_both.py mg C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/sp configs-npb-gapbs-chkpt-restore/checkpoint_both.py sp C & +# build/X86_MESI_Two_Level/gem5.opt -re --outdir=/home/babaie/projects/TDRAM-resubmission/cpt-resub/8-ch/atomic/npb/C/ua configs-npb-gapbs-chkpt-restore/checkpoint_both.py ua C & diff --git a/traffGen.py b/traffGen.py new file mode 100644 index 0000000000..80c65a32d7 --- /dev/null +++ b/traffGen.py @@ -0,0 +1,131 @@ +from m5.objects import * +import m5 +import argparse +from m5.objects.DRAMInterface import * +from m5.objects.NVMInterface import * + +args = argparse.ArgumentParser() + 
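+# Example invocation (a sketch; the gem5 binary path is an assumption taken
+# from the batch commands above and may differ for your build):
+#   build/X86_MESI_Two_Level/gem5.opt traffGen.py random 70 0 1 0
+# i.e. random traffic, 70% reads, extreme=0, always_hit=1, always_dirty=0.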
+args.add_argument(
+    "traffic_mode",
+    type=str,
+    help="Traffic pattern to generate: 'linear' or 'random'",
+)
+
+args.add_argument(
+    "rd_prct",
+    type=int,
+    help="Read percentage (0-100)",
+)
+
+args.add_argument(
+    "extreme",
+    type=int,
+    help="1 to set the policy manager's 'extreme' flag, 0 otherwise",
+)
+
+args.add_argument(
+    "hit_miss",
+    type=int,
+    help="1 to set always_hit on the policy manager (all accesses hit), 0 otherwise",
+)
+
+args.add_argument(
+    "clean_dirty",
+    type=int,
+    help="1 to set always_dirty on the policy manager (all lines dirty), 0 otherwise",
+)
+
+options = args.parse_args()
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = 'timing'
+
+system.generator = PyTrafficGen()
+
+# DRAM cache policy manager sitting in front of a TDRAM local memory and a
+# DDR4 backing (far) memory.
+system.mem_ctrl = PolicyManager(range=AddrRange('3GiB'))
+
+system.mem_ctrl.orb_max_size = 128
+system.mem_ctrl.assoc = 8
+system.mem_ctrl.static_frontend_latency = "10ns"
+system.mem_ctrl.static_backend_latency = "10ns"
+
+system.loc_mem_ctrl = MemCtrl()
+system.loc_mem_ctrl.dram = TDRAM(range=AddrRange('3GiB'), in_addr_map=False, null=True)
+system.mem_ctrl.loc_mem_policy = 'RambusTagProbOpt'
+
+system.mem_ctrl.loc_mem = system.loc_mem_ctrl.dram
+system.loc_mem_ctrl.static_frontend_latency = "1ns"
+system.loc_mem_ctrl.static_backend_latency = "1ns"
+system.loc_mem_ctrl.static_frontend_latency_tc = "0ns"
+system.loc_mem_ctrl.static_backend_latency_tc = "0ns"
+system.loc_mem_ctrl.consider_oldest_write = True
+system.loc_mem_ctrl.oldest_write_age_threshold = 2500000
+
+system.far_mem_ctrl = MemCtrl()
+system.far_mem_ctrl.dram = DDR4_2400_16x4(range=AddrRange('3GiB'), in_addr_map=False, null=True)
+system.far_mem_ctrl.dram.read_buffer_size = 64
+system.far_mem_ctrl.dram.write_buffer_size = 64
+system.far_mem_ctrl.static_frontend_latency = "1ns"
+system.far_mem_ctrl.static_backend_latency = "1ns"
+
+if options.extreme == 1:
+    system.mem_ctrl.extreme = True
+else:
+    system.mem_ctrl.extreme = False
+
+if options.hit_miss == 1:
+    system.mem_ctrl.always_hit = True
+else:
+    system.mem_ctrl.always_hit = False
+
+if options.clean_dirty == 1:
+    system.mem_ctrl.always_dirty = True
+else:
+    system.mem_ctrl.always_dirty = False
+
+system.mem_ctrl.dram_cache_size = "128MiB"
+
+system.generator.port = system.mem_ctrl.port
+system.loc_mem_ctrl.port = system.mem_ctrl.loc_req_port
+system.far_mem_ctrl.port = system.mem_ctrl.far_req_port
+
+def createRandomTraffic(tgen):
+    yield tgen.createRandom(10000000000,            # duration
+                            0,                      # min_addr
+                            AddrRange('3GiB').end,  # max_addr
+                            64,                     # block_size
+                            1000,                   # min_period
+                            1000,                   # max_period
+                            options.rd_prct,        # rd_perc
+                            0)                      # data_limit
+    yield tgen.createExit(0)
+
+def createLinearTraffic(tgen):
+    yield tgen.createLinear(10000000000,            # duration
+                            0,                      # min_addr
+                            AddrRange('3GiB').end,  # max_addr
+                            64,                     # block_size
+                            1000,                   # min_period
+                            1000,                   # max_period
+                            options.rd_prct,        # rd_perc
+                            0)                      # data_limit
+    yield tgen.createExit(0)
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+
+if options.traffic_mode == 'linear':
+    system.generator.start(createLinearTraffic(system.generator))
+elif options.traffic_mode == 'random':
+    system.generator.start(createRandomTraffic(system.generator))
+else:
+    print('Wrong traffic type! Exiting!')
+    exit()
+
+exit_event = m5.simulate()
+print(f"Exit reason {exit_event.getCause()}")
diff --git a/traffGen_stateMachine.py b/traffGen_stateMachine.py
new file mode 100644
index 0000000000..0577fb7a0b
--- /dev/null
+++ b/traffGen_stateMachine.py
@@ -0,0 +1,83 @@
+from m5.objects import *
+import m5
+import argparse
+from m5.objects.DRAMInterface import *
+from m5.objects.NVMInterface import *
+
+args = argparse.ArgumentParser()
+
+args.add_argument(
+    "associativity",
+    type=int,
+    help="Associativity of the DRAM cache",
+)
+
+options = args.parse_args()
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = 'timing'
+
+system.generator = TrafficGen(config_file="state_machine")
+system.generator.progress_check = "2ms"
+
+system.mem_ctrl = PolicyManager(range=AddrRange('4GiB'))
+
+system.mem_ctrl.orb_max_size = 8
+system.mem_ctrl.assoc = options.associativity
+system.mem_ctrl.static_frontend_latency = "10ns"
+system.mem_ctrl.static_backend_latency = "10ns"
+
+system.loc_mem_ctrl = MemCtrl()
+system.loc_mem_ctrl.dram = TDRAM(range=AddrRange('4GiB'), in_addr_map=False, null=True)
+system.mem_ctrl.loc_mem_policy = 'RambusTagProbOpt'
+
+system.mem_ctrl.loc_mem = system.loc_mem_ctrl.dram
+system.loc_mem_ctrl.dram.read_buffer_size = 4
+system.loc_mem_ctrl.dram.write_buffer_size = 4
+system.loc_mem_ctrl.static_frontend_latency = "1ns"
+system.loc_mem_ctrl.static_backend_latency = "1ns"
+system.loc_mem_ctrl.static_frontend_latency_tc = "0ns"
+system.loc_mem_ctrl.static_backend_latency_tc = "0ns"
+system.loc_mem_ctrl.consider_oldest_write = True
+system.loc_mem_ctrl.oldest_write_age_threshold = 2500000
+# system.loc_mem_ctrl.dram.tRLFAST = "32ns"
+# system.loc_mem_ctrl.dram.tRCD_FAST = "32ns"
+
+system.far_mem_ctrl = MemCtrl()
+system.far_mem_ctrl.dram = DDR4_2400_16x4(range=AddrRange('4GiB'), in_addr_map=False, null=True)
+system.far_mem_ctrl.dram.read_buffer_size = 4
+system.far_mem_ctrl.dram.write_buffer_size = 4
+system.far_mem_ctrl.static_frontend_latency = "1ns"
+system.far_mem_ctrl.static_backend_latency = "1ns"
+
+system.mem_ctrl.dram_cache_size = "16MiB"
+
+system.generator.port = system.mem_ctrl.port
+system.loc_mem_ctrl.port = system.mem_ctrl.loc_req_port
+system.far_mem_ctrl.port = system.mem_ctrl.far_req_port
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+
+exitSimCount = 0
+
+while True:
+    exit_event = m5.simulate()
+    print(f"Exit reason {exit_event.getCause()}")
+    if exit_event.getCause().endswith("will terminate the simulation.\n") and exitSimCount == 0:
+        # First exit event from the generator: dump and reset stats, then
+        # keep simulating.
+        print("First generator exit: dumping and resetting stats")
+        m5.stats.dump()
+        m5.stats.reset()
+        exitSimCount += 1
+
+    elif exit_event.getCause().endswith("will terminate the simulation.\n") and exitSimCount == 1:
+        # Second exit event from the generator: end the run.
+        print("Second generator exit: ending simulation")
+        break
+
+print(f"Exit reason {exit_event.getCause()}")
+print("Simulation complete")
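+# Example invocation (a sketch; the gem5 binary path is an assumption taken
+# from the batch commands above and may differ for your build):
+#   build/X86_MESI_Two_Level/gem5.opt traffGen_stateMachine.py 8
+# The positional argument sets the DRAM cache associativity. The loop above
+# dumps and resets stats on the first "will terminate the simulation" exit
+# event and stops after the second.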