From c73b7e50d06a8313174de0133ea4050ccb5959d9 Mon Sep 17 00:00:00 2001 From: Tiago Muck Date: Thu, 7 Feb 2019 13:37:40 -0600 Subject: [PATCH] mem: Snoop filter support for large systems Changed SnoopMask to use std::bitset instead of uint64 so we can simulate larger systems without having to workaround limitations on the number of ports. No noticeable performance drop was observed after this change. The size of the bitset is currently set to 256 which should fit most needs. Change-Id: I216882300500e2dcb789889756e73a1033271621 Signed-off-by: Tiago Muck Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18791 Reviewed-by: Daniel Carvalho Reviewed-by: Nikos Nikoleris Maintainer: Nikos Nikoleris Tested-by: kokoro --- src/mem/snoop_filter.cc | 41 +++++++++++++++++++++++------------------ src/mem/snoop_filter.hh | 21 +++++++++++---------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/src/mem/snoop_filter.cc b/src/mem/snoop_filter.cc index 3e1dae6c7..8168c3d91 100644 --- a/src/mem/snoop_filter.cc +++ b/src/mem/snoop_filter.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 ARM Limited + * Copyright (c) 2013-2017,2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -49,11 +49,13 @@ #include "debug/SnoopFilter.hh" #include "sim/system.hh" +const int SnoopFilter::SNOOP_MASK_SIZE; + void SnoopFilter::eraseIfNullEntry(SnoopFilterCache::iterator& sf_it) { SnoopItem& sf_item = sf_it->second; - if (!(sf_item.requested | sf_item.holder)) { + if ((sf_item.requested | sf_item.holder).none()) { cachedLocations.erase(sf_it); DPRINTF(SnoopFilter, "%s: Removed SF entry.\n", __func__); @@ -96,8 +98,7 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port) totRequests++; if (is_hit) { - // Single bit set -> value is a power of two - if (isPow2(interested)) + if (interested.count() == 1) hitSingleRequests++; else hitMultiRequests++; @@ -114,8 +115,9 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port) if (cpkt->needsResponse()) { if (!cpkt->cacheResponding()) { // Max one request per address per port - panic_if(sf_item.requested & req_port, "double request :( " \ - "SF value %x.%x\n", sf_item.requested, sf_item.holder); + panic_if((sf_item.requested & req_port).any(), + "double request :( SF value %x.%x\n", + sf_item.requested, sf_item.holder); // Mark in-flight requests to distinguish later on sf_item.requested |= req_port; @@ -126,7 +128,8 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port) // to the CPU, already -> the response will not be seen by this // filter -> we do not need to keep the in-flight request, but make // sure that we know that that cluster has a copy - panic_if(!(sf_item.holder & req_port), "Need to hold the value!"); + panic_if((sf_item.holder & req_port).none(), + "Need to hold the value!"); DPRINTF(SnoopFilter, "%s: not marking request. SF value %x.%x\n", __func__, sf_item.requested, sf_item.holder); @@ -134,7 +137,7 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port) } else { // if (!cpkt->needsResponse()) assert(cpkt->isEviction()); // make sure that the sender actually had the line - panic_if(!(sf_item.holder & req_port), "requester %x is not a " \ + panic_if((sf_item.holder & req_port).none(), "requester %x is not a " \ "holder :( SF value %x.%x\n", req_port, sf_item.requested, sf_item.holder); // CleanEvicts and Writebacks -> the sender and all caches above @@ -206,8 +209,8 @@ SnoopFilter::lookupSnoop(const Packet* cpkt) SnoopMask interested = (sf_item.holder | sf_item.requested); totSnoops++; - // Single bit set -> value is a power of two - if (isPow2(interested)) + + if (interested.count() == 1) hitSingleSnoops++; else hitMultiSnoops++; @@ -222,7 +225,7 @@ SnoopFilter::lookupSnoop(const Packet* cpkt) assert(cpkt->isWriteback() || cpkt->req->isUncacheable() || (cpkt->isInvalidate() == cpkt->needsWritable()) || cpkt->req->isCacheMaintenance()); - if (cpkt->isInvalidate() && !sf_item.requested) { + if (cpkt->isInvalidate() && sf_item.requested.none()) { // Early clear of the holder, if no other request is currently going on // @todo: This should possibly be updated even though we do not filter // upward snoops @@ -266,11 +269,12 @@ SnoopFilter::updateSnoopResponse(const Packet* cpkt, __func__, sf_item.requested, sf_item.holder); // The source should have the line - panic_if(!(sf_item.holder & rsp_mask), "SF value %x.%x does not have "\ - "the line\n", sf_item.requested, sf_item.holder); + panic_if((sf_item.holder & rsp_mask).none(), + "SF value %x.%x does not have the line\n", + sf_item.requested, sf_item.holder); // The destination should have had a request in - panic_if(!(sf_item.requested & req_mask), "SF value %x.%x missing "\ + panic_if((sf_item.requested & req_mask).none(), "SF value %x.%x missing "\ "the original request\n", sf_item.requested, sf_item.holder); // If the snoop response has no sharers the line is passed in @@ -287,7 +291,7 @@ SnoopFilter::updateSnoopResponse(const Packet* cpkt, // @todo Deal with invalidating responses sf_item.holder |= req_mask; sf_item.requested &= ~req_mask; - assert(sf_item.requested | sf_item.holder); + assert((sf_item.requested | sf_item.holder).any()); DPRINTF(SnoopFilter, "%s: new SF value %x.%x\n", __func__, sf_item.requested, sf_item.holder); } @@ -359,8 +363,9 @@ SnoopFilter::updateResponse(const Packet* cpkt, const SlavePort& slave_port) __func__, sf_item.requested, sf_item.holder); // Make sure we have seen the actual request, too - panic_if(!(sf_item.requested & slave_mask), "SF value %x.%x missing "\ - "request bit\n", sf_item.requested, sf_item.holder); + panic_if((sf_item.requested & slave_mask).none(), + "SF value %x.%x missing request bit\n", + sf_item.requested, sf_item.holder); sf_item.requested &= ~slave_mask; // Update the residency of the cache line. @@ -376,7 +381,7 @@ SnoopFilter::updateResponse(const Packet* cpkt, const SlavePort& slave_port) // Any other response implies that a cache above will have the // block. sf_item.holder |= slave_mask; - assert(sf_item.holder | sf_item.requested); + assert((sf_item.holder | sf_item.requested).any()); } DPRINTF(SnoopFilter, "%s: new SF value %x.%x\n", __func__, sf_item.requested, sf_item.holder); diff --git a/src/mem/snoop_filter.hh b/src/mem/snoop_filter.hh index 85cc75e9b..9c066155c 100644 --- a/src/mem/snoop_filter.hh +++ b/src/mem/snoop_filter.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016 ARM Limited + * Copyright (c) 2013-2016,2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,7 @@ #ifndef __MEM_SNOOP_FILTER_HH__ #define __MEM_SNOOP_FILTER_HH__ +#include #include #include @@ -86,6 +87,10 @@ */ class SnoopFilter : public SimObject { public: + + // Change for systems with more than 256 ports tracked by this object + static const int SNOOP_MASK_SIZE = 256; + typedef std::vector SnoopList; SnoopFilter (const SnoopFilterParams *p) : @@ -114,9 +119,9 @@ class SnoopFilter : public SimObject { } // make sure we can deal with this many ports - fatal_if(id > 8 * sizeof(SnoopMask), + fatal_if(id > SNOOP_MASK_SIZE, "Snoop filter only supports %d snooping ports, got %d\n", - 8 * sizeof(SnoopMask), id); + SNOOP_MASK_SIZE, id); } /** @@ -198,13 +203,9 @@ class SnoopFilter : public SimObject { /** * The underlying type for the bitmask we use for tracking. This - * limits the number of snooping ports supported per crossbar. For - * the moment it is an uint64_t to offer maximum - * scalability. However, it is possible to use e.g. a uint16_t or - * uint32_to slim down the footprint of the hash map (and - * ultimately improve the simulation performance). + * limits the number of snooping ports supported per crossbar. */ - typedef uint64_t SnoopMask; + typedef std::bitset SnoopMask; /** * Per cache line item tracking a bitmask of SlavePorts who have an @@ -314,7 +315,7 @@ SnoopFilter::maskToPortList(SnoopMask port_mask) const { SnoopList res; for (const auto& p : slavePorts) - if (port_mask & portToMask(*p)) + if ((port_mask & portToMask(*p)).any()) res.push_back(p); return res; } -- 2.30.2