From b9f8a548a1cbf07a72ddc42c98961755f203f583 Mon Sep 17 00:00:00 2001
From: Brandon Potter <brandon.potter@amd.com>
Date: Fri, 27 Apr 2018 14:56:11 -0400
Subject: [PATCH] gpu-compute: use X86ISA::TlbEntry over GpuTlbEntry

GpuTlbEntry was derived from a vanilla X86ISA::TlbEntry definition. It
wrapped the class and included an extra member "valid". This member was
intended to report on the validity of the entry, however it introduced
bugs when folks forgot to set field properly in the code. So, instead of
keeping the extra field which we might forget to set, we track validity by
using nullptr for invalid tlb entries (as the tlb entries are dynamically
allocated). This saves on the extra class definition and prevents bugs
creeping into the code since the checks are intrinsically tied into
accessing any of the X86ISA::TlbEntry members.

This changeset fixes the issues introduced by a8d030522, a4e722725, and
2a15bfd79.

Change-Id: I30ebe3ec223fb833f3795bf0403d0016ac9a8bc2
Reviewed-on: https://gem5-review.googlesource.com/10481
Reviewed-by: Brandon Potter <Brandon.Potter@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
---
 src/gpu-compute/compute_unit.cc  |  6 +--
 src/gpu-compute/gpu_tlb.cc       | 74 ++++++++++++++++++--------------
 src/gpu-compute/gpu_tlb.hh       | 25 +++--------
 src/gpu-compute/tlb_coalescer.cc |  9 ++--
 4 files changed, 55 insertions(+), 59 deletions(-)

diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index a46e965a5..aa4f0a322 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1083,7 +1083,7 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
                safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
 
     // no PageFaults are permitted for data accesses
-    if (!translation_state->tlbEntry->valid) {
+    if (!translation_state->tlbEntry) {
         DTLBPort::SenderState *sender_state =
             safe_cast<DTLBPort::SenderState*>(translation_state->saved);
 
@@ -1095,8 +1095,6 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
                  pkt->req->getVaddr());
     }
 
-    assert(translation_state->tlbEntry->valid);
-
     // update the hitLevel distribution
     int hit_level = translation_state->hitLevel;
     computeUnit->hitsPerTLBLevel[hit_level]++;
@@ -1329,7 +1327,7 @@ ComputeUnit::ITLBPort::recvTimingResp(PacketPtr pkt)
     TheISA::GpuTLB::TranslationState *translation_state =
                  safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
 
-    bool success = translation_state->tlbEntry->valid;
+    bool success = translation_state->tlbEntry != nullptr;
     delete translation_state->tlbEntry;
     assert(!translation_state->ports.size());
     pkt->senderState = translation_state->saved;
diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc
index 5691f3556..8b9bd4320 100644
--- a/src/gpu-compute/gpu_tlb.cc
+++ b/src/gpu-compute/gpu_tlb.cc
@@ -73,7 +73,7 @@ namespace X86ISA
         accessDistance = p->accessDistance;
         clock = p->clk_domain->clockPeriod();
 
-        tlb.assign(size, GpuTlbEntry());
+        tlb.assign(size, TlbEntry());
 
         freeList.resize(numSets);
         entryList.resize(numSets);
@@ -166,10 +166,10 @@ namespace X86ISA
         }
     }
 
-    GpuTlbEntry*
-    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
+    TlbEntry*
+    GpuTLB::insert(Addr vpn, TlbEntry &entry)
     {
-        GpuTlbEntry *newEntry = nullptr;
+        TlbEntry *newEntry = nullptr;
 
         /**
          * vpn holds the virtual page address
@@ -222,7 +222,7 @@ namespace X86ISA
         return entry;
     }
 
-    GpuTlbEntry*
+    TlbEntry*
     GpuTLB::lookup(Addr va, bool update_lru)
     {
         int set = (va >> TheISA::PageShift) & setMask;
@@ -242,7 +242,7 @@ namespace X86ISA
 
         for (int i = 0; i < numSets; ++i) {
             while (!entryList[i].empty()) {
-                GpuTlbEntry *entry = entryList[i].front();
+                TlbEntry *entry = entryList[i].front();
                 entryList[i].pop_front();
                 freeList[i].push_back(entry);
             }
@@ -684,7 +684,7 @@ namespace X86ISA
             if (m5Reg.paging) {
                 DPRINTF(GPUTLB, "Paging enabled.\n");
                 //update LRU stack on a hit
-                GpuTlbEntry *entry = lookup(vaddr, true);
+                TlbEntry *entry = lookup(vaddr, true);
 
                 if (entry)
                     tlb_hit = true;
@@ -792,7 +792,7 @@ namespace X86ISA
             if (m5Reg.paging) {
                 DPRINTF(GPUTLB, "Paging enabled.\n");
                 // The vaddr already has the segment base applied.
-                GpuTlbEntry *entry = lookup(vaddr);
+                TlbEntry *entry = lookup(vaddr);
                 localNumTLBAccesses++;
 
                 if (!entry) {
@@ -830,9 +830,8 @@ namespace X86ISA
                             DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                     alignedVaddr, pte->paddr);
 
-                            GpuTlbEntry gpuEntry(
-                                p->pTable->pid(), alignedVaddr,
-                                pte->paddr, true);
+                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
+                                              pte->paddr, false, false);
                             entry = insert(alignedVaddr, gpuEntry);
                         }
 
@@ -1078,11 +1077,13 @@ namespace X86ISA
         if (success) {
             lookup_outcome = TLB_HIT;
             // Put the entry in SenderState
-            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
+            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
             assert(entry);
 
+            auto p = sender_state->tc->getProcessPtr();
             sender_state->tlbEntry =
-                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
+                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
+                             false, false);
 
             if (update_stats) {
                 // the reqCnt has an entry per level, so its size tells us
@@ -1134,7 +1135,7 @@ namespace X86ISA
      */
     void
     GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
-            GpuTlbEntry * tlb_entry, Mode mode)
+            TlbEntry * tlb_entry, Mode mode)
     {
         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
         uint32_t flags = pkt->req->getFlags();
@@ -1180,7 +1181,7 @@ namespace X86ISA
         ThreadContext *tc = sender_state->tc;
         Mode mode = sender_state->tlbMode;
 
-        GpuTlbEntry *local_entry, *new_entry;
+        TlbEntry *local_entry, *new_entry;
 
         if (tlb_outcome == TLB_HIT) {
             DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
@@ -1347,10 +1348,10 @@ namespace X86ISA
                         pte->paddr);
 
                 sender_state->tlbEntry =
-                    new GpuTlbEntry(0, virtPageAddr, pte->paddr, true);
+                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
+                                 false);
             } else {
-                sender_state->tlbEntry =
-                    new GpuTlbEntry(0, 0, 0, false);
+                sender_state->tlbEntry = nullptr;
             }
 
             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
@@ -1427,7 +1428,7 @@ namespace X86ISA
         Mode mode = sender_state->tlbMode;
         Addr vaddr = pkt->req->getVaddr();
 
-        GpuTlbEntry *local_entry, *new_entry;
+        TlbEntry *local_entry, *new_entry;
 
         if (tlb_outcome == TLB_HIT) {
             DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
@@ -1461,13 +1462,18 @@ namespace X86ISA
                 "while paddr was %#x.\n", local_entry->vaddr,
                 local_entry->paddr);
 
-        // Do paging checks if it's a normal functional access.  If it's for a
-        // prefetch, then sometimes you can try to prefetch something that won't
-        // pass protection. We don't actually want to fault becuase there is no
-        // demand access to deem this a violation.  Just put it in the TLB and
-        // it will fault if indeed a future demand access touches it in
-        // violation.
-        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
+        /**
+         * Do paging checks if it's a normal functional access.  If it's for a
+         * prefetch, then sometimes you can try to prefetch something that
+         * won't pass protection. We don't actually want to fault becuase there
+         * is no demand access to deem this a violation.  Just put it in the
+         * TLB and it will fault if indeed a future demand access touches it in
+         * violation.
+         *
+         * This feature could be used to explore security issues around
+         * speculative memory accesses.
+         */
+        if (!sender_state->prefetch && sender_state->tlbEntry)
             pagingProtectionChecks(tc, pkt, local_entry, mode);
 
         int page_size = local_entry->size();
@@ -1550,8 +1556,8 @@ namespace X86ISA
                             pte->paddr);
 
                     sender_state->tlbEntry =
-                        new GpuTlbEntry(0, virt_page_addr,
-                                        pte->paddr, true);
+                        new TlbEntry(p->pid(), virt_page_addr,
+                                     pte->paddr, false, false);
                 } else {
                     // If this was a prefetch, then do the normal thing if it
                     // was a successful translation.  Otherwise, send an empty
@@ -1562,13 +1568,13 @@ namespace X86ISA
                                 pte->paddr);
 
                         sender_state->tlbEntry =
-                            new GpuTlbEntry(0, virt_page_addr,
-                                            pte->paddr, true);
+                            new TlbEntry(p->pid(), virt_page_addr,
+                                         pte->paddr, false, false);
                     } else {
                         DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                 alignedVaddr);
 
-                        sender_state->tlbEntry = new GpuTlbEntry();
+                        sender_state->tlbEntry = nullptr;
 
                         return;
                     }
@@ -1578,13 +1584,15 @@ namespace X86ISA
             DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                     tlb->lookup(pkt->req->getVaddr()));
 
-            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
+            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                              update_stats);
 
             assert(entry);
 
+            auto p = sender_state->tc->getProcessPtr();
             sender_state->tlbEntry =
-                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
+                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
+                             false, false);
         }
         // This is the function that would populate pkt->req with the paddr of
         // the translation. But if no translation happens (i.e Prefetch fails)
diff --git a/src/gpu-compute/gpu_tlb.hh b/src/gpu-compute/gpu_tlb.hh
index 7819d487b..f479eb6ce 100644
--- a/src/gpu-compute/gpu_tlb.hh
+++ b/src/gpu-compute/gpu_tlb.hh
@@ -62,23 +62,12 @@ class ThreadContext;
 
 namespace X86ISA
 {
-    class GpuTlbEntry : public TlbEntry
-    {
-      public:
-        GpuTlbEntry(Addr asn, Addr _vaddr, Addr _paddr, bool _valid)
-          : TlbEntry(asn, _vaddr, _paddr, false, false), valid(_valid) { }
-
-        GpuTlbEntry() : TlbEntry(), valid(false) { }
-
-        bool valid;
-    };
-
     class GpuTLB : public MemObject
     {
       protected:
         friend class Walker;
 
-        typedef std::list<GpuTlbEntry*> EntryList;
+        typedef std::list<TlbEntry*> EntryList;
 
         uint32_t configAddress;
 
@@ -129,7 +118,7 @@ namespace X86ISA
         };
 
         void dumpAll();
-        GpuTlbEntry *lookup(Addr va, bool update_lru=true);
+        TlbEntry *lookup(Addr va, bool update_lru=true);
         void setConfigAddress(uint32_t addr);
 
       protected:
@@ -170,7 +159,7 @@ namespace X86ISA
          */
         bool accessDistance;
 
-        std::vector<GpuTlbEntry> tlb;
+        std::vector<TlbEntry> tlb;
 
         /*
          * It's a per-set list. As long as we have not reached
@@ -243,7 +232,7 @@ namespace X86ISA
         Tick doMmuRegRead(ThreadContext *tc, Packet *pkt);
         Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt);
 
-        GpuTlbEntry *insert(Addr vpn, GpuTlbEntry &entry);
+        TlbEntry *insert(Addr vpn, TlbEntry &entry);
 
         // Checkpointing
         virtual void serialize(CheckpointOut& cp) const;
@@ -258,9 +247,9 @@ namespace X86ISA
         void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome);
 
         void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
-                                    GpuTlbEntry *tlb_entry, Mode mode);
+                                    TlbEntry *tlb_entry, Mode mode);
 
-        void updatePhysAddresses(Addr virt_page_addr, GpuTlbEntry *tlb_entry,
+        void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry,
                                  Addr phys_page_addr);
 
         void issueTLBLookup(PacketPtr pkt);
@@ -352,7 +341,7 @@ namespace X86ISA
             * previous TLBs.  Equivalent to the data cache concept of
             * "data return."
             */
-            GpuTlbEntry *tlbEntry;
+            TlbEntry *tlbEntry;
             // Is this a TLB prefetch request?
             bool prefetch;
             // When was the req for this translation issued
diff --git a/src/gpu-compute/tlb_coalescer.cc b/src/gpu-compute/tlb_coalescer.cc
index 9c5d30b92..68d2689ef 100644
--- a/src/gpu-compute/tlb_coalescer.cc
+++ b/src/gpu-compute/tlb_coalescer.cc
@@ -38,6 +38,7 @@
 #include <cstring>
 
 #include "debug/GPUTLB.hh"
+#include "sim/process.hh"
 
 TLBCoalescer::TLBCoalescer(const Params *p)
     : MemObject(p),
@@ -155,14 +156,13 @@ TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
     TheISA::GpuTLB::TranslationState *sender_state =
         safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
 
-    TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry;
+    TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
     assert(tlb_entry);
     Addr first_entry_vaddr = tlb_entry->vaddr;
     Addr first_entry_paddr = tlb_entry->paddr;
     int page_size = tlb_entry->size();
     bool uncacheable = tlb_entry->uncacheable;
     int first_hit_level = sender_state->hitLevel;
-    bool valid = tlb_entry->valid;
 
     // Get the physical page address of the translated request
     // Using the page_size specified in the TLBEntry allows us
@@ -197,9 +197,10 @@ TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
 
             // update senderState->tlbEntry, so we can insert
             // the correct TLBEentry in the TLBs above.
+            auto p = sender_state->tc->getProcessPtr();
             sender_state->tlbEntry =
-                new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr,
-                                        valid);
+                new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
+                    first_entry_paddr, false, false);
 
             // update the hitLevel for all uncoalesced reqs
             // so that each packet knows where it hit
-- 
2.30.2