From 978782f23cc12ee17fca76cc18cdc1544ad67df8 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Mon, 16 Dec 2019 22:27:12 -0800
Subject: [PATCH] arm: Implement the AAPCS64 ABI.

This implementation has been tested a tiny bit by intercepting a call
which passed an argument of this type to a function.

struct Test
{
    int32_t a;
    float *b;
};

The gem5 intercept printed out the value of a, the value of b, and the
value of the float it pointed to.

I was able to get things to work by commenting out the panic in
fixFuncEventAddr and making it return its argument unmodified, and by
calling addFuncEvent instead of addKernelFuncEvent which injects the
kernel symbol table. I substitured the Process's debugSymbolTable which
had the right symbols.

Note that this implementation is not completely correct. First of all,
I used a dummy type in place of the Short Vector type which is just
a byte array with the appropriate alignment forced on it. It sounds
like this type would be something the compiler would need an intrinsic
and architecture specific type for to behave correctly, and so in
gem5 we'd have to define our own type for ARM which could feed in here.

Also, strictly speaking, it sounds like HVA and HFA category of types,
the Homogeneous Short-Vector Aggregates and Homogeneous Floating-point
Aggregates, are supposed to apply to any type which is an aggregate of
all the same type (short vector for one, floating point for the other)
with 4 or fewer members.

In this implementation, I capture any *array* of 4 or fewer elements of
the appropriate type as an HVA or HFA, but I believe these structures
would also count and are not included in my implementation.

struct {
    float a;
    float b;
    float c;
};

struct {
    ShortVector a;
    ShortVector b;
};

This only matters if those sorts of structures are passed by value as
top level arguments to a function, ie they are not included in some
larger structure.

Also, rule B.6 talks about what to do with an "aignment adjusted type",
and I have no idea what that's supposed to be. Those may not be handled
correctly either.

Change-Id: I5a599a03d38075d7c0a06988c05e7fb5423c68c0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/23751
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby R. Bruce <bbruce@ucdavis.edu>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/arch/arm/aapcs64.hh | 417 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 417 insertions(+)
 create mode 100644 src/arch/arm/aapcs64.hh

diff --git a/src/arch/arm/aapcs64.hh b/src/arch/arm/aapcs64.hh
new file mode 100644
index 000000000..16edcb389
--- /dev/null
+++ b/src/arch/arm/aapcs64.hh
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_AAPCS64_HH__
+#define __ARCH_ARM_AAPCS64_HH__
+
+#include <algorithm>
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "arch/arm/intregs.hh"
+#include "arch/arm/utility.hh"
+#include "base/intmath.hh"
+#include "cpu/thread_context.hh"
+#include "sim/guest_abi.hh"
+#include "sim/syscall_emul_buf.hh"
+
+class ThreadContext;
+
+struct Aapcs64
+{
+    struct Position
+    {
+        int ngrn=0; // Next general purpose register number.
+        int nsrn=0; // Next SIMD and floating point register number.
+        Addr nsaa; // Next stacked argument address.
+
+        // The maximum allowed general purpose register number.
+        static const int MAX_GRN = 7;
+        // The maximum allowed SIMD and floating point register number.
+        static const int MAX_SRN = 7;
+
+        explicit Position(const ThreadContext *tc) :
+            nsaa(tc->readIntReg(ArmISA::INTREG_SPX))
+        {}
+    };
+};
+
+namespace GuestABI
+{
+
+/*
+ * Short Vectors
+ */
+
+// A short vector is a machine type that is composed of repeated instances of
+// one fundamental integral or floating- point type. It may be 8 or 16 bytes
+// in total size. We represent it here as an opaque blob of data with an
+// appropriate alignment requirement.
+
+template <typename T, std::size_t count, typename Enabled=void>
+using Aapcs64ShortVectorCandidate =
+    alignas(sizeof(T) * count) uint8_t [sizeof(T) * count];
+
+template <typename T, std::size_t count>
+using Aapcs64ShortVector = Aapcs64ShortVectorCandidate<T, count,
+    typename std::enable_if<
+        (std::is_integral<T>::value || std::is_floating_point<T>::value) &&
+        (sizeof(T) * count == 8 || sizeof(T) * count == 16)>::type>;
+
+template <typename T>
+struct IsAapcs64ShortVector : public std::false_type {};
+
+template <typename E, size_t N>
+struct IsAapcs64ShortVector<Aapcs64ShortVector<E, N>> : public std::true_type
+{};
+
+/*
+ * Composite Types
+ */
+
+template <typename T, typename Enabled=void>
+struct IsAapcs64Composite : public std::false_type {};
+
+template <typename T>
+struct IsAapcs64Composite<T, typename std::enable_if<
+    (std::is_array<T>::value ||
+     std::is_class<T>::value ||
+     std::is_union<T>::value) &&
+    // VarArgs is technically a composite type, but it's not a normal argument.
+    !IsVarArgs<T>::value &&
+    // Short vectors are also composite types, but don't treat them as one.
+    !IsAapcs64ShortVector<T>::value
+    >::type> : public std::true_type
+{};
+
+// Homogeneous Aggregates
+// These *should* be any aggregate type which has only one type of member, but
+// we can't actually detect that or manipulate that with templates. Instead,
+// we approximate that by detecting only arrays with that property.
+
+template <typename T, std::size_t count, typename Enabled=void>
+using Aapcs64HomogeneousAggregate = T[count];
+
+// An Homogeneous Floating-Point Aggregate (HFA) is an Homogeneous Aggregate
+// with a Fundemental Data Type that is a Floating-Point type and at most four
+// uniquely addressable members.
+
+template <typename T, std::size_t count>
+using Aapcs64Hfa = Aapcs64HomogeneousAggregate<T, count,
+      typename std::enable_if<std::is_floating_point<T>::value &&
+                              count <= 4>::type>;
+
+template <typename T, typename Enabled=void>
+struct IsAapcs64Hfa : public std::false_type {};
+
+template <typename E, size_t N>
+struct IsAapcs64Hfa<E[N], Aapcs64Hfa<E, N>> : public std::true_type {};
+
+// An Homogeneous Short-Vector Aggregate (HVA) is an Homogeneous Aggregate with
+// a Fundamental Data Type that is a Short-Vector type and at most four
+// uniquely addressable members.
+
+template <typename T, std::size_t count>
+using Aapcs64Hva = Aapcs64HomogeneousAggregate<T, count,
+      typename std::enable_if<IsAapcs64ShortVector<T>::value &&
+                              count <= 4>::type>;
+
+template <typename T, typename Enabled=void>
+struct IsAapcs64Hva : public std::false_type {};
+
+template <typename E, size_t N>
+struct IsAapcs64Hva<E[N], Aapcs64Hva<E, N>> : public std::true_type {};
+
+// A shorthand to test if a type is an HVA or an HFA.
+template <typename T, typename Enabled=void>
+struct IsAapcs64Hxa : public std::false_type {};
+
+template <typename T>
+struct IsAapcs64Hxa<T, typename std::enable_if<
+    IsAapcs64Hfa<T>::value && IsAapcs64Hva<T>::value>::type> :
+    public std::true_type
+{};
+
+struct Aapcs64ArgumentBase
+{
+    template <typename T>
+    static T
+    loadFromStack(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        // The alignment is the larger of 8 or the natural alignment of T.
+        size_t align = std::max<size_t>(8, alignof(T));
+        // Increase the size to the next multiple of 8.
+        size_t size = roundUp(sizeof(T), 8);
+
+        // Align the stack.
+        position.nsaa = roundUp(position.nsaa, align);
+
+        // Extract the value from it.
+        TypedBufferArg<T> val(position.nsaa);
+        val.copyIn(tc->getVirtProxy());
+
+        // Move the nsaa past this argument.
+        position.nsaa += size;
+
+        // Return the value we extracted.
+        return gtoh(*val, ArmISA::byteOrder(tc));
+    }
+};
+
+
+/*
+ * Floating point and Short-Vector arguments and return values.
+ */
+
+template <typename Float>
+struct Argument<Aapcs64, Float, typename std::enable_if<
+    std::is_floating_point<Float>::value ||
+    IsAapcs64ShortVector<Float>::value>::type> :
+    public Aapcs64ArgumentBase
+{
+    static Float
+    get(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        if (position.nsrn <= position.MAX_SRN) {
+            RegId id(VecRegClass, position.nsrn++);
+            return tc->readVecReg(id).laneView<Float, 0>();
+        }
+
+        return loadFromStack<Float>(tc, position);
+    }
+};
+
+template <typename Float>
+struct Result<Aapcs64, Float, typename std::enable_if<
+    std::is_floating_point<Float>::value ||
+    IsAapcs64ShortVector<Float>::value>::type>
+{
+    static void
+    store(ThreadContext *tc, const Float &f)
+    {
+        RegId id(VecRegClass, 0);
+        auto reg = tc->readVecReg(id);
+        reg.laneView<Float, 0>() = f;
+        tc->setVecReg(id, reg);
+    }
+};
+
+
+/*
+ * Integer arguments and return values.
+ */
+
+// This will pick up Addr as well, which should be used for guest pointers.
+template <typename Integer>
+struct Argument<Aapcs64, Integer, typename std::enable_if<
+    std::is_integral<Integer>::value>::type> : public Aapcs64ArgumentBase
+{
+    static Integer
+    get(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        if (sizeof(Integer) <= 8 && position.ngrn <= position.MAX_GRN)
+            return tc->readIntReg(position.ngrn++);
+
+        if (alignof(Integer) == 16 && (position.ngrn % 2))
+            position.ngrn++;
+
+        if (sizeof(Integer) == 16 && position.ngrn + 1 <= position.MAX_GRN) {
+            Integer low = tc->readIntReg(position.ngrn++);
+            Integer high = tc->readIntReg(position.ngrn++);
+            high = high << 64;
+            return high | low;
+        }
+
+        // Max out ngrn since we've effectively saturated it.
+        position.ngrn = position.MAX_GRN + 1;
+
+        return loadFromStack<Integer>(tc, position);
+    }
+};
+
+template <typename Integer>
+struct Result<Aapcs64, Integer, typename std::enable_if<
+    std::is_integral<Integer>::value>::type>
+{
+    static void
+    store(ThreadContext *tc, const Integer &i)
+    {
+        if (sizeof(Integer) <= 8) {
+            tc->setIntReg(0, i);
+        } else {
+            tc->setIntReg(0, (uint64_t)i);
+            tc->setIntReg(1, (uint64_t)(i >> 64));
+        }
+    }
+};
+
+
+/*
+ * Homogeneous Floating-Point and Short-Vector Aggregates (HFAs and HVAs)
+ * argument and return values.
+ */
+
+template <typename T>
+struct Aapcs64ArrayType { using Type = void; };
+
+template <typename E, size_t N>
+struct Aapcs64ArrayType<E[N]> { using Type = E; };
+
+template <typename HA>
+struct Argument<Aapcs64, HA, typename std::enable_if<
+    IsAapcs64Hxa<HA>::value>::type> : public Aapcs64ArgumentBase
+{
+    static HA
+    get(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        using Elem = typename Aapcs64ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+
+        if (position.nsrn + Count - 1 <= position.MAX_SRN) {
+            HA ha;
+            for (int i = 0; i < Count; i++)
+                ha[i] = Argument<Aapcs64, Elem>::get(tc, position);
+            return ha;
+        }
+
+        // Max out the nsrn since we effectively exhausted it.
+        position.nsrn = position.MAX_SRN + 1;
+
+        return loadFromStack<HA>(tc, position);
+    }
+};
+
+template <typename HA>
+struct Result<Aapcs64, HA,
+    typename std::enable_if<IsAapcs64Hxa<HA>::value>::type>
+{
+    static HA
+    store(ThreadContext *tc, const HA &ha)
+    {
+        using Elem = typename Aapcs64ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+
+        for (int i = 0; i < Count; i++)
+            Result<Aapcs64, Elem>::store(tc, ha[i]);
+    }
+};
+
+
+/*
+ * Composite arguments and return values which are not HVAs or HFAs.
+ */
+
+template <typename Composite>
+struct Argument<Aapcs64, Composite, typename std::enable_if<
+    IsAapcs64Composite<Composite>::value && !IsAapcs64Hxa<Composite>::value
+    >::type> : public Aapcs64ArgumentBase
+{
+    static Composite
+    get(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        if (sizeof(Composite) > 16) {
+            // Composite values larger than 16 which aren't HFAs or HVAs are
+            // kept in a buffer, and the argument is actually a pointer to that
+            // buffer.
+            Addr addr = Argument<Aapcs64, Addr>::get(tc, position);
+            TypedBufferArg<Composite> composite(addr);
+            composite.copyIn(tc->getVirtProxy());
+            return gtoh(*composite, ArmISA::byteOrder(tc));
+        }
+
+        // The size of Composite must be 16 bytes or less after this point.
+
+        size_t bytes = sizeof(Composite);
+        using Chunk = uint64_t;
+
+        const int chunk_size = sizeof(Chunk);
+        const int regs = (bytes + chunk_size - 1) / chunk_size;
+
+        // Can it fit in GPRs?
+        if (position.ngrn + regs - 1 <= position.MAX_GRN) {
+            alignas(alignof(Composite)) uint8_t buf[bytes];
+            for (int i = 0; i < regs; i++) {
+                Chunk val = tc->readIntReg(position.ngrn++);
+                val = htog(val, ArmISA::byteOrder(tc));
+                size_t to_copy = std::min<size_t>(bytes, chunk_size);
+                memcpy(buf + i * chunk_size, &val, to_copy);
+                bytes -= to_copy;
+            }
+            return gtoh(*(Composite *)buf, ArmISA::byteOrder(tc));
+        }
+
+        // Max out the ngrn since we effectively exhausted it.
+        position.ngrn = position.MAX_GRN;
+
+        return loadFromStack<Composite>(tc, position);
+    }
+};
+
+template <typename Composite>
+struct Result<Aapcs64, Composite, typename std::enable_if<
+    IsAapcs64Composite<Composite>::value && !IsAapcs64Hxa<Composite>::value
+    >::type>
+{
+    static void
+    store(ThreadContext *tc, const Composite &c)
+    {
+        if (sizeof(Composite) > 16) {
+            Addr addr = tc->readIntReg(ArmISA::INTREG_X8);
+            TypedBufferArg<Composite> composite(addr);
+            *composite = htog(c, ArmISA::byteOrder(tc));
+            return;
+        }
+
+        // The size of Composite must be 16 bytes or less after this point.
+
+        size_t bytes = sizeof(Composite);
+        using Chunk = uint64_t;
+
+        const int chunk_size = sizeof(Chunk);
+        const int regs = (bytes + chunk_size - 1) / chunk_size;
+
+        Composite cp = htog(c, ArmISA::byteOrder(tc));
+        uint8_t *buf = (uint8_t *)&cp;
+        for (int i = 0; i < regs; i++) {
+            size_t to_copy = std::min<size_t>(bytes, chunk_size);
+
+            Chunk val;
+            memcpy(&val, buf, to_copy);
+            val = gtoh(val, ArmISA::byteOrder(tc));
+
+            tc->setIntReg(i, val);
+
+            bytes -= to_copy;
+            buf += to_copy;
+        }
+    }
+};
+
+} // namespace GuestABI
+
+#endif // __ARCH_ARM_AAPCS64_HH__
-- 
2.30.2