From: Gabe Black
Date: Tue, 17 Dec 2019 06:27:12 +0000 (-0800)
Subject: arm: Implement the AAPCS64 ABI.
X-Git-Tag: v20.0.0.0~362
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=978782f23cc12ee17fca76cc18cdc1544ad67df8;p=gem5.git

arm: Implement the AAPCS64 ABI.

This implementation has been tested a tiny bit by intercepting a call
which passed an argument of this type to a function.

struct Test
{
    int32_t a;
    float *b;
};

The gem5 intercept printed out the value of a, the value of b, and the
value of the float it pointed to. I was able to get things to work by
commenting out the panic in fixFuncEventAddr and making it return its
argument unmodified, and by calling addFuncEvent instead of
addKernelFuncEvent, which injects the kernel symbol table. I
substituted the Process's debugSymbolTable, which had the right
symbols.

Note that this implementation is not completely correct.

First of all, I used a dummy type in place of the Short Vector type
which is just a byte array with the appropriate alignment forced on
it. It sounds like the compiler would need an intrinsic,
architecture specific type for this to behave correctly, and so in
gem5 we'd have to define our own type for ARM which could feed in
here.

Also, strictly speaking, it sounds like the HVA and HFA categories of
types, the Homogeneous Short-Vector Aggregates and Homogeneous
Floating-point Aggregates, are supposed to apply to any type which is
an aggregate of all the same type (short vector for one, floating
point for the other) with 4 or fewer members. In this implementation,
I capture any *array* of 4 or fewer elements of the appropriate type
as an HVA or HFA, but I believe structures like the following would
also count and are not included in my implementation.

struct {
    float a;
    float b;
    float c;
};

struct {
    ShortVector a;
    ShortVector b;
};

This only matters if those sorts of structures are passed by value as
top level arguments to a function, i.e. they are not included in some
larger structure.

Also, rule B.6 talks about what to do with an "alignment adjusted
type", and I have no idea what that's supposed to be. Those may not be
handled correctly either.

Change-Id: I5a599a03d38075d7c0a06988c05e7fb5423c68c0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/23751
Tested-by: kokoro
Reviewed-by: Bobby R. Bruce
Maintainer: Gabe Black
---
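As a side note, here is roughly how the trait machinery in this patch
classifies the cases above, using the patch's own templates. The
ThreeFloats struct is a stand-in name and the static_asserts are
illustrative only, not part of the patch:

    // An array of a floating point type with 4 or fewer elements is
    // detected as an HFA...
    static_assert(GuestABI::IsAapcs64Hfa<float[3]>::value, "");
    // ...but one with more than 4 elements is not.
    static_assert(!GuestABI::IsAapcs64Hfa<float[5]>::value, "");

    // The equivalent struct is not detected, even though AAPCS64 would
    // treat it as an HFA; it falls through to the composite rules.
    struct ThreeFloats { float a, b, c; };
    static_assert(!GuestABI::IsAapcs64Hfa<ThreeFloats>::value, "");
    static_assert(GuestABI::IsAapcs64Composite<ThreeFloats>::value, "");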
diff --git a/src/arch/arm/aapcs64.hh b/src/arch/arm/aapcs64.hh
new file mode 100644
index 000000000..16edcb389
--- /dev/null
+++ b/src/arch/arm/aapcs64.hh
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_AAPCS64_HH__
+#define __ARCH_ARM_AAPCS64_HH__
+
+#include <algorithm>
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "arch/arm/intregs.hh"
+#include "arch/arm/utility.hh"
+#include "base/intmath.hh"
+#include "cpu/thread_context.hh"
+#include "sim/guest_abi.hh"
+#include "sim/syscall_emul_buf.hh"
+
+class ThreadContext;
+
+struct Aapcs64
+{
+    struct Position
+    {
+        int ngrn = 0; // Next general purpose register number.
+        int nsrn = 0; // Next SIMD and floating point register number.
+        Addr nsaa; // Next stacked argument address.
+
+        // The maximum allowed general purpose register number.
+        static const int MAX_GRN = 7;
+        // The maximum allowed SIMD and floating point register number.
+        static const int MAX_SRN = 7;
+
+        explicit Position(const ThreadContext *tc) :
+            nsaa(tc->readIntReg(ArmISA::INTREG_SPX))
+        {}
+    };
+};
+
+namespace GuestABI
+{
+
+/*
+ * Short Vectors
+ */
+
+// A short vector is a machine type that is composed of repeated instances
+// of one fundamental integral or floating-point type. It may be 8 or 16
+// bytes in total size. We represent it here as an opaque blob of data with
+// an appropriate alignment requirement.
+
+template <typename T, size_t count, typename Enabled=void>
+using Aapcs64ShortVectorCandidate =
+    alignas(sizeof(T) * count) uint8_t [sizeof(T) * count];
+
+template <typename T, size_t count>
+using Aapcs64ShortVector = Aapcs64ShortVectorCandidate<T, count,
+    typename std::enable_if<(std::is_integral<T>::value ||
+                             std::is_floating_point<T>::value) &&
+                            (sizeof(T) * count == 8 ||
+                             sizeof(T) * count == 16)>::type>;
+
+template <typename T>
+struct IsAapcs64ShortVector : public std::false_type {};
+
+template <typename T, size_t count>
+struct IsAapcs64ShortVector<Aapcs64ShortVector<T, count>> :
+    public std::true_type
+{};
+
+/*
+ * Composite Types
+ */
+
+template <typename T, typename Enabled=void>
+struct IsAapcs64Composite : public std::false_type {};
+
+template <typename T>
+struct IsAapcs64Composite<T, typename std::enable_if<
+    (std::is_array<T>::value ||
+     std::is_class<T>::value ||
+     std::is_union<T>::value) &&
+    // VarArgs is technically a composite type, but it's not a normal
+    // argument.
+    !IsVarArgs<T>::value &&
+    // Short vectors are also composite types, but don't treat them as one.
+    !IsAapcs64ShortVector<T>::value
+    >::type> : public std::true_type
+{};
+
+// Homogeneous Aggregates
+// These *should* be any aggregate type which has only one type of member,
+// but we can't actually detect that or manipulate that with templates.
+// Instead, we approximate that by detecting only arrays with that property.
+
+template <typename T, size_t count, typename Enabled=void>
+using Aapcs64HomogeneousAggregate = T[count];
+
+// A Homogeneous Floating-Point Aggregate (HFA) is a Homogeneous Aggregate
+// with a Fundamental Data Type that is a Floating-Point type and at most
+// four uniquely addressable members.
+
+template <typename T, size_t count>
+using Aapcs64Hfa = Aapcs64HomogeneousAggregate<T, count,
+    typename std::enable_if<std::is_floating_point<T>::value &&
+                            count <= 4>::type>;
+
+template <typename T>
+struct IsAapcs64Hfa : public std::false_type {};
+
+template <typename T, size_t count>
+struct IsAapcs64Hfa<Aapcs64Hfa<T, count>> : public std::true_type {};
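+
+// For example, with the array-based approximation above, double[4] is
+// detected as an HFA, while double[5] (more than four members) and a
+// struct holding four doubles (not an array) are not, and instead fall
+// through to the generic composite handling further down.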
+ +template +using Aapcs64Hva = Aapcs64HomogeneousAggregate::value && + count <= 4>::type>; + +template +struct IsAapcs64Hva : public std::false_type {}; + +template +struct IsAapcs64Hva> : public std::true_type {}; + +// A shorthand to test if a type is an HVA or an HFA. +template +struct IsAapcs64Hxa : public std::false_type {}; + +template +struct IsAapcs64Hxa::value && IsAapcs64Hva::value>::type> : + public std::true_type +{}; + +struct Aapcs64ArgumentBase +{ + template + static T + loadFromStack(ThreadContext *tc, Aapcs64::Position &position) + { + // The alignment is the larger of 8 or the natural alignment of T. + size_t align = std::max(8, alignof(T)); + // Increase the size to the next multiple of 8. + size_t size = roundUp(sizeof(T), 8); + + // Align the stack. + position.nsaa = roundUp(position.nsaa, align); + + // Extract the value from it. + TypedBufferArg val(position.nsaa); + val.copyIn(tc->getVirtProxy()); + + // Move the nsaa past this argument. + position.nsaa += size; + + // Return the value we extracted. + return gtoh(*val, ArmISA::byteOrder(tc)); + } +}; + + +/* + * Floating point and Short-Vector arguments and return values. + */ + +template +struct Argument::value || + IsAapcs64ShortVector::value>::type> : + public Aapcs64ArgumentBase +{ + static Float + get(ThreadContext *tc, Aapcs64::Position &position) + { + if (position.nsrn <= position.MAX_SRN) { + RegId id(VecRegClass, position.nsrn++); + return tc->readVecReg(id).laneView(); + } + + return loadFromStack(tc, position); + } +}; + +template +struct Result::value || + IsAapcs64ShortVector::value>::type> +{ + static void + store(ThreadContext *tc, const Float &f) + { + RegId id(VecRegClass, 0); + auto reg = tc->readVecReg(id); + reg.laneView() = f; + tc->setVecReg(id, reg); + } +}; + + +/* + * Integer arguments and return values. + */ + +// This will pick up Addr as well, which should be used for guest pointers. +template +struct Argument::value>::type> : public Aapcs64ArgumentBase +{ + static Integer + get(ThreadContext *tc, Aapcs64::Position &position) + { + if (sizeof(Integer) <= 8 && position.ngrn <= position.MAX_GRN) + return tc->readIntReg(position.ngrn++); + + if (alignof(Integer) == 16 && (position.ngrn % 2)) + position.ngrn++; + + if (sizeof(Integer) == 16 && position.ngrn + 1 <= position.MAX_GRN) { + Integer low = tc->readIntReg(position.ngrn++); + Integer high = tc->readIntReg(position.ngrn++); + high = high << 64; + return high | low; + } + + // Max out ngrn since we've effectively saturated it. + position.ngrn = position.MAX_GRN + 1; + + return loadFromStack(tc, position); + } +}; + +template +struct Result::value>::type> +{ + static void + store(ThreadContext *tc, const Integer &i) + { + if (sizeof(Integer) <= 8) { + tc->setIntReg(0, i); + } else { + tc->setIntReg(0, (uint64_t)i); + tc->setIntReg(1, (uint64_t)(i >> 64)); + } + } +}; + + +/* + * Homogeneous Floating-Point and Short-Vector Aggregates (HFAs and HVAs) + * argument and return values. 
+
+
+/*
+ * Homogeneous Floating-Point and Short-Vector Aggregates (HFAs and HVAs)
+ * argument and return values.
+ */
+
+template <typename T, typename Enabled=void>
+struct Aapcs64ArrayType { using Type = void; };
+
+template <typename E, size_t N>
+struct Aapcs64ArrayType<E[N]> { using Type = E; };
+
+template <typename HA>
+struct Argument<Aapcs64, HA,
+    typename std::enable_if<IsAapcs64Hxa<HA>::value>::type> :
+    public Aapcs64ArgumentBase
+{
+    static HA
+    get(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        using Elem = typename Aapcs64ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+
+        if (position.nsrn + Count - 1 <= position.MAX_SRN) {
+            HA ha;
+            for (int i = 0; i < Count; i++)
+                ha[i] = Argument<Aapcs64, Elem>::get(tc, position);
+            return ha;
+        }
+
+        // Max out the nsrn since we effectively exhausted it.
+        position.nsrn = position.MAX_SRN + 1;
+
+        return loadFromStack<HA>(tc, position);
+    }
+};
+
+template <typename HA>
+struct Result<Aapcs64, HA,
+    typename std::enable_if<IsAapcs64Hxa<HA>::value>::type>
+{
+    static void
+    store(ThreadContext *tc, const HA &ha)
+    {
+        using Elem = typename Aapcs64ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+
+        for (int i = 0; i < Count; i++)
+            Result<Aapcs64, Elem>::store(tc, ha[i]);
+    }
+};
+
+
+/*
+ * Composite arguments and return values which are not HVAs or HFAs.
+ */
+
+template <typename Composite>
+struct Argument<Aapcs64, Composite, typename std::enable_if<
+    IsAapcs64Composite<Composite>::value && !IsAapcs64Hxa<Composite>::value
+    >::type> : public Aapcs64ArgumentBase
+{
+    static Composite
+    get(ThreadContext *tc, Aapcs64::Position &position)
+    {
+        if (sizeof(Composite) > 16) {
+            // Composite values larger than 16 bytes which aren't HFAs or
+            // HVAs are kept in a buffer, and the argument is actually a
+            // pointer to that buffer.
+            Addr addr = Argument<Aapcs64, Addr>::get(tc, position);
+            TypedBufferArg<Composite> composite(addr);
+            composite.copyIn(tc->getVirtProxy());
+            return gtoh(*composite, ArmISA::byteOrder(tc));
+        }
+
+        // The size of Composite must be 16 bytes or less after this point.
+
+        size_t bytes = sizeof(Composite);
+        using Chunk = uint64_t;
+
+        const int chunk_size = sizeof(Chunk);
+        const int regs = (bytes + chunk_size - 1) / chunk_size;
+
+        // Can it fit in GPRs?
+        if (position.ngrn + regs - 1 <= position.MAX_GRN) {
+            alignas(alignof(Composite)) uint8_t buf[sizeof(Composite)];
+            for (int i = 0; i < regs; i++) {
+                Chunk val = tc->readIntReg(position.ngrn++);
+                val = htog(val, ArmISA::byteOrder(tc));
+                size_t to_copy = std::min<size_t>(bytes, chunk_size);
+                memcpy(buf + i * chunk_size, &val, to_copy);
+                bytes -= to_copy;
+            }
+            return gtoh(*(Composite *)buf, ArmISA::byteOrder(tc));
+        }
+
+        // Max out the ngrn since we effectively exhausted it.
+        position.ngrn = position.MAX_GRN + 1;
+
+        return loadFromStack<Composite>(tc, position);
+    }
+};
+
+template <typename Composite>
+struct Result<Aapcs64, Composite, typename std::enable_if<
+    IsAapcs64Composite<Composite>::value && !IsAapcs64Hxa<Composite>::value
+    >::type>
+{
+    static void
+    store(ThreadContext *tc, const Composite &c)
+    {
+        if (sizeof(Composite) > 16) {
+            // Results larger than 16 bytes go into a buffer whose address
+            // the caller passed in x8.
+            Addr addr = tc->readIntReg(ArmISA::INTREG_X8);
+            TypedBufferArg<Composite> composite(addr);
+            *composite = htog(c, ArmISA::byteOrder(tc));
+            composite.copyOut(tc->getVirtProxy());
+            return;
+        }
+
+        // The size of Composite must be 16 bytes or less after this point.
+
+        size_t bytes = sizeof(Composite);
+        using Chunk = uint64_t;
+
+        const int chunk_size = sizeof(Chunk);
+        const int regs = (bytes + chunk_size - 1) / chunk_size;
+
+        Composite cp = htog(c, ArmISA::byteOrder(tc));
+        uint8_t *buf = (uint8_t *)&cp;
+        for (int i = 0; i < regs; i++) {
+            size_t to_copy = std::min<size_t>(bytes, chunk_size);
+
+            Chunk val = 0;
+            memcpy(&val, buf, to_copy);
+            val = gtoh(val, ArmISA::byteOrder(tc));
+
+            tc->setIntReg(i, val);
+
+            bytes -= to_copy;
+            buf += to_copy;
+        }
+    }
+};
+
+} // namespace GuestABI
+
+#endif // __ARCH_ARM_AAPCS64_HH__
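
As a usage sketch (not part of the patch), glue like this is driven
through the GuestABI machinery from sim/guest_abi.hh. Assuming the
invokeSimcall helper from that framework, and the Test struct from the
commit message above, an intercept handler might look roughly like this;
printTest is a hypothetical name:

    #include "arch/arm/aapcs64.hh"
    #include "base/logging.hh"
    #include "sim/guest_abi.hh"

    // Hypothetical handler: Test's two fields arrive reassembled from
    // x0/x1 (or the stack) according to the composite rules in this file.
    void
    printTest(ThreadContext *tc, Test t)
    {
        inform("a = %d, b = %#x", t.a, (uint64_t)t.b);
    }

    // At the intercepted PC, gather arguments per AAPCS64 and call it:
    invokeSimcall<Aapcs64>(tc, printTest);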