From f71345128b46fe68c1d5905a28a9f5f25aabb58c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 22 Dec 2019 23:06:51 -0500 Subject: [PATCH] arm: Implement the AAPCS32 ABI. Change-Id: I63b2ec586146163642392f5164fb01335d811471 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/24108 Tested-by: kokoro Reviewed-by: Bobby R. Bruce Maintainer: Gabe Black --- src/arch/arm/aapcs32.hh | 611 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 611 insertions(+) create mode 100644 src/arch/arm/aapcs32.hh diff --git a/src/arch/arm/aapcs32.hh b/src/arch/arm/aapcs32.hh new file mode 100644 index 000000000..3e9ad54a0 --- /dev/null +++ b/src/arch/arm/aapcs32.hh @@ -0,0 +1,611 @@ +/* + * Copyright 2019 Google Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_AAPCS32_HH__
+#define __ARCH_ARM_AAPCS32_HH__
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <utility>
+
+#include "arch/arm/intregs.hh"
+#include "arch/arm/utility.hh"
+#include "base/intmath.hh"
+#include "cpu/thread_context.hh"
+#include "sim/guest_abi.hh"
+#include "sim/syscall_emul_buf.hh"
+
+class ThreadContext;
+
+/*
+ * Tag type selecting the base (core register only) AAPCS32 rules in the
+ * GuestABI Result/Argument templates below.
+ */
+struct Aapcs32
+{
+    /*
+     * Bookkeeping threaded through the Result/Argument helpers while the
+     * arguments of one call are being marshalled.
+     */
+    struct State
+    {
+        bool stackUsed=false; // Whether anything has been put on the stack.
+
+        int ncrn=0; // Next general purpose register number.
+        Addr nsaa; // Next stacked argument address.
+
+        // The maximum allowed general purpose register number.
+        static const int MAX_CRN = 3;
+
+        // Guest address a composite result is written to when it does not
+        // fit in a register; filled in by the composite Result's prepare().
+        Addr retAddr=0;
+
+        // The stacked arguments start at the guest's current stack pointer.
+        explicit State(const ThreadContext *tc) :
+            nsaa(tc->readIntReg(ArmISA::INTREG_SPX))
+        {}
+    };
+};
+
+namespace GuestABI
+{
+
+/*
+ * Composite Types
+ */
+
+// Primary template: by default a type is not an AAPCS32 composite.
+template <typename T, typename Enabled=void>
+struct IsAapcs32Composite : public std::false_type {};
+
+// Arrays, classes and unions are composites, with VarArgs carved out.
+template <typename T>
+struct IsAapcs32Composite<T, typename std::enable_if<
+    (std::is_array<T>::value ||
+     std::is_class<T>::value ||
+     std::is_union<T>::value) &&
+    // VarArgs is technically a composite type, but it's not a normal argument.
+    !IsVarArgs<T>::value
+    >::type> : public std::true_type
+{};
+
+// Homogeneous Aggregates
+// These *should* be any aggregate type which has only one type of member, but
+// we can't actually detect that or manipulate that with templates. Instead,
+// we approximate that by detecting only arrays with that property.
+
+template <typename T, std::size_t count, typename Enabled=void>
+using Aapcs32HomogeneousAggregate = T[count];
+
+// Primary template: a type is not a homogeneous aggregate unless it matches
+// the array specialization below.
+template <typename T>
+struct IsAapcs32HomogeneousAggregate : public std::false_type {};
+
+template <typename E, size_t N>
+struct IsAapcs32HomogeneousAggregate<E[N]> : public std::true_type {};
+
+/*
+ * Shared helper for the Argument specializations: pulls an argument of type
+ * T off of the guest stack and advances the state past it.
+ */
+struct Aapcs32ArgumentBase
+{
+    template <typename T>
+    static T
+    loadFromStack(ThreadContext *tc, Aapcs32::State &state)
+    {
+        state.stackUsed = true;
+
+        // The alignment is the larger of 4 or the natural alignment of T.
+        size_t align = std::max<size_t>(4, alignof(T));
+        // Increase the size to the next multiple of 4.
+        size_t size = roundUp(sizeof(T), 4);
+
+        // Align the stack.
+        state.nsaa = roundUp(state.nsaa, align);
+
+        // Extract the value from it.
+        TypedBufferArg<T> val(state.nsaa);
+        val.copyIn(tc->getVirtProxy());
+
+        // Move the nsaa past this argument.
+        state.nsaa += size;
+
+        // Return the value we extracted.
+        return gtoh(*val, ArmISA::byteOrder(tc));
+    }
+};
+
+
+/*
+ * Integer arguments and return values.
+ */
+
+template <typename Integer>
+struct Result<Aapcs32, Integer, typename std::enable_if<
+    std::is_integral<Integer>::value>::type>
+{
+    // Return an integer in r0, or in r0/r1 when it is 64 bits wide.
+    static void
+    store(ThreadContext *tc, const Integer &i)
+    {
+        if (sizeof(Integer) < sizeof(uint32_t)) {
+            // Sub-word values are widened to a full register; signed ones
+            // are sign extended.
+            uint32_t val = std::is_signed<Integer>::value ?
+                    sext<sizeof(Integer) * 8>(i) : i;
+            tc->setIntReg(ArmISA::INTREG_R0, val);
+        } else if (sizeof(Integer) == sizeof(uint32_t) ||
+                   std::is_same<Integer, Addr>::value) {
+            // Addr is truncated to the 32 bit register width.
+            tc->setIntReg(ArmISA::INTREG_R0, (uint32_t)i);
+        } else if (sizeof(Integer) == sizeof(uint64_t)) {
+            // Which register holds the low word depends on the guest's
+            // current byte order.
+            if (ArmISA::byteOrder(tc) == LittleEndianByteOrder) {
+                tc->setIntReg(ArmISA::INTREG_R0, (uint32_t)(i >> 0));
+                tc->setIntReg(ArmISA::INTREG_R1, (uint32_t)(i >> 32));
+            } else {
+                tc->setIntReg(ArmISA::INTREG_R0, (uint32_t)(i >> 32));
+                tc->setIntReg(ArmISA::INTREG_R1, (uint32_t)(i >> 0));
+            }
+        }
+    }
+};
+
+template <typename Integer>
+struct Argument<Aapcs32, Integer, typename std::enable_if<
+    std::is_integral<Integer>::value>::type> : public Aapcs32ArgumentBase
+{
+    // Fetch the next integer argument from r0-r3, falling back to the stack
+    // once the registers are used up.
+    static Integer
+    get(ThreadContext *tc, Aapcs32::State &state)
+    {
+        // Word sized (or smaller) values and Addr take a single register.
+        if ((sizeof(Integer) <= sizeof(uint32_t) ||
+                    std::is_same<Integer, Addr>::value) &&
+                state.ncrn <= state.MAX_CRN) {
+            return tc->readIntReg(state.ncrn++);
+        }
+
+        // 8 byte aligned values start in an even numbered register.
+        if (alignof(Integer) == 8 && (state.ncrn % 2))
+            state.ncrn++;
+
+        if (sizeof(Integer) == sizeof(uint64_t) &&
+                state.ncrn + 1 <= state.MAX_CRN) {
+            // Assemble the halves as unsigned 64 bit values so the shift
+            // below is well defined whatever Integer happens to be.
+            uint64_t low, high;
+            if (ArmISA::byteOrder(tc) == LittleEndianByteOrder) {
+                low = tc->readIntReg(state.ncrn++) & mask(32);
+                high = tc->readIntReg(state.ncrn++) & mask(32);
+            } else {
+                high = tc->readIntReg(state.ncrn++) & mask(32);
+                low = tc->readIntReg(state.ncrn++) & mask(32);
+            }
+            return (Integer)(low | (high << 32));
+        }
+
+        // Max out the ncrn since we effectively exhausted it.
+        state.ncrn = state.MAX_CRN + 1;
+
+        return loadFromStack<Integer>(tc, state);
+    }
+};
+
+
+/*
+ * Floating point and Short-Vector arguments and return values.
+ */
+
+template <typename Float>
+struct Result<Aapcs32, Float, typename std::enable_if<
+    std::is_floating_point<Float>::value>::type>
+{
+    // Without VFP a floating point result travels as its raw bit pattern,
+    // using the integer rules for a value of the same width.
+    static void
+    store(ThreadContext *tc, const Float &f, Aapcs32::State &state)
+    {
+        auto i = floatToBits(f);
+        storeResult<Aapcs32, decltype(i)>(tc, i, state);
+    }
+};
+
+template <typename Float>
+struct Argument<Aapcs32, Float, typename std::enable_if<
+    std::is_floating_point<Float>::value>::type> : public Aapcs32ArgumentBase
+{
+    // Fetch the bits as an integer argument of the same width and
+    // reinterpret them as a floating point value.
+    static Float
+    get(ThreadContext *tc, Aapcs32::State &state)
+    {
+        if (sizeof(Float) == sizeof(uint32_t)) {
+            return bitsToFloat32(
+                    getArgument<Aapcs32, uint32_t>(tc, state));
+        } else {
+            return bitsToFloat64(
+                    getArgument<Aapcs32, uint64_t>(tc, state));
+        }
+    }
+};
+
+
+/*
+ * Composite arguments and return values.
+ */
+
+template <typename Composite>
+struct Result<Aapcs32, Composite, typename std::enable_if<
+    IsAapcs32Composite<Composite>::value>::type>
+{
+    // Composites of up to 4 bytes are returned in r0, anything larger is
+    // written to guest memory at state.retAddr.
+    static void
+    store(ThreadContext *tc, const Composite &composite,
+          Aapcs32::State &state)
+    {
+        if (sizeof(Composite) <= sizeof(uint32_t)) {
+            Composite cp = htog(composite, ArmISA::byteOrder(tc));
+            // Zeroed first, since the memcpy may cover fewer than 4 bytes.
+            uint32_t val = 0;
+            memcpy((void *)&val, (void *)&cp, sizeof(Composite));
+            val = gtoh(val, ArmISA::byteOrder(tc));
+            tc->setIntReg(ArmISA::INTREG_R0, val);
+        } else {
+            TypedBufferArg<Composite> cp(state.retAddr);
+            cp = htog(composite, ArmISA::byteOrder(tc));
+            cp.copyOut(tc->getVirtProxy());
+        }
+    }
+
+    // A composite which doesn't fit in r0 is returned through memory; the
+    // address to use is read from the next argument register before any
+    // real argument is gathered.
+    static void
+    prepare(ThreadContext *tc, Aapcs32::State &state)
+    {
+        if (sizeof(Composite) > sizeof(uint32_t))
+            state.retAddr = tc->readIntReg(state.ncrn++);
+    }
+};
+
+template <typename Composite>
+struct Argument<Aapcs32, Composite, typename std::enable_if<
+    IsAapcs32Composite<Composite>::value>::type> :
+    public Aapcs32ArgumentBase
+{
+    // Gather a composite argument from registers, from registers and the
+    // stack, or from the stack alone, in that order of preference.
+    static Composite
+    get(ThreadContext *tc, Aapcs32::State &state)
+    {
+        // Bytes of the composite which still have to be gathered.
+        size_t bytes = sizeof(Composite);
+        using Chunk = uint32_t;
+
+        const int chunk_size = sizeof(Chunk);
+        const int regs = (bytes + chunk_size - 1) / chunk_size;
+
+        // A composite which fits in one register. The register number is
+        // only consumed when the register is actually read.
+        if (bytes <= chunk_size) {
+            if (state.ncrn <= state.MAX_CRN) {
+                alignas(alignof(Composite)) uint32_t val =
+                    tc->readIntReg(state.ncrn++);
+                val = htog(val, ArmISA::byteOrder(tc));
+                return gtoh(*(Composite *)&val, ArmISA::byteOrder(tc));
+            }
+        }
+
+        // 8 byte aligned composites start in an even numbered register.
+        if (alignof(Composite) == 8 && (state.ncrn % 2))
+            state.ncrn++;
+
+        // The whole composite fits in the remaining registers.
+        if (state.ncrn + regs - 1 <= state.MAX_CRN) {
+            // Sized by the type, not by "bytes", so this is a real array.
+            alignas(alignof(Composite)) uint8_t buf[sizeof(Composite)];
+            for (int i = 0; i < regs; i++) {
+                Chunk val = tc->readIntReg(state.ncrn++);
+                val = htog(val, ArmISA::byteOrder(tc));
+                size_t to_copy = std::min<size_t>(bytes, chunk_size);
+                memcpy(buf + i * chunk_size, &val, to_copy);
+                bytes -= to_copy;
+            }
+            return gtoh(*(Composite *)buf, ArmISA::byteOrder(tc));
+        }
+
+        // Some registers are left and nothing has gone on the stack yet, so
+        // the composite is split between the registers and the stack.
+        if (!state.stackUsed && state.ncrn <= state.MAX_CRN) {
+            alignas(alignof(Composite)) uint8_t buf[sizeof(Composite)];
+
+            int offset = 0;
+            while (state.ncrn <= state.MAX_CRN) {
+                Chunk val = tc->readIntReg(state.ncrn++);
+                val = htog(val, ArmISA::byteOrder(tc));
+                size_t to_copy = std::min<size_t>(bytes, chunk_size);
+                memcpy(buf + offset, &val, to_copy);
+                offset += to_copy;
+                bytes -= to_copy;
+            }
+
+            if (bytes) {
+                // The stacked tail goes after the part which came from
+                // registers, not on top of it.
+                tc->getVirtProxy().readBlob(state.nsaa, buf + offset, bytes);
+
+                state.stackUsed = true;
+                state.nsaa += roundUp(bytes, 4);
+                state.ncrn = state.MAX_CRN + 1;
+            }
+
+            return gtoh(*(Composite *)buf, ArmISA::byteOrder(tc));
+        }
+
+        // Everything else goes on the stack, and no later argument may use
+        // the registers.
+        state.ncrn = state.MAX_CRN + 1;
+
+        return loadFromStack<Composite>(tc, state);
+    }
+};
+
+} // namespace GuestABI
+
+
+/*
+ * VFP ABI variant.
+ */
+
+struct Aapcs32Vfp : public Aapcs32
+{
+    struct State : public Aapcs32::State
+    {
+        bool variadic=false; // Whether this function is variadic.
+
+        // Whether the various single and double precision registers have
+        // been allocated.
+        std::array<bool, 16> s;
+        std::array<bool, 8> d;
+
+        explicit State(const ThreadContext *tc) : Aapcs32::State(tc)
+        {
+            s.fill(false);
+            d.fill(false);
+        }
+
+        /*
+         * Claim "count" consecutive unallocated single precision registers.
+         *
+         * Returns the index of the first register of the lowest numbered
+         * free run. If there is no such run, every register is marked as
+         * allocated and -1 is returned.
+         */
+        int
+        allocate(float, int count)
+        {
+            const int num_regs = s.size();
+            // First register of the free run currently being scanned.
+            int last = 0;
+            // "i" is the last register of a candidate run, so it has to be
+            // able to reach the top of the bank.
+            for (int i = 0; i < num_regs; i++) {
+                if (s[i]) {
+                    last = i + 1;
+                    continue;
+                }
+                if (i - last + 1 == count) {
+                    for (int j = 0; j < count; j++) {
+                        s[last + j] = true;
+                        // The double overlapping this single is used too.
+                        d[(last + j) / 2] = true;
+                    }
+                    return last;
+                }
+            }
+            s.fill(true);
+            d.fill(true);
+            return -1;
+        }
+
+        /*
+         * Claim "count" consecutive unallocated double precision registers.
+         *
+         * Returns the index of the first register of the lowest numbered
+         * free run. If there is no such run, every register is marked as
+         * allocated and -1 is returned.
+         */
+        int
+        allocate(double, int count)
+        {
+            const int num_regs = d.size();
+            // First register of the free run currently being scanned.
+            int last = 0;
+            for (int i = 0; i < num_regs; i++) {
+                if (d[i]) {
+                    last = i + 1;
+                    continue;
+                }
+                if (i - last + 1 == count) {
+                    for (int j = 0; j < count; j++) {
+                        d[last + j] = true;
+                        // Both singles overlapping this double are used too.
+                        s[(last + j) * 2] = true;
+                        s[(last + j) * 2 + 1] = true;
+                    }
+                    return last;
+                }
+            }
+            s.fill(true);
+            d.fill(true);
+            return -1;
+        }
+    };
+};
+
+namespace GuestABI
+{
+
+/*
+ * Integer arguments and return values.
+ */
+
+// Integers are handled exactly as in the base ABI.
+template <typename Integer>
+struct Result<Aapcs32Vfp, Integer, typename std::enable_if<
+    std::is_integral<Integer>::value>::type> : public Result<Aapcs32, Integer>
+{};
+
+template <typename Integer>
+struct Argument<Aapcs32Vfp, Integer, typename std::enable_if<
+    std::is_integral<Integer>::value>::type> :
+    public Argument<Aapcs32, Integer>
+{};
+
+
+/*
+ * Floating point arguments and return values.
+ */
+
+template <typename Float>
+struct Result<Aapcs32Vfp, Float, typename std::enable_if<
+    std::is_floating_point<Float>::value>::type>
+{
+    // Return a floating point value in lane 0 of vector register 0, unless
+    // the function is variadic, in which case the base ABI is used.
+    static void
+    store(ThreadContext *tc, const Float &f, Aapcs32Vfp::State &state)
+    {
+        if (state.variadic) {
+            storeResult<Aapcs32, Float>(tc, f, state);
+            return;
+        }
+
+        // Read-modify-write so the other lanes of the register survive.
+        RegId id(VecRegClass, 0);
+        auto reg = tc->readVecReg(id);
+        reg.laneView<Float, 0>() = f;
+        tc->setVecReg(id, reg);
+    }
+};
+
+template <typename Float>
+struct Argument<Aapcs32Vfp, Float, typename std::enable_if<
+    std::is_floating_point<Float>::value>::type> : public Aapcs32ArgumentBase
+{
+    // Fetch a floating point argument from the next free VFP register of
+    // the right width, or from the stack once none is left. Variadic
+    // functions use the base ABI instead.
+    static Float
+    get(ThreadContext *tc, Aapcs32Vfp::State &state)
+    {
+        if (state.variadic)
+            return getArgument<Aapcs32, Float>(tc, state);
+
+        int index = state.allocate(Float{}, 1);
+
+        if (index >= 0) {
+            // The allocated index counts Float sized lanes of the 16 byte
+            // vector registers. It is only known at run time, so the values
+            // derived from it can't be constexpr.
+            constexpr int lane_per_reg = 16 / sizeof(Float);
+            const int reg = index / lane_per_reg;
+            const int lane = index % lane_per_reg;
+
+            RegId id(VecRegClass, reg);
+            auto val = tc->readVecReg(id);
+            return val.laneView<Float>(lane);
+        }
+
+        return loadFromStack<Float>(tc, state);
+    }
+};
+
+
+/*
+ * Composite arguments and return values which are not Homogeneous Aggregates.
+ */
+
+// These are handled exactly as in the base ABI.
+template <typename Composite>
+struct Result<Aapcs32Vfp, Composite, typename std::enable_if<
+    IsAapcs32Composite<Composite>::value &&
+    !IsAapcs32HomogeneousAggregate<Composite>::value>::type> :
+    public Result<Aapcs32, Composite>
+{};
+
+template <typename Composite>
+struct Argument<Aapcs32Vfp, Composite, typename std::enable_if<
+    IsAapcs32Composite<Composite>::value &&
+    !IsAapcs32HomogeneousAggregate<Composite>::value>::type> :
+    public Argument<Aapcs32, Composite>
+{};
+
+
+/*
+ * Homogeneous Aggregate argument and return values.
+ */
+
+// Maps an array type E[N] to its element type E, and anything else to void.
+template <typename T>
+struct Aapcs32ArrayType { using Type = void; };
+
+template <typename E, size_t N>
+struct Aapcs32ArrayType<E[N]> { using Type = E; };
+
+template <typename HA>
+struct Argument<Aapcs32Vfp, HA, typename std::enable_if<
+    IsAapcs32HomogeneousAggregate<HA>::value>::type> :
+    public Aapcs32ArgumentBase
+{
+    // Only non-variadic aggregates of up to four floating point elements
+    // travel in VFP registers; everything else uses the base ABI.
+    static bool
+    useBaseABI(Aapcs32Vfp::State &state)
+    {
+        using Elem = typename Aapcs32ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+        return state.variadic || !std::is_floating_point<Elem>::value ||
+            Count > 4;
+    }
+
+    // NOTE(review): IsAapcs32HomogeneousAggregate only matches array types,
+    // and a C++ function cannot return an array by value, so this looks
+    // uninstantiable as declared. The signature is left alone for callers;
+    // confirm how it is meant to be used.
+    static HA
+    get(ThreadContext *tc, Aapcs32Vfp::State &state)
+    {
+        using Elem = typename Aapcs32ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+
+        if (useBaseABI(state))
+            return getArgument<Aapcs32, HA>(tc, state);
+
+        // Claim one VFP register per element, all in a row.
+        int base = state.allocate(Elem{}, Count);
+        if (base >= 0) {
+            constexpr int lane_per_reg = 16 / sizeof(Elem);
+            HA ha;
+            for (size_t i = 0; i < Count; i++) {
+                // These depend on the run time values of base and i, so
+                // they can't be constexpr.
+                const int index = base + static_cast<int>(i);
+                const int reg = index / lane_per_reg;
+                const int lane = index % lane_per_reg;
+
+                RegId id(VecRegClass, reg);
+                auto val = tc->readVecReg(id);
+                ha[i] = val.laneView<Elem>(lane);
+            }
+            return ha;
+        }
+
+        return loadFromStack<HA>(tc, state);
+    }
+
+    // NOTE(review): no prepare() is visible on Argument<Aapcs32, ...> in
+    // this file -- confirm the base ABI one called here exists.
+    static void
+    prepare(ThreadContext *tc, Aapcs32Vfp::State &state)
+    {
+        if (useBaseABI(state))
+            return Argument<Aapcs32, HA>::prepare(tc, state);
+    }
+};
+
+template <typename HA>
+struct Result<Aapcs32Vfp, HA,
+    typename std::enable_if<IsAapcs32HomogeneousAggregate<HA>::value>::type>
+{
+    // Only non-variadic aggregates of up to four floating point elements
+    // travel in VFP registers; everything else uses the base ABI.
+    static bool
+    useBaseABI(Aapcs32Vfp::State &state)
+    {
+        using Elem = typename Aapcs32ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+        return state.variadic || !std::is_floating_point<Elem>::value ||
+            Count > 4;
+    }
+
+    // Return the aggregate in consecutive VFP lanes starting at lane 0 of
+    // vector register 0. Nothing is returned to the caller of store().
+    static void
+    store(ThreadContext *tc, const HA &ha, Aapcs32Vfp::State &state)
+    {
+        using Elem = typename Aapcs32ArrayType<HA>::Type;
+        constexpr size_t Count = sizeof(HA) / sizeof(Elem);
+
+        if (useBaseABI(state)) {
+            storeResult<Aapcs32, HA>(tc, ha, state);
+            return;
+        }
+
+        constexpr int lane_per_reg = 16 / sizeof(Elem);
+        for (size_t i = 0; i < Count; i++) {
+            // These depend on the run time value of i, so they can't be
+            // constexpr.
+            const int reg = static_cast<int>(i) / lane_per_reg;
+            const int lane = static_cast<int>(i) % lane_per_reg;
+
+            // Read-modify-write, with each element going to its own lane.
+            RegId id(VecRegClass, reg);
+            auto val = tc->readVecReg(id);
+            val.laneView<Elem>(lane) = ha[i];
+            tc->setVecReg(id, val);
+        }
+    }
+
+    static void
+    prepare(ThreadContext *tc, Aapcs32Vfp::State &state)
+    {
+        if (useBaseABI(state))
+            return Result<Aapcs32, HA>::prepare(tc, state);
+    }
+};
+
+
+/*
+ * Varargs
+ */
+
+template <typename ...Types>
+struct Argument<Aapcs32Vfp, VarArgs<Types...>>
+{
+    // Asking for the VarArgs marks the call as variadic, which routes the
+    // floating point and homogeneous aggregate handlers in this file to the
+    // base ABI.
+    static VarArgs<Types...>
+    get(ThreadContext *tc, typename Aapcs32Vfp::State &state)
+    {
+        state.variadic = true;
+        return getArgument<Aapcs32, VarArgs<Types...>>(tc, state);
+    }
+};
+
+} // namespace GuestABI
+
+#endif // __ARCH_ARM_AAPCS32_HH__
-- 
2.30.2