From 612f8f074fa1099cf70faf495d46cc647762a031 Mon Sep 17 00:00:00 2001 From: ARM gem5 Developers Date: Fri, 24 Jan 2014 15:29:34 -0600 Subject: [PATCH] arm: Add support for ARMv8 (AArch64 & AArch32) Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64 kernel you are restricted to AArch64 user-mode binaries. This will be addressed in a later patch. Note: Virtualization is only supported in AArch32 mode. This will also be fixed in a later patch. Contributors: Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation) Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation) Mbou Eyole (AArch64 NEON, validation) Ali Saidi (AArch64 Linux support, code integration, validation) Edmund Grimley-Evans (AArch64 FP) William Wang (AArch64 Linux support) Rene De Jong (AArch64 Linux support, performance opt.) Matt Horsnell (AArch64 MP, validation) Matt Evans (device models, code integration, validation) Chris Adeniyi-Jones (AArch64 syscall-emulation) Prakash Ramrakhyani (validation) Dam Sunwoo (validation) Chander Sudanthi (validation) Stephan Diestelhorst (validation) Andreas Hansson (code integration, performance opt.) Eric Van Hensbergen (performance opt.) Gabe Black --- configs/common/FSConfig.py | 3 +- configs/common/O3_ARM_v7a.py | 2 +- configs/common/Options.py | 11 + configs/common/cpu2000.py | 2 +- configs/example/fs.py | 6 + configs/example/se.py | 9 +- ext/libelf/elf_common.h | 1 + src/arch/arm/ArmISA.py | 58 +- src/arch/arm/ArmSystem.py | 28 +- src/arch/arm/ArmTLB.py | 29 +- src/arch/arm/SConscript | 9 +- src/arch/arm/decoder.cc | 21 +- src/arch/arm/decoder.hh | 12 + src/arch/arm/faults.cc | 1286 +++++++- src/arch/arm/faults.hh | 451 ++- src/arch/arm/insts/branch64.cc | 146 + src/arch/arm/insts/branch64.hh | 166 + src/arch/arm/insts/data64.cc | 203 ++ src/arch/arm/insts/data64.hh | 256 ++ src/arch/arm/insts/fplib.cc | 3086 +++++++++++++++++++ src/arch/arm/insts/fplib.hh | 283 ++ src/arch/arm/insts/macromem.cc | 528 +++- src/arch/arm/insts/macromem.hh | 207 +- src/arch/arm/insts/mem.cc | 5 +- src/arch/arm/insts/mem64.cc | 193 ++ src/arch/arm/insts/mem64.hh | 253 ++ src/arch/arm/insts/misc.cc | 38 +- src/arch/arm/insts/misc.hh | 55 +- src/arch/arm/insts/misc64.cc | 73 + src/arch/arm/insts/misc64.hh | 92 + src/arch/arm/insts/neon64_mem.hh | 128 + src/arch/arm/insts/pred_inst.hh | 36 +- src/arch/arm/insts/static_inst.cc | 312 +- src/arch/arm/insts/static_inst.hh | 99 +- src/arch/arm/insts/vfp.cc | 484 +-- src/arch/arm/insts/vfp.hh | 489 ++- src/arch/arm/interrupts.cc | 121 +- src/arch/arm/interrupts.hh | 112 +- src/arch/arm/intregs.hh | 188 +- src/arch/arm/isa.cc | 1629 +++++++++- src/arch/arm/isa.hh | 331 +- src/arch/arm/isa/bitfields.isa | 3 +- src/arch/arm/isa/decoder/aarch64.isa | 48 + src/arch/arm/isa/decoder/arm.isa | 12 +- src/arch/arm/isa/decoder/decoder.isa | 10 +- src/arch/arm/isa/decoder/thumb.isa | 10 +- src/arch/arm/isa/formats/aarch64.isa | 2035 +++++++++++++ src/arch/arm/isa/formats/branch.isa | 66 +- src/arch/arm/isa/formats/formats.isa | 8 +- src/arch/arm/isa/formats/fp.isa | 103 +- src/arch/arm/isa/formats/mem.isa | 2 +- src/arch/arm/isa/formats/misc.isa | 230 +- src/arch/arm/isa/formats/neon64.isa | 2626 ++++++++++++++++ src/arch/arm/isa/formats/uncond.isa | 15 +- src/arch/arm/isa/formats/unimp.isa | 23 +- src/arch/arm/isa/includes.isa | 9 +- src/arch/arm/isa/insts/aarch64.isa | 58 + src/arch/arm/isa/insts/branch.isa | 29 +- src/arch/arm/isa/insts/branch64.isa | 248 ++ src/arch/arm/isa/insts/data.isa | 5 +- 
src/arch/arm/isa/insts/data64.isa | 465 +++ src/arch/arm/isa/insts/div.isa | 12 - src/arch/arm/isa/insts/fp.isa | 154 +- src/arch/arm/isa/insts/fp64.isa | 811 +++++ src/arch/arm/isa/insts/insts.isa | 21 +- src/arch/arm/isa/insts/ldr.isa | 8 +- src/arch/arm/isa/insts/ldr64.isa | 446 +++ src/arch/arm/isa/insts/m5ops.isa | 212 +- src/arch/arm/isa/insts/macromem.isa | 71 +- src/arch/arm/isa/insts/mem.isa | 32 +- src/arch/arm/isa/insts/misc.isa | 446 ++- src/arch/arm/isa/insts/misc64.isa | 147 + src/arch/arm/isa/insts/neon.isa | 569 +++- src/arch/arm/isa/insts/neon64.isa | 3355 +++++++++++++++++++++ src/arch/arm/isa/insts/neon64_mem.isa | 471 +++ src/arch/arm/isa/insts/str.isa | 9 +- src/arch/arm/isa/insts/str64.isa | 372 +++ src/arch/arm/isa/insts/swap.isa | 7 +- src/arch/arm/isa/operands.isa | 175 +- src/arch/arm/isa/templates/basic.isa | 19 + src/arch/arm/isa/templates/branch64.isa | 141 + src/arch/arm/isa/templates/data64.isa | 279 ++ src/arch/arm/isa/templates/macromem.isa | 126 +- src/arch/arm/isa/templates/mem.isa | 22 +- src/arch/arm/isa/templates/mem64.isa | 686 +++++ src/arch/arm/isa/templates/misc.isa | 154 +- src/arch/arm/isa/templates/misc64.isa | 91 + src/arch/arm/isa/templates/neon.isa | 24 +- src/arch/arm/isa/templates/neon64.isa | 527 ++++ src/arch/arm/isa/templates/templates.isa | 13 +- src/arch/arm/isa/templates/vfp.isa | 105 +- src/arch/arm/isa/templates/vfp64.isa | 140 + src/arch/arm/isa_traits.hh | 7 +- src/arch/arm/linux/linux.cc | 121 +- src/arch/arm/linux/linux.hh | 190 +- src/arch/arm/linux/process.cc | 1308 +++++++- src/arch/arm/linux/process.hh | 61 +- src/arch/arm/linux/system.cc | 26 +- src/arch/arm/linux/system.hh | 4 +- src/arch/arm/locked_mem.hh | 35 +- src/arch/arm/miscregs.cc | 3130 ++++++++++++++++++- src/arch/arm/miscregs.hh | 1875 ++++++++++-- src/arch/arm/nativetrace.cc | 4 +- src/arch/arm/pagetable.hh | 218 +- src/arch/arm/process.cc | 132 +- src/arch/arm/process.hh | 41 +- src/arch/arm/registers.hh | 10 +- src/arch/arm/remote_gdb.cc | 197 +- src/arch/arm/remote_gdb.hh | 25 +- src/arch/arm/stage2_lookup.cc | 200 ++ src/arch/arm/stage2_lookup.hh | 108 + src/arch/arm/stage2_mmu.cc | 146 + src/arch/arm/stage2_mmu.hh | 115 + src/arch/arm/system.cc | 143 +- src/arch/arm/system.hh | 167 +- src/arch/arm/table_walker.cc | 1551 ++++++++-- src/arch/arm/table_walker.hh | 635 +++- src/arch/arm/tlb.cc | 1204 ++++++-- src/arch/arm/tlb.hh | 195 +- src/arch/arm/types.hh | 202 +- src/arch/arm/utility.cc | 865 +++++- src/arch/arm/utility.hh | 171 +- src/arch/arm/vtophys.cc | 71 +- src/base/loader/elf_object.cc | 47 +- src/base/loader/elf_object.hh | 15 +- src/base/loader/object_file.cc | 12 +- src/base/loader/object_file.hh | 7 +- src/cpu/BaseCPU.py | 30 +- src/dev/arm/RealView.py | 43 +- src/dev/arm/SConscript | 5 +- src/dev/arm/generic_timer.cc | 204 ++ src/dev/arm/generic_timer.hh | 199 ++ src/dev/arm/gic_pl390.cc | 3 +- src/dev/arm/vgic.cc | 553 ++++ src/dev/arm/vgic.hh | 262 ++ src/sim/System.py | 3 +- src/sim/process.cc | 13 +- src/sim/serialize.hh | 2 +- src/sim/system.cc | 12 +- src/sim/system.hh | 13 +- system/arm/aarch64_bootloader/LICENSE.txt | 28 + system/arm/aarch64_bootloader/boot.S | 124 + system/arm/aarch64_bootloader/makefile | 4 + util/cpt_upgrader.py | 323 +- util/m5/m5op_arm_A64.S | 149 + 145 files changed, 39766 insertions(+), 2533 deletions(-) create mode 100644 src/arch/arm/insts/branch64.cc create mode 100644 src/arch/arm/insts/branch64.hh create mode 100644 src/arch/arm/insts/data64.cc create mode 100644 src/arch/arm/insts/data64.hh create mode 
100644 src/arch/arm/insts/fplib.cc create mode 100644 src/arch/arm/insts/fplib.hh create mode 100644 src/arch/arm/insts/mem64.cc create mode 100644 src/arch/arm/insts/mem64.hh create mode 100644 src/arch/arm/insts/misc64.cc create mode 100644 src/arch/arm/insts/misc64.hh create mode 100644 src/arch/arm/insts/neon64_mem.hh create mode 100644 src/arch/arm/isa/decoder/aarch64.isa create mode 100644 src/arch/arm/isa/formats/aarch64.isa create mode 100644 src/arch/arm/isa/formats/neon64.isa create mode 100644 src/arch/arm/isa/insts/aarch64.isa create mode 100644 src/arch/arm/isa/insts/branch64.isa create mode 100644 src/arch/arm/isa/insts/data64.isa create mode 100644 src/arch/arm/isa/insts/fp64.isa create mode 100644 src/arch/arm/isa/insts/ldr64.isa create mode 100644 src/arch/arm/isa/insts/misc64.isa create mode 100644 src/arch/arm/isa/insts/neon64.isa create mode 100644 src/arch/arm/isa/insts/neon64_mem.isa create mode 100644 src/arch/arm/isa/insts/str64.isa create mode 100644 src/arch/arm/isa/templates/branch64.isa create mode 100644 src/arch/arm/isa/templates/data64.isa create mode 100644 src/arch/arm/isa/templates/mem64.isa create mode 100644 src/arch/arm/isa/templates/misc64.isa create mode 100644 src/arch/arm/isa/templates/neon64.isa create mode 100644 src/arch/arm/isa/templates/vfp64.isa create mode 100755 src/arch/arm/stage2_lookup.cc create mode 100755 src/arch/arm/stage2_lookup.hh create mode 100755 src/arch/arm/stage2_mmu.cc create mode 100755 src/arch/arm/stage2_mmu.hh create mode 100644 src/dev/arm/generic_timer.cc create mode 100644 src/dev/arm/generic_timer.hh create mode 100644 src/dev/arm/vgic.cc create mode 100644 src/dev/arm/vgic.hh create mode 100644 system/arm/aarch64_bootloader/LICENSE.txt create mode 100644 system/arm/aarch64_bootloader/boot.S create mode 100644 system/arm/aarch64_bootloader/makefile create mode 100644 util/m5/m5op_arm_A64.S diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index 8905ba2fa..58ad1a7c9 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -242,7 +242,8 @@ def makeArmSystem(mem_mode, machine_type, mdesc = None, self.realview = VExpress_ELT() elif machine_type == "VExpress_EMM": self.realview = VExpress_EMM() - self.load_addr_mask = 0xffffffff + elif machine_type == "VExpress_EMM64": + self.realview = VExpress_EMM64() else: print "Unknown Machine Type" sys.exit(1) diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py index aedafaf4d..3b7df9988 100644 --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -139,7 +139,7 @@ class O3_ARM_v7a_3(DerivO3CPU): backComSize = 5 forwardComSize = 5 numPhysIntRegs = 128 - numPhysFloatRegs = 128 + numPhysFloatRegs = 192 numIQEntries = 32 numROBEntries = 40 diff --git a/configs/common/Options.py b/configs/common/Options.py index 2cca4ef57..209d24357 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -94,6 +94,9 @@ def addCommonOptions(parser): default="512MB", help="Specify the physical memory size (single memory)") + parser.add_option("-l", "--lpae", action="store_true") + parser.add_option("-V", "--virtualisation", action="store_true") + # Cache Options parser.add_option("--caches", action="store_true") parser.add_option("--l2cache", action="store_true") @@ -197,6 +200,14 @@ def addCommonOptions(parser): parser.add_option("--at-instruction", action="store_true", default=False, help="""Treat value of --checkpoint-restore or --take-checkpoint as a number of instructions.""") + 
parser.add_option("--spec-input", default="ref", type="choice", + choices=["ref", "test", "train", "smred", "mdred", + "lgred"], + help="Input set size for SPEC CPU2000 benchmarks.") + parser.add_option("--arm-iset", default="arm", type="choice", + choices=["arm", "thumb", "aarch64"], + help="ARM instruction set.") + def addSEOptions(parser): # Benchmark options diff --git a/configs/common/cpu2000.py b/configs/common/cpu2000.py index 443399234..5f01d28bf 100644 --- a/configs/common/cpu2000.py +++ b/configs/common/cpu2000.py @@ -663,7 +663,7 @@ class vortex(Benchmark): stdin = None def __init__(self, isa, os, input_set): - if (isa == 'alpha' or isa == 'arm'): + if (isa in ('alpha', 'arm', 'thumb', 'aarch64')): self.endian = 'lendian' elif (isa == 'sparc' or isa == 'sparc32'): self.endian = 'bendian' diff --git a/configs/example/fs.py b/configs/example/fs.py index 4cfb3e8e2..cb9b264d2 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -140,6 +140,12 @@ if options.kernel is not None: if options.script is not None: test_sys.readfile = options.script +if options.lpae: + test_sys.have_lpae = True + +if options.virtualisation: + test_sys.have_virtualization = True + test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain diff --git a/configs/example/se.py b/configs/example/se.py index f7e7f1a65..d4f3e2dd9 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -135,9 +135,14 @@ if options.bench: for app in apps: try: if buildEnv['TARGET_ISA'] == 'alpha': - exec("workload = %s('alpha', 'tru64', 'ref')" % app) + exec("workload = %s('alpha', 'tru64', '%s')" % ( + app, options.spec_input)) + elif buildEnv['TARGET_ISA'] == 'arm': + exec("workload = %s('arm_%s', 'linux', '%s')" % ( + app, options.arm_iset, options.spec_input)) else: - exec("workload = %s(buildEnv['TARGET_ISA'], 'linux', 'ref')" % app) + exec("workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" % ( + app, options.spec_input)) multiprocesses.append(workload.makeLiveProcess()) except: print >>sys.stderr, "Unable to find workload for %s: %s" % (buildEnv['TARGET_ISA'], app) diff --git a/ext/libelf/elf_common.h b/ext/libelf/elf_common.h index c169e7e40..bad988d87 100644 --- a/ext/libelf/elf_common.h +++ b/ext/libelf/elf_common.h @@ -172,6 +172,7 @@ typedef struct { #define EM_TINYJ 61 /* Advanced Logic Corp. TinyJ processor. */ #define EM_X86_64 62 /* Advanced Micro Devices x86-64 */ #define EM_AMD64 EM_X86_64 /* Advanced Micro Devices x86-64 (compat) */ +#define EM_AARCH64 183 /* AArch64 64 bit ARM. */ /* Non-standard or deprecated. */ #define EM_486 6 /* Intel i486. */ diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 55a589c32..eaec92f4d 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -34,8 +34,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # Authors: Andreas Sandberg +# Giacomo Gabrielli from m5.params import * +from m5.proxy import * from m5.SimObject import SimObject class ArmISA(SimObject): @@ -43,12 +45,9 @@ class ArmISA(SimObject): cxx_class = 'ArmISA::ISA' cxx_header = "arch/arm/isa.hh" - # 0x35 Implementor is '5' from "M5" - # 0x0 Variant - # 0xf Architecture from CPUID scheme - # 0xc00 Primary part number ("c" or higher implies ARM v7) - # 0x0 Revision - midr = Param.UInt32(0x350fc000, "Main ID Register") + system = Param.System(Parent.any, "System this ISA object belongs to") + + midr = Param.UInt32(0x410fc0f0, "MIDR value") # See section B4.1.93 - B4.1.94 of the ARM ARM # @@ -56,19 +55,19 @@ class ArmISA(SimObject): # Note: ThumbEE is disabled for now since we don't support CP14 # config registers and jumping to ThumbEE vectors id_pfr0 = Param.UInt32(0x00000031, "Processor Feature Register 0") - # !Timer | !Virti | !M Profile | !TrustZone | ARMv4 - id_pfr1 = Param.UInt32(0x00000001, "Processor Feature Register 1") + # !Timer | Virti | !M Profile | TrustZone | ARMv4 + id_pfr1 = Param.UInt32(0x00001011, "Processor Feature Register 1") # See section B4.1.89 - B4.1.92 of the ARM ARM # VMSAv7 support - id_mmfr0 = Param.UInt32(0x00000003, "Memory Model Feature Register 0") + id_mmfr0 = Param.UInt32(0x10201103, "Memory Model Feature Register 0") id_mmfr1 = Param.UInt32(0x00000000, "Memory Model Feature Register 1") # no HW access | WFI stalling | ISB and DSB | # all TLB maintenance | no Harvard id_mmfr2 = Param.UInt32(0x01230000, "Memory Model Feature Register 2") # SuperSec | Coherent TLB | Bcast Maint | # BP Maint | Cache Maint Set/way | Cache Maint MVA - id_mmfr3 = Param.UInt32(0xF0102211, "Memory Model Feature Register 3") + id_mmfr3 = Param.UInt32(0x02102211, "Memory Model Feature Register 3") # See section B4.1.84 of ARM ARM # All values are latest for ARMv7-A profile @@ -79,5 +78,40 @@ class ArmISA(SimObject): id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4") id_isar5 = Param.UInt32(0x00000000, "Instruction Set Attribute Register 5") + fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register") + + # [31:0] is implementation defined + id_aa64afr0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 0") + # Reserved for future expansion + id_aa64afr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 1") + + # 1 CTX CMPs | 2 WRPs | 2 BRPs | !PMU | !Trace | Debug v8-A + id_aa64dfr0_el1 = Param.UInt64(0x0000000000101006, + "AArch64 Debug Feature Register 0") + # Reserved for future expansion + id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Debug Feature Register 1") + + # !CRC32 | !SHA2 | !SHA1 | !AES + id_aa64isar0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 0") + # Reserved for future expansion + id_aa64isar1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 1") + + # 4K | 64K | !16K | !BigEndEL0 | !SNSMem | !BigEnd | 8b ASID | 40b PA + id_aa64mmfr0_el1 = Param.UInt64(0x0000000000f00002, + "AArch64 Memory Model Feature Register 0") + # Reserved for future expansion + id_aa64mmfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Memory Model Feature Register 1") - fpsid = Param.UInt32(0x410430A0, "Floating-point System ID Register") + # !GICv3 CP15 | AdvSIMD | FP | !EL3 | !EL2 | EL1 (AArch64) | EL0 (AArch64) + # (no AArch32/64 interprocessing support for now) + id_aa64pfr0_el1 = Param.UInt64(0x0000000000000011, + "AArch64 Processor Feature 
Register 0") + # Reserved for future expansion + id_aa64pfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Processor Feature Register 1") diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py index b48c2a29d..39b7ec8ff 100644 --- a/src/arch/arm/ArmSystem.py +++ b/src/arch/arm/ArmSystem.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -44,7 +44,8 @@ class ArmMachineType(Enum): 'RealView_PBX' : 1901, 'VExpress_ELT' : 2272, 'VExpress_CA9' : 2272, - 'VExpress_EMM' : 2272} + 'VExpress_EMM' : 2272, + 'VExpress_EMM64' : 2272} class ArmSystem(System): type = 'ArmSystem' @@ -54,6 +55,23 @@ class ArmSystem(System): boot_loader = Param.String("", "File that contains the boot loader code if any") gic_cpu_addr = Param.Addr(0, "Addres of the GIC CPU interface") flags_addr = Param.Addr(0, "Address of the flags register for MP booting") + have_security = Param.Bool(False, + "True if Security Extensions are implemented") + have_virtualization = Param.Bool(False, + "True if Virtualization Extensions are implemented") + have_lpae = Param.Bool(False, "True if LPAE is implemented") + have_generic_timer = Param.Bool(False, + "True if the Generic Timer extension is implemented") + highest_el_is_64 = Param.Bool(False, + "True if the register width of the highest implemented exception level " + "is 64 bits (ARMv8)") + reset_addr_64 = Param.UInt64(0x0, + "Reset address if the highest implemented exception level is 64 bits " + "(ARMv8)") + phys_addr_range_64 = Param.UInt8(40, + "Supported physical address range in bits when using AArch64 (ARMv8)") + have_large_asid_64 = Param.Bool(False, + "True if ASID is 16 bits in AArch64 (ARMv8)") class LinuxArmSystem(ArmSystem): type = 'LinuxArmSystem' @@ -61,8 +79,10 @@ class LinuxArmSystem(ArmSystem): load_addr_mask = 0x0fffffff machine_type = Param.ArmMachineType('RealView_PBX', "Machine id from http://www.arm.linux.org.uk/developer/machines/") - atags_addr = Param.Addr(0x100, - "Address where default atags structure should be written") + atags_addr = Param.Addr("Address where default atags structure should " \ + "be written") + boot_release_addr = Param.Addr(0xfff8, "Address where secondary CPUs " \ + "spin waiting boot in the loader") dtb_filename = Param.String("", "File that contains the Device Tree Blob. Don't use DTB if empty.") early_kernel_symbols = Param.Bool(False, diff --git a/src/arch/arm/ArmTLB.py b/src/arch/arm/ArmTLB.py index c70dd80c8..01ac8016a 100644 --- a/src/arch/arm/ArmTLB.py +++ b/src/arch/arm/ArmTLB.py @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2013 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -42,10 +42,12 @@ from m5.params import * from m5.proxy import * from MemObject import MemObject +# Basic stage 1 translation objects class ArmTableWalker(MemObject): type = 'ArmTableWalker' cxx_class = 'ArmISA::TableWalker' cxx_header = "arch/arm/table_walker.hh" + is_stage2 = Param.Bool(False, "Is this object for stage 2 translation?") port = MasterPort("Port for TableWalker to do walk the translation with") sys = Param.System(Parent.any, "system object parameter") num_squash_per_cycle = Param.Unsigned(2, @@ -57,3 +59,28 @@ class ArmTLB(SimObject): cxx_header = "arch/arm/tlb.hh" size = Param.Int(64, "TLB size") walker = Param.ArmTableWalker(ArmTableWalker(), "HW Table walker") + is_stage2 = Param.Bool(False, "Is this a stage 2 TLB?") + +# Stage 2 translation objects, only used when virtualisation is being used +class ArmStage2TableWalker(ArmTableWalker): + is_stage2 = True + +class ArmStage2TLB(ArmTLB): + size = 32 + walker = ArmStage2TableWalker() + is_stage2 = True + +class ArmStage2MMU(SimObject): + type = 'ArmStage2MMU' + cxx_class = 'ArmISA::Stage2MMU' + cxx_header = 'arch/arm/stage2_mmu.hh' + tlb = Param.ArmTLB("Stage 1 TLB") + stage2_tlb = Param.ArmTLB("Stage 2 TLB") + +class ArmStage2IMMU(ArmStage2MMU): + tlb = Parent.itb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) + +class ArmStage2DMMU(ArmStage2MMU): + tlb = Parent.dtb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 8d13a9b2d..aa9ce417b 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -49,12 +49,17 @@ if env['TARGET_ISA'] == 'arm': Dir('isa/formats') Source('decoder.cc') Source('faults.cc') + Source('insts/branch64.cc') + Source('insts/data64.cc') Source('insts/macromem.cc') Source('insts/mem.cc') + Source('insts/mem64.cc') Source('insts/misc.cc') + Source('insts/misc64.cc') Source('insts/pred_inst.cc') Source('insts/static_inst.cc') Source('insts/vfp.cc') + Source('insts/fplib.cc') Source('interrupts.cc') Source('isa.cc') Source('linux/linux.cc') @@ -67,6 +72,8 @@ if env['TARGET_ISA'] == 'arm': Source('stacktrace.cc') Source('system.cc') Source('table_walker.cc') + Source('stage2_mmu.cc') + Source('stage2_lookup.cc') Source('tlb.cc') Source('utility.cc') Source('vtophys.cc') diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index e957ce0e7..940d85b8e 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. 
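The stage 2 objects defined in ArmTLB.py above reflect how the Virtualization Extensions translate a guest access twice: stage 1 maps a virtual address to an intermediate physical address (IPA) under guest control, and stage 2 maps that IPA to a real physical address under hypervisor control. A self-contained conceptual sketch of that composition follows; the toy page maps stand in for the stage 1 and stage 2 TLBs and are not the Stage2MMU interface:

    #include <cstdint>
    #include <map>

    using Addr = std::uint64_t;

    std::map<Addr, Addr> stage1; // guest VA page -> IPA page (guest-owned)
    std::map<Addr, Addr> stage2; // IPA page -> PA page (hypervisor-owned)

    // Two-stage lookup at a 4KB granule; at() throws on a miss, which is
    // where a real implementation would start a hardware table walk instead.
    Addr translateTwoStage(Addr va)
    {
        const Addr mask = ~Addr(0xfff);
        Addr ipa = stage1.at(va & mask) | (va & 0xfff); // stage 1: VA -> IPA
        return stage2.at(ipa & mask) | (ipa & 0xfff);   // stage 2: IPA -> PA
    }

Splitting the MMU this way is what lets the stage 2 walker below be a separate ArmTableWalker instance with its own is_stage2 flag.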
* @@ -47,9 +59,11 @@ Decoder::process() if (!emi.thumb) { emi.instBits = data; - emi.sevenAndFour = bits(data, 7) && bits(data, 4); - emi.isMisc = (bits(data, 24, 23) == 0x2 && - bits(data, 20) == 0); + if (!emi.aarch64) { + emi.sevenAndFour = bits(data, 7) && bits(data, 4); + emi.isMisc = (bits(data, 24, 23) == 0x2 && + bits(data, 20) == 0); + } consumeBytes(4); DPRINTF(Decoder, "Arm inst: %#x.\n", (uint64_t)emi); } else { @@ -112,6 +126,7 @@ Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst) data = inst; offset = (fetchPC >= pc.instAddr()) ? 0 : pc.instAddr() - fetchPC; emi.thumb = pc.thumb(); + emi.aarch64 = pc.aarch64(); emi.fpscrLen = fpscrLen; emi.fpscrStride = fpscrStride; diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 72776bcfd..315a3b6ad 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. * diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index be1c7ecc2..f8313efd2 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,9 +40,15 @@ * * Authors: Ali Saidi * Gabe Black + * Giacomo Gabrielli + * Thomas Grocutt */ #include "arch/arm/faults.hh" +#include "arch/arm/system.hh" +#include "arch/arm/utility.hh" +#include "arch/arm/insts/static_inst.hh" +#include "base/compiler.hh" #include "base/trace.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" @@ -52,61 +58,413 @@ namespace ArmISA { -template<> ArmFault::FaultVals ArmFaultVals::vals = -{"reset", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; +uint8_t ArmFault::shortDescFaultSources[] = { + 0x01, // AlignmentFault + 0x04, // InstructionCacheMaintenance + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x0c, // SynchExtAbtOnTranslTableWalkL1 + 0x0e, // SynchExtAbtOnTranslTableWalkL2 + 0xff, // SynchExtAbtOnTranslTableWalkL3 (INVALID) + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1c, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0xff, // SynchPtyErrOnTranslTableWalkL3 (INVALID) + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x07, // TranslationL2 + 0xff, // TranslationL3 (INVALID) + 0xff, // AccessFlagL0 (INVALID) + 0x03, // AccessFlagL1 + 0x06, // AccessFlagL2 + 0xff, // AccessFlagL3 (INVALID) + 0xff, // DomainL0 (INVALID) + 0x09, // DomainL1 + 0x0b, // DomainL2 + 0xff, // DomainL3 (INVALID) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0f, // PermissionL2 + 0xff, // PermissionL3 (INVALID) + 0x02, // DebugEvent + 0x08, // SynchronousExternalAbort + 0x10, // TLBConflictAbort + 0x19, // SynchPtyErrOnMemoryAccess + 0x16, // AsynchronousExternalAbort + 0x18, // AsynchPtyErrOnMemoryAccess + 
0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals::vals = -{"Undefined Instruction", 0x04, MODE_UNDEFINED, 4 ,2, false, false, - FaultStat()} ; +static_assert(sizeof(ArmFault::shortDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::shortDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals::vals = -{"Supervisor Call", 0x08, MODE_SVC, 4, 2, false, false, FaultStat()}; +uint8_t ArmFault::longDescFaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0xff, // AccessFlagL0 (INVALID) + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + 0xff, // DomainL0 (INVALID) + 0x3d, // DomainL1 + 0x3e, // DomainL2 + 0xff, // DomainL3 (RESERVED) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0x22, // DebugEvent + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0x11, // AsynchronousExternalAbort + 0x19, // AsynchPtyErrOnMemoryAccess + 0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals::vals = -{"Prefetch Abort", 0x0C, MODE_ABORT, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::longDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::longDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals::vals = -{"Data Abort", 0x10, MODE_ABORT, 8, 8, true, false, FaultStat()}; +uint8_t ArmFault::aarch64FaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0x14, // SynchExtAbtOnTranslTableWalkL0 + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0x1c, // SynchPtyErrOnTranslTableWalkL0 + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0x04, // TranslationL0 + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0x08, // AccessFlagL0 + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + // @todo: Section & Page Domain Fault in AArch64? 
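+    // (On the @todo above: domains exist only in the AArch32
+    // short-descriptor translation scheme; the AArch64 translation regime
+    // has no domain concept, hence the four DomainLx entries below stay
+    // invalid.)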
+    0xff, // DomainL0 (INVALID)
+    0xff, // DomainL1 (INVALID)
+    0xff, // DomainL2 (INVALID)
+    0xff, // DomainL3 (INVALID)
+    0x0c, // PermissionL0
+    0x0d, // PermissionL1
+    0x0e, // PermissionL2
+    0x0f, // PermissionL3
+    0xff, // DebugEvent (INVALID)
+    0x10, // SynchronousExternalAbort
+    0x30, // TLBConflictAbort
+    0x18, // SynchPtyErrOnMemoryAccess
+    0xff, // AsynchronousExternalAbort (INVALID)
+    0xff, // AsynchPtyErrOnMemoryAccess (INVALID)
+    0x00, // AddressSizeL0
+    0x01, // AddressSizeL1
+    0x02, // AddressSizeL2
+    0x03, // AddressSizeL3
+    0x40, // PrefetchTLBMiss
+    0x80  // PrefetchUncacheable
+};
 
-template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals =
-{"IRQ", 0x18, MODE_IRQ, 4, 4, true, false, FaultStat()};
+static_assert(sizeof(ArmFault::aarch64FaultSources) ==
+              ArmFault::NumFaultSources,
+              "Invalid size of ArmFault::aarch64FaultSources[]");
 
-template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals =
-{"FIQ", 0x1C, MODE_FIQ, 4, 4, true, true, FaultStat()};
+// Fields: name, offset, cur{ELT,ELH}Offset, lowerEL{64,32}Offset, next mode,
+//         {ARM, Thumb, ARM_ELR, Thumb_ELR} PC offset, hyp trap,
+//         {A, F} disable, class, stat
+template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = {
+    // Some dummy values (the reset vector has an IMPLEMENTATION DEFINED
+    // location in AArch64)
+    "Reset", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC,
+    0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = {
+    "Undefined Instruction", 0x004, 0x000, 0x200, 0x400, 0x600, MODE_UNDEFINED,
+    4, 2, 0, 0, true, false, false, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = {
+    "Supervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_SVC,
+    4, 2, 4, 2, true, false, false, EC_SVC_TO_HYP, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorCall>::vals = {
+    "Secure Monitor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_MON,
+    4, 4, 4, 4, false, true, true, EC_SMC_TO_HYP, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<HypervisorCall>::vals = {
+    "Hypervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_HYP,
+    4, 4, 4, 4, true, false, false, EC_HVC, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<PrefetchAbort>::vals = {
+    "Prefetch Abort", 0x00C, 0x000, 0x200, 0x400, 0x600, MODE_ABORT,
+    4, 4, 0, 0, true, true, false, EC_PREFETCH_ABORT_TO_HYP, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = {
+    "Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT,
+    8, 8, 0, 0, true, true, false, EC_DATA_ABORT_TO_HYP, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<VirtualDataAbort>::vals = {
+    "Virtual Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT,
+    8, 8, 0, 0, true, true, false, EC_INVALID, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<HypervisorTrap>::vals = {
+    // @todo: double check these values
+    "Hypervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_HYP,
+    0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = {
+    "IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ,
+    4, 4, 0, 0, false, true, false, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<VirtualInterrupt>::vals = {
+    "Virtual IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ,
+    4, 4, 0, 0, false, true, false, EC_INVALID, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = {
+    "FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ,
+    4, 4, 0, 0, false, true, true, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<VirtualFastInterrupt>::vals = {
+    "Virtual FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ,
+    4, 4, 0, 0, false, true, true, EC_INVALID, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<SupervisorTrap>::vals = {
+    // Some dummy values (SupervisorTrap is AArch64-only)
+    "Supervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_SVC,
+    0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorTrap>::vals = {
+    // Some dummy values (SecureMonitorTrap is AArch64-only)
+    "Secure Monitor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_MON,
+    0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<PCAlignmentFault>::vals = {
+    // Some dummy values (PCAlignmentFault is AArch64-only)
+    "PC Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC,
+    0, 0, 0, 0, true, false, false, EC_PC_ALIGNMENT, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<SPAlignmentFault>::vals = {
+    // Some dummy values (SPAlignmentFault is AArch64-only)
+    "SP Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC,
+    0, 0, 0, 0, true, false, false, EC_STACK_PTR_ALIGNMENT, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<SystemError>::vals = {
+    // Some dummy values (SError is AArch64-only)
+    "SError", 0x000, 0x180, 0x380, 0x580, 0x780, MODE_SVC,
+    0, 0, 0, 0, false, true, true, EC_SERROR, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = {
+    // Some dummy values
+    "Pipe Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC,
+    0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = {
+    // Some dummy values
+    "ArmSev Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC,
+    0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat()
+};
+template<> ArmFault::FaultVals ArmFaultVals<IllegalInstSetStateFault>::vals = {
+    // Some dummy values (IllegalInstSetStateFault is AArch64-only)
+    "Illegal Inst Set State Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC,
+    0, 0, 0, 0, true, false, false, EC_ILLEGAL_INST, FaultStat()
+};
 
-template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals =
-{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values
-
-template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals =
-{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values
-Addr
+Addr
 ArmFault::getVector(ThreadContext *tc)
 {
-    // ARM ARM B1-3
+    Addr base;
 
-    SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR);
+    // ARM ARM issue C B1.8.1
+    bool haveSecurity = ArmSystem::haveSecurity(tc);
 
     // panic if SCTLR.VE because I have no idea what to do with vectored
     // interrupts
+    SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR);
     assert(!sctlr.ve);
+
+    // Check for invalid modes
+    CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR);
+    assert(haveSecurity || cpsr.mode != MODE_MON);
+    assert(ArmSystem::haveVirtualization(tc) || cpsr.mode != MODE_HYP);
 
-    if (!sctlr.v)
-        return offset();
-    return offset() + HighVecs;
+    switch (cpsr.mode)
+    {
+      case MODE_MON:
+        base = tc->readMiscReg(MISCREG_MVBAR);
+        break;
+      case MODE_HYP:
+        base = tc->readMiscReg(MISCREG_HVBAR);
+        break;
+      default:
+        if (sctlr.v) {
+            base = HighVecs;
+        } else {
+            base = haveSecurity ?
tc->readMiscReg(MISCREG_VBAR) : 0; + } + break; + } + return base + offset(tc); +} +Addr +ArmFault::getVector64(ThreadContext *tc) +{ + Addr vbar; + switch (toEL) { + case EL3: + assert(ArmSystem::haveSecurity(tc)); + vbar = tc->readMiscReg(MISCREG_VBAR_EL3); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization(tc)); + // vbar = tc->readMiscReg(MISCREG_VBAR_EL2); + // break; + case EL1: + vbar = tc->readMiscReg(MISCREG_VBAR_EL1); + break; + default: + panic("Invalid target exception level"); + break; + } + return vbar + offset64(); } -void +MiscRegIndex +ArmFault::getSyndromeReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_ESR_EL1; + case EL2: + return MISCREG_ESR_EL2; + case EL3: + return MISCREG_ESR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +MiscRegIndex +ArmFault::getFaultAddrReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_FAR_EL1; + case EL2: + return MISCREG_FAR_EL2; + case EL3: + return MISCREG_FAR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +void +ArmFault::setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg) +{ + uint32_t value; + uint32_t exc_class = (uint32_t) ec(tc); + uint32_t issVal = iss(); + assert(!from64 || ArmSystem::highestELIs64(tc)); + + value = exc_class << 26; + + // HSR.IL not valid for Prefetch Aborts (0x20, 0x21) and Data Aborts (0x24, + // 0x25) for which the ISS information is not valid (ARMv7). + // @todo: ARMv8 revises AArch32 functionality: when HSR.IL is not + // valid it is treated as RES1. + if (to64) { + value |= 1 << 25; + } else if ((bits(exc_class, 5, 3) != 4) || + (bits(exc_class, 2) && bits(issVal, 24))) { + if (!machInst.thumb || machInst.bigThumb) + value |= 1 << 25; + } + // Condition code valid for EC[5:4] nonzero + if (!from64 && ((bits(exc_class, 5, 4) == 0) && + (bits(exc_class, 3, 0) != 0))) { + if (!machInst.thumb) { + uint32_t cond; + ConditionCode condCode = (ConditionCode) (uint32_t) machInst.condCode; + // If its on unconditional instruction report with a cond code of + // 0xE, ie the unconditional code + cond = (condCode == COND_UC) ? 
COND_AL : condCode; + value |= cond << 20; + value |= 1 << 24; + } + value |= bits(issVal, 19, 0); + } else { + value |= issVal; + } + tc->setMiscReg(syndrome_reg, value); +} + +void ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) { - // ARM ARM B1.6.3 + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + + if (ArmSystem::highestELIs64(tc)) { // ARMv8 + // Determine source exception level and mode + fromMode = (OperatingMode) (uint8_t) cpsr.mode; + fromEL = opModeToEL(fromMode); + if (opModeIs64(fromMode)) + from64 = true; + + // Determine target exception level + if (ArmSystem::haveSecurity(tc) && routeToMonitor(tc)) + toEL = EL3; + else + toEL = opModeToEL(nextMode()); + if (fromEL > toEL) + toEL = fromEL; + + if (toEL == ArmSystem::highestEL(tc) || ELIs64(tc, toEL)) { + // Invoke exception handler in AArch64 state + to64 = true; + invoke64(tc, inst); + return; + } + } + + // ARMv7 (ARM ARM issue C B1.9) + + bool have_security = ArmSystem::haveSecurity(tc); + bool have_virtualization = ArmSystem::haveVirtualization(tc); + FaultBase::invoke(tc); if (!FullSystem) return; countStat()++; SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); CPSR saved_cpsr = tc->readMiscReg(MISCREG_CPSR); saved_cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); saved_cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); @@ -118,22 +476,73 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) saved_cpsr.it2 = it.top6; saved_cpsr.it1 = it.bottom2; - cpsr.mode = nextMode(); + // if we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) { + ArmStaticInst *armInst = reinterpret_cast(inst.get()); + armInst->annotateFault(this); + } + + if (have_security && routeToMonitor(tc)) + cpsr.mode = MODE_MON; + else if (have_virtualization && routeToHyp(tc)) + cpsr.mode = MODE_HYP; + else + cpsr.mode = nextMode(); + + // Ensure Secure state if initially in Monitor mode + if (have_security && saved_cpsr.mode == MODE_MON) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + if (scr.ns) { + scr.ns = 0; + tc->setMiscRegNoEffect(MISCREG_SCR, scr); + } + } + + // some bits are set differently if we have been routed to hyp mode + if (cpsr.mode == MODE_HYP) { + SCTLR hsctlr = tc->readMiscReg(MISCREG_HSCTLR); + cpsr.t = hsctlr.te; + cpsr.e = hsctlr.ee; + if (!scr.ea) {cpsr.a = 1;} + if (!scr.fiq) {cpsr.f = 1;} + if (!scr.irq) {cpsr.i = 1;} + } else if (cpsr.mode == MODE_MON) { + // Special case handling when entering monitor mode + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + cpsr.a = 1; + cpsr.f = 1; + cpsr.i = 1; + } else { + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + + // The *Disable functions are virtual and different per fault + cpsr.a = cpsr.a | abortDisable(tc); + cpsr.f = cpsr.f | fiqDisable(tc); + cpsr.i = 1; + } cpsr.it1 = cpsr.it2 = 0; cpsr.j = 0; - - cpsr.t = sctlr.te; - cpsr.a = cpsr.a | abortDisable(); - cpsr.f = cpsr.f | fiqDisable(); - cpsr.i = 1; - cpsr.e = sctlr.ee; tc->setMiscReg(MISCREG_CPSR, cpsr); + // Make sure mailbox sets to one always tc->setMiscReg(MISCREG_SEV_MAILBOX, 1); - tc->setIntReg(INTREG_LR, curPc + - (saved_cpsr.t ? thumbPcOffset() : armPcOffset())); - switch (nextMode()) { + // Clear the exclusive monitor + tc->setMiscReg(MISCREG_LOCKFLAG, 0); + + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(MISCREG_ELR_HYP, curPc + + (saved_cpsr.t ? 
thumbPcOffset(true) : armPcOffset(true))); + } else { + tc->setIntReg(INTREG_LR, curPc + + (saved_cpsr.t ? thumbPcOffset(false) : armPcOffset(false))); + } + + switch (cpsr.mode) { case MODE_FIQ: tc->setMiscReg(MISCREG_SPSR_FIQ, saved_cpsr); break; @@ -143,12 +552,23 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) case MODE_SVC: tc->setMiscReg(MISCREG_SPSR_SVC, saved_cpsr); break; - case MODE_UNDEFINED: - tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + case MODE_MON: + assert(have_security); + tc->setMiscReg(MISCREG_SPSR_MON, saved_cpsr); break; case MODE_ABORT: tc->setMiscReg(MISCREG_SPSR_ABT, saved_cpsr); break; + case MODE_UNDEFINED: + tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + if (ec(tc) != EC_UNKNOWN) + setSyndrome(tc, MISCREG_HSR); + break; + case MODE_HYP: + assert(have_virtualization); + tc->setMiscReg(MISCREG_SPSR_HYP, saved_cpsr); + setSyndrome(tc, MISCREG_HSR); + break; default: panic("unknown Mode\n"); } @@ -161,7 +581,100 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) pc.nextThumb(pc.thumb()); pc.jazelle(cpsr.j); pc.nextJazelle(pc.jazelle()); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); + tc->pcState(pc); +} + +void +ArmFault::invoke64(ThreadContext *tc, StaticInstPtr inst) +{ + // Determine actual misc. register indices for ELR_ELx and SPSR_ELx + MiscRegIndex elr_idx, spsr_idx; + switch (toEL) { + case EL1: + elr_idx = MISCREG_ELR_EL1; + spsr_idx = MISCREG_SPSR_EL1; + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization()); + // elr_idx = MISCREG_ELR_EL2; + // spsr_idx = MISCREG_SPSR_EL2; + // break; + case EL3: + assert(ArmSystem::haveSecurity(tc)); + elr_idx = MISCREG_ELR_EL3; + spsr_idx = MISCREG_SPSR_EL3; + break; + default: + panic("Invalid target exception level"); + break; + } + + // Save process state into SPSR_ELx + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + CPSR spsr = cpsr; + spsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + spsr.c = tc->readIntReg(INTREG_CONDCODES_C); + spsr.v = tc->readIntReg(INTREG_CONDCODES_V); + if (from64) { + // Force some bitfields to 0 + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + } else { + spsr.ge = tc->readIntReg(INTREG_CONDCODES_GE); + ITSTATE it = tc->pcState().itstate(); + spsr.it2 = it.top6; + spsr.it1 = it.bottom2; + // Force some bitfields to 0 + spsr.res0_23_22 = 0; + spsr.ss = 0; + } + tc->setMiscReg(spsr_idx, spsr); + + // Save preferred return address into ELR_ELx + Addr curr_pc = tc->pcState().pc(); + Addr ret_addr = curr_pc; + if (from64) + ret_addr += armPcElrOffset(); + else + ret_addr += spsr.t ? thumbPcElrOffset() : armPcElrOffset(); + tc->setMiscReg(elr_idx, ret_addr); + + // Update process state + OperatingMode64 mode = 0; + mode.spX = 1; + mode.el = toEL; + mode.width = 0; + cpsr.mode = mode; + cpsr.daif = 0xf; + cpsr.il = 0; + cpsr.ss = 0; + tc->setMiscReg(MISCREG_CPSR, cpsr); + + // Set PC to start of exception handler + Addr new_pc = purifyTaggedAddr(getVector64(tc), tc, toEL); + DPRINTF(Faults, "Invoking Fault (AArch64 target EL):%s cpsr:%#x PC:%#x " + "elr:%#x newVec: %#x\n", name(), cpsr, curr_pc, ret_addr, new_pc); + PCState pc(new_pc); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); tc->pcState(pc); + + // If we have a valid instruction then use it to annotate this fault with + // extra information. 
This is used to generate the correct fault syndrome + // information + if (inst) + reinterpret_cast(inst.get())->annotateFault(this); + // Save exception syndrome + if ((nextMode() != MODE_IRQ) && (nextMode() != MODE_FIQ)) + setSyndrome(tc, getSyndromeReg64()); } void @@ -171,7 +684,25 @@ Reset::invoke(ThreadContext *tc, StaticInstPtr inst) tc->getCpuPtr()->clearInterrupts(); tc->clearArchRegs(); } - ArmFault::invoke(tc, inst); + if (!ArmSystem::highestELIs64(tc)) { + ArmFault::invoke(tc, inst); + tc->setMiscReg(MISCREG_VMPIDR, + getMPIDR(dynamic_cast(tc->getSystemPtr()), tc)); + + // Unless we have SMC code to get us there, boot in HYP! + if (ArmSystem::haveVirtualization(tc) && + !ArmSystem::haveSecurity(tc)) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + cpsr.mode = MODE_HYP; + tc->setMiscReg(MISCREG_CPSR, cpsr); + } + } else { + // Advance the PC to the IMPLEMENTATION DEFINED reset value + PCState pc = ArmSystem::resetAddr64(tc); + pc.aarch64(true); + pc.nextAArch64(true); + tc->pcState(pc); + } } void @@ -196,6 +727,45 @@ UndefinedInstruction::invoke(ThreadContext *tc, StaticInstPtr inst) } } +bool +UndefinedInstruction::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +uint32_t +UndefinedInstruction::iss() const +{ + if (overrideEc == EC_INVALID) + return issRaw; + + uint32_t new_iss = 0; + uint32_t op0, op1, op2, CRn, CRm, Rt, dir; + + dir = bits(machInst, 21, 21); + op0 = bits(machInst, 20, 19); + op1 = bits(machInst, 18, 16); + CRn = bits(machInst, 15, 12); + CRm = bits(machInst, 11, 8); + op2 = bits(machInst, 7, 5); + Rt = bits(machInst, 4, 0); + + new_iss = op0 << 20 | op2 << 17 | op1 << 14 | CRn << 10 | + Rt << 5 | CRm << 1 | dir; + + return new_iss; +} + void SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) { @@ -207,7 +777,12 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) // As of now, there isn't a 32 bit thumb version of this instruction. assert(!machInst.bigThumb); uint32_t callNum; - callNum = tc->readIntReg(INTREG_R7); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + OperatingMode mode = (OperatingMode)(uint8_t)cpsr.mode; + if (opModeIs64(mode)) + callNum = tc->readIntReg(INTREG_X8); + else + callNum = tc->readIntReg(INTREG_R7); tc->syscall(callNum); // Advance the PC since that won't happen automatically. @@ -217,21 +792,593 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) tc->pcState(pc); } +bool +SupervisorCall::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +ExceptionClass +SupervisorCall::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? 
EC_SVC_64 : vals.ec);
+}
+
+uint32_t
+SupervisorCall::iss() const
+{
+    // Even if we have a 24-bit imm from an arm32 instruction, we only use
+    // the bottom 16 bits for the ISS value (it doesn't hurt for AArch64 SVC).
+    return issRaw & 0xFFFF;
+}
+
+uint32_t
+SecureMonitorCall::iss() const
+{
+    if (from64)
+        return bits(machInst, 20, 5);
+    return 0;
+}
+
+ExceptionClass
+UndefinedInstruction::ec(ThreadContext *tc) const
+{
+    return (overrideEc != EC_INVALID) ? overrideEc : vals.ec;
+}
+
+
+HypervisorCall::HypervisorCall(ExtMachInst _machInst, uint32_t _imm) :
+    ArmFaultVals<HypervisorCall>(_machInst, _imm)
+{}
+
+ExceptionClass
+HypervisorTrap::ec(ThreadContext *tc) const
+{
+    return (overrideEc != EC_INVALID) ? overrideEc : vals.ec;
+}
+
+template<class T>
+FaultOffset
+ArmFaultVals<T>::offset(ThreadContext *tc)
+{
+    bool isHypTrap = false;
+
+    // Normally we just use the exception vector from the table at the top of
+    // this file; however, if this exception has caused a transition to Hyp
+    // mode, and it's an exception type that would only do this if it has
+    // been trapped, then we use the hyp trap vector instead of the normal
+    // vector
+    if (vals.hypTrappable) {
+        CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+        if (cpsr.mode == MODE_HYP) {
+            CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP);
+            isHypTrap = spsr.mode != MODE_HYP;
+        }
+    }
+    return isHypTrap ? 0x14 : vals.offset;
+}
+
+// void
+// SupervisorCall::setSyndrome64(ThreadContext *tc, MiscRegIndex esr_idx)
+// {
+//     ESR esr = 0;
+//     esr.ec = machInst.aarch64 ? SvcAArch64 : SvcAArch32;
+//     esr.il = !machInst.thumb;
+//     if (machInst.aarch64)
+//         esr.imm16 = bits(machInst.instBits, 20, 5);
+//     else if (machInst.thumb)
+//         esr.imm16 = bits(machInst.instBits, 7, 0);
+//     else
+//         esr.imm16 = bits(machInst.instBits, 15, 0);
+//     tc->setMiscReg(esr_idx, esr);
+// }
+
+void
+SecureMonitorCall::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+    if (FullSystem) {
+        ArmFault::invoke(tc, inst);
+        return;
+    }
+}
+
+ExceptionClass
+SecureMonitorCall::ec(ThreadContext *tc) const
+{
+    return (from64 ? EC_SMC_64 : vals.ec);
+}
+
+ExceptionClass
+SupervisorTrap::ec(ThreadContext *tc) const
+{
+    return (overrideEc != EC_INVALID) ? overrideEc : vals.ec;
+}
+
+ExceptionClass
+SecureMonitorTrap::ec(ThreadContext *tc) const
+{
+    return (overrideEc != EC_INVALID) ? overrideEc :
+        (from64 ? EC_SMC_64 : vals.ec);
+}
+
 template<class T>
 void
 AbortFault<T>::invoke(ThreadContext *tc, StaticInstPtr inst)
 {
+    if (tranMethod == ArmFault::UnknownTran) {
+        tranMethod = longDescFormatInUse(tc) ? ArmFault::LpaeTran
+                                             : ArmFault::VmsaTran;
+
+        if ((tranMethod == ArmFault::VmsaTran) && this->routeToMonitor(tc)) {
+            // See ARM ARM B3-1416
+            bool override_LPAE = false;
+            TTBCR ttbcr_s = tc->readMiscReg(MISCREG_TTBCR_S);
+            TTBCR M5_VAR_USED ttbcr_ns = tc->readMiscReg(MISCREG_TTBCR_NS);
+            if (ttbcr_s.eae) {
+                override_LPAE = true;
+            } else {
+                // Unimplemented code option, not seen in testing. May need
+                // extension according to the manual excerpt above.
+                DPRINTF(Faults, "Warning: Incomplete translation method "
+                        "override detected.\n");
+            }
+            if (override_LPAE)
+                tranMethod = ArmFault::LpaeTran;
+        }
+    }
+
+    if (source == ArmFault::AsynchronousExternalAbort) {
+        tc->getCpuPtr()->clearInterrupt(INT_ABT, 0);
+    }
+    // Get effective fault source encoding
+    CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+    FSR fsr = getFsr(tc);
+
+    // source must be determined BEFORE invoking generic routines which will
+    // try to set HSR etc., and are based upon source!
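+    // Worked example of the encoding split that getFsr() below has to
+    // handle, using the tables at the top of this file: TranslationL1
+    // happens to encode as 0x05 in both formats, but AccessFlagL2 is 0x06
+    // in the short-descriptor FSR and 0x0a in the long-descriptor/AArch64
+    // encodings, so the translation method chosen above determines which
+    // table the reported fault status comes from.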
ArmFaultVals::invoke(tc, inst); + + if (cpsr.width) { // AArch32 + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(T::HFarIndex, faultAddr); + } else if (stage2) { + tc->setMiscReg(MISCREG_HPFAR, (faultAddr >> 8) & ~0xf); + tc->setMiscReg(T::HFarIndex, OVAddr); + } else { + tc->setMiscReg(T::FsrIndex, fsr); + tc->setMiscReg(T::FarIndex, faultAddr); + } + DPRINTF(Faults, "Abort Fault source=%#x fsr=%#x faultAddr=%#x "\ + "tranMethod=%#x\n", source, fsr, faultAddr, tranMethod); + } else { // AArch64 + // Set the FAR register. Nothing else to do if we are in AArch64 state + // because the syndrome register has already been set inside invoke64() + tc->setMiscReg(AbortFault::getFaultAddrReg64(), faultAddr); + } +} + +template +FSR +AbortFault::getFsr(ThreadContext *tc) +{ FSR fsr = 0; - fsr.fsLow = bits(status, 3, 0); - fsr.fsHigh = bits(status, 4); - fsr.domain = domain; - fsr.wnr = (write ? 1 : 0); - fsr.ext = 0; - tc->setMiscReg(T::FsrIndex, fsr); - tc->setMiscReg(T::FarIndex, faultAddr); - DPRINTF(Faults, "Abort Fault fsr=%#x faultAddr=%#x\n", fsr, faultAddr); + if (((CPSR) tc->readMiscRegNoEffect(MISCREG_CPSR)).width) { + // AArch32 + assert(tranMethod != ArmFault::UnknownTran); + if (tranMethod == ArmFault::LpaeTran) { + srcEncoded = ArmFault::longDescFaultSources[source]; + fsr.status = srcEncoded; + fsr.lpae = 1; + } else { + srcEncoded = ArmFault::shortDescFaultSources[source]; + fsr.fsLow = bits(srcEncoded, 3, 0); + fsr.fsHigh = bits(srcEncoded, 4); + fsr.domain = static_cast(domain); + } + fsr.wnr = (write ? 1 : 0); + fsr.ext = 0; + } else { + // AArch64 + srcEncoded = ArmFault::aarch64FaultSources[source]; + } + if (srcEncoded == ArmFault::FaultSourceInvalid) { + panic("Invalid fault source\n"); + } + return fsr; +} + +template +bool +AbortFault::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +template +void +AbortFault::annotate(ArmFault::AnnotationIDs id, uint64_t val) +{ + switch (id) + { + case ArmFault::S1PTW: + s1ptw = val; + break; + case ArmFault::OVA: + OVAddr = val; + break; + + // Just ignore unknown ID's + default: + break; + } +} + +template +uint32_t +AbortFault::iss() const +{ + uint32_t val; + + val = srcEncoded & 0x3F; + val |= write << 6; + val |= s1ptw << 7; + return (val); +} + +template +bool +AbortFault::isMMUFault() const +{ + // NOTE: Not relying on LL information being aligned to lowest bits here + return + (source == ArmFault::AlignmentFault) || + ((source >= ArmFault::TranslationLL) && + (source < ArmFault::TranslationLL + 4)) || + ((source >= ArmFault::AccessFlagLL) && + (source < ArmFault::AccessFlagLL + 4)) || + ((source >= ArmFault::DomainLL) && + (source < ArmFault::DomainLL + 4)) || + ((source >= ArmFault::PermissionLL) && + (source < ArmFault::PermissionLL + 4)); +} + +ExceptionClass +PrefetchAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (toEL == fromEL) + return EC_PREFETCH_ABORT_CURR_EL; + else + return EC_PREFETCH_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. 
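+    // Concretely, the table value is the "taken to Hyp from another mode"
+    // encoding, and the +1 below selects the sibling "taken from Hyp mode
+    // itself" encoding: 0x20 vs 0x21 for prefetch aborts, 0x24 vs 0x25 for
+    // data aborts (the same EC pairs noted in setSyndrome() above).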
+ + ExceptionClass ec = ArmFaultVals::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +PrefetchAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +PrefetchAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (source == DebugEvent) && hdcr.tde && (cpsr.mode != MODE_HYP)) || + ( (source == SynchronousExternalAbort) && hcr.tge && (cpsr.mode == MODE_USER)) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +ExceptionClass +DataAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (source == ArmFault::AsynchronousExternalAbort) { + panic("Asynchronous External Abort should be handled with \ + SystemErrors (SErrors)!"); + } + if (toEL == fromEL) + return EC_DATA_ABORT_CURR_EL; + else + return EC_DATA_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. + + ExceptionClass ec = ArmFaultVals::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +DataAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +DataAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (cpsr.mode != MODE_HYP) && ( ((source == AsynchronousExternalAbort) && hcr.amo) || + ((source == DebugEvent) && hdcr.tde) ) + ) || + ( (cpsr.mode == MODE_USER) && hcr.tge && + ((source == AlignmentFault) || + (source == SynchronousExternalAbort)) + ) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +uint32_t +DataAbort::iss() const +{ + uint32_t val; + + // Add on the data abort specific fields to the generic abort ISS value + val = AbortFault::iss(); + // ISS is valid if not caused by a stage 1 page table walk, and when taken + // to AArch64 only when directed to EL2 + if (!s1ptw && (!to64 || toEL == EL2)) { + val |= isv << 24; + if (isv) { + val |= sas << 22; + val |= sse << 21; + val |= srt << 16; + // AArch64 only. 
These assignments are safe on AArch32 as well + // because these vars are initialized to false + val |= sf << 15; + val |= ar << 14; + } + } + return (val); +} + +void +DataAbort::annotate(AnnotationIDs id, uint64_t val) +{ + AbortFault::annotate(id, val); + switch (id) + { + case SAS: + isv = true; + sas = val; + break; + case SSE: + isv = true; + sse = val; + break; + case SRT: + isv = true; + srt = val; + break; + case SF: + isv = true; + sf = val; + break; + case AR: + isv = true; + ar = val; + break; + // Just ignore unknown ID's + default: + break; + } +} + +void +VirtualDataAbort::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + AbortFault::invoke(tc, inst); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + hcr.va = 0; + tc->setMiscRegNoEffect(MISCREG_HCR, hcr); +} + +bool +Interrupt::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return scr.irq; +} + +bool +Interrupt::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + // Determine whether IRQs are routed to Hyp mode. + toHyp = (!scr.irq && hcr.imo && !inSecureState(scr, cpsr)) || + (cpsr.mode == MODE_HYP); + return toHyp; +} + +bool +Interrupt::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +VirtualInterrupt::VirtualInterrupt() +{} + +bool +FastInterrupt::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return scr.fiq; +} + +bool +FastInterrupt::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + // Determine whether IRQs are routed to Hyp mode. 
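+    // (For FIQs the controlling bits are SCR.FIQ and HCR.FMO, mirroring
+    // the SCR.IRQ/HCR.IMO pair used for IRQs above.)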
+    toHyp = (!scr.fiq && hcr.fmo && !inSecureState(scr, cpsr)) ||
+            (cpsr.mode == MODE_HYP);
+    return toHyp;
+}
+
+bool
+FastInterrupt::abortDisable(ThreadContext *tc)
+{
+    if (ArmSystem::haveSecurity(tc)) {
+        SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+        return (!scr.ns || scr.aw);
+    }
+    return true;
+}
+
+bool
+FastInterrupt::fiqDisable(ThreadContext *tc)
+{
+    if (ArmSystem::haveVirtualization(tc)) {
+        return true;
+    } else if (ArmSystem::haveSecurity(tc)) {
+        SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+        return (!scr.ns || scr.fw);
+    }
+    return true;
+}
+
+VirtualFastInterrupt::VirtualFastInterrupt()
+{}
+
+void
+PCAlignmentFault::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+    ArmFaultVals<PCAlignmentFault>::invoke(tc, inst);
+    assert(from64);
+    // Set the FAR
+    tc->setMiscReg(getFaultAddrReg64(), faultPC);
+}
+
+SPAlignmentFault::SPAlignmentFault()
+{}
+
+SystemError::SystemError()
+{}
+
+void
+SystemError::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+    tc->getCpuPtr()->clearInterrupt(INT_ABT, 0);
+    ArmFault::invoke(tc, inst);
+}
+
+bool
+SystemError::routeToMonitor(ThreadContext *tc) const
+{
+    assert(ArmSystem::haveSecurity(tc));
+    assert(from64);
+    SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3);
+    return scr.ea;
+}
+
+bool
+SystemError::routeToHyp(ThreadContext *tc) const
+{
+    bool toHyp;
+    assert(from64);
+
+    SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3);
+    HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR);
+    CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR);
+
+    toHyp = (!scr.ea && hcr.amo && !inSecureState(scr, cpsr)) ||
+            (!scr.ea && !scr.rw && !hcr.amo && !inSecureState(scr,cpsr));
+    return toHyp;
 }
 
 void
@@ -247,11 +1394,6 @@ FlushPipe::invoke(ThreadContext *tc, StaticInstPtr inst)
 {
     tc->pcState(pc);
 }
 
-template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc,
-                                                StaticInstPtr inst);
-template void AbortFault<DataAbort>::invoke(ThreadContext *tc,
-                                            StaticInstPtr inst);
-
 void
 ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst)
 {
     DPRINTF(Faults, "Invoking ArmSev Fault\n");
@@ -265,6 +1407,34 @@ ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst)
     tc->getCpuPtr()->clearInterrupt(INT_SEV, 0);
 }
 
-// return via SUBS pc, lr, xxx; rfe, movs, ldm
+// Instantiate all the templates to make the linker happy
+template class ArmFaultVals<Reset>;
+template class ArmFaultVals<UndefinedInstruction>;
+template class ArmFaultVals<SupervisorCall>;
+template class ArmFaultVals<SecureMonitorCall>;
+template class ArmFaultVals<HypervisorCall>;
+template class ArmFaultVals<PrefetchAbort>;
+template class ArmFaultVals<DataAbort>;
+template class ArmFaultVals<VirtualDataAbort>;
+template class ArmFaultVals<HypervisorTrap>;
+template class ArmFaultVals<Interrupt>;
+template class ArmFaultVals<VirtualInterrupt>;
+template class ArmFaultVals<FastInterrupt>;
+template class ArmFaultVals<VirtualFastInterrupt>;
+template class ArmFaultVals<SupervisorTrap>;
+template class ArmFaultVals<SecureMonitorTrap>;
+template class ArmFaultVals<PCAlignmentFault>;
+template class ArmFaultVals<SPAlignmentFault>;
+template class ArmFaultVals<SystemError>;
+template class ArmFaultVals<FlushPipe>;
+template class ArmFaultVals<ArmSev>;
+template class AbortFault<PrefetchAbort>;
+template class AbortFault<DataAbort>;
+template class AbortFault<VirtualDataAbort>;
+
+
+IllegalInstSetStateFault::IllegalInstSetStateFault()
+{}
+
 } // namespace ArmISA
diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh
index 9858e52ef..a5720f115 100644
--- a/src/arch/arm/faults.hh
+++ b/src/arch/arm/faults.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -40,12 +40,15 @@
  *
  * Authors: Ali Saidi
  *          Gabe Black
+ *          Giacomo Gabrielli
+ *          Thomas Grocutt
  */
 
 #ifndef __ARM_FAULTS_HH__
 #define __ARM_FAULTS_HH__
 
 #include "arch/arm/miscregs.hh"
+#include
"arch/arm/pagetable.hh" #include "arch/arm/types.hh" #include "base/misc.hh" #include "sim/faults.hh" @@ -60,63 +63,146 @@ typedef const Addr FaultOffset; class ArmFault : public FaultBase { protected: + ExtMachInst machInst; + uint32_t issRaw; + + // Helper variables for ARMv8 exception handling + bool from64; // True if the exception is generated from the AArch64 state + bool to64; // True if the exception is taken in AArch64 state + ExceptionLevel fromEL; // Source exception level + ExceptionLevel toEL; // Target exception level + OperatingMode fromMode; // Source operating mode + Addr getVector(ThreadContext *tc); + Addr getVector64(ThreadContext *tc); public: - enum StatusEncoding + /// Generic fault source enums used to index into + /// {short/long/aarch64}DescFaultSources[] to get the actual encodings based + /// on the current register width state and the translation table format in + /// use + enum FaultSource { - // Fault Status register encodings - // ARM ARM B3.9.4 - AlignmentFault = 0x1, - DebugEvent = 0x2, - AccessFlag0 = 0x3, - InstructionCacheMaintenance = 0x4, - Translation0 = 0x5, - AccessFlag1 = 0x6, - Translation1 = 0x7, - SynchronousExternalAbort0 = 0x8, - Domain0 = 0x9, - SynchronousExternalAbort1 = 0x8, - Domain1 = 0xb, - TranslationTableWalkExtAbt0 = 0xc, - Permission0 = 0xd, - TranslationTableWalkExtAbt1 = 0xe, - Permission1 = 0xf, - AsynchronousExternalAbort = 0x16, - MemoryAccessAsynchronousParityError = 0x18, - MemoryAccessSynchronousParityError = 0x19, - TranslationTableWalkPrtyErr0 = 0x1c, - TranslationTableWalkPrtyErr1 = 0x1e, - - // not a real fault. This is a status code - // to allow the translation function to inform - // the memory access function not to proceed - // for a Prefetch that misses in the TLB. - PrefetchTLBMiss = 0x1f, - PrefetchUncacheable = 0x20 + AlignmentFault = 0, + InstructionCacheMaintenance, // Short-desc. format only + SynchExtAbtOnTranslTableWalkLL, + SynchPtyErrOnTranslTableWalkLL = SynchExtAbtOnTranslTableWalkLL + 4, + TranslationLL = SynchPtyErrOnTranslTableWalkLL + 4, + AccessFlagLL = TranslationLL + 4, + DomainLL = AccessFlagLL + 4, + PermissionLL = DomainLL + 4, + DebugEvent = PermissionLL + 4, + SynchronousExternalAbort, + TLBConflictAbort, // Requires LPAE + SynchPtyErrOnMemoryAccess, + AsynchronousExternalAbort, + AsynchPtyErrOnMemoryAccess, + AddressSizeLL, // AArch64 only + + // Not real faults. These are faults to allow the translation function + // to inform the memory access function not to proceed for a prefetch + // that misses in the TLB or that targets an uncacheable address + PrefetchTLBMiss = AddressSizeLL + 4, + PrefetchUncacheable, + + NumFaultSources, + FaultSourceInvalid = 0xff + }; + + /// Encodings of the fault sources when the short-desc. translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t shortDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources when the long-desc. 
translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t longDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources in AArch64 state + static uint8_t aarch64FaultSources[NumFaultSources]; + + enum AnnotationIDs + { + S1PTW, // DataAbort, PrefetchAbort: Stage 1 Page Table Walk, + OVA, // DataAbort, PrefetchAbort: stage 1 Virtual Address for stage 2 faults + SAS, // DataAbort: Syndrome Access Size + SSE, // DataAbort: Syndrome Sign Extend + SRT, // DataAbort: Syndrome Register Transfer + + // AArch64 only + SF, // DataAbort: width of the accessed register is SixtyFour + AR // DataAbort: Acquire/Release semantics + }; + + enum TranMethod + { + LpaeTran, + VmsaTran, + UnknownTran }; struct FaultVals { const FaultName name; + const FaultOffset offset; + + // Offsets used for exceptions taken in AArch64 state + const uint16_t currELTOffset; + const uint16_t currELHOffset; + const uint16_t lowerEL64Offset; + const uint16_t lowerEL32Offset; + const OperatingMode nextMode; + const uint8_t armPcOffset; const uint8_t thumbPcOffset; + // The following two values are used in place of armPcOffset and + // thumbPcOffset when the exception return address is saved into ELR + // registers (exceptions taken in HYP mode or in AArch64 state) + const uint8_t armPcElrOffset; + const uint8_t thumbPcElrOffset; + + const bool hypTrappable; const bool abortDisable; const bool fiqDisable; + + // Exception class used to appropriately set the syndrome register + // (exceptions taken in HYP mode or in AArch64 state) + const ExceptionClass ec; + FaultStat count; }; + ArmFault(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + machInst(_machInst), issRaw(_iss), from64(false), to64(false) {} + + // Returns the actual syndrome register to use based on the target + // exception level + MiscRegIndex getSyndromeReg64() const; + // Returns the actual fault address register to use based on the target + // exception level + MiscRegIndex getFaultAddrReg64() const; + void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + void invoke64(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + virtual void annotate(AnnotationIDs id, uint64_t val) {} virtual FaultStat& countStat() = 0; - virtual FaultOffset offset() = 0; + virtual FaultOffset offset(ThreadContext *tc) = 0; + virtual FaultOffset offset64() = 0; virtual OperatingMode nextMode() = 0; - virtual uint8_t armPcOffset() = 0; - virtual uint8_t thumbPcOffset() = 0; - virtual bool abortDisable() = 0; - virtual bool fiqDisable() = 0; + virtual bool routeToMonitor(ThreadContext *tc) const = 0; + virtual bool routeToHyp(ThreadContext *tc) const { return false; } + virtual uint8_t armPcOffset(bool isHyp) = 0; + virtual uint8_t thumbPcOffset(bool isHyp) = 0; + virtual uint8_t armPcElrOffset() = 0; + virtual uint8_t thumbPcElrOffset() = 0; + virtual bool abortDisable(ThreadContext *tc) = 0; + virtual bool fiqDisable(ThreadContext *tc) = 0; + virtual ExceptionClass ec(ThreadContext *tc) const = 0; + virtual uint32_t iss() const = 0; + virtual bool isStage2() const { return false; } + virtual FSR getFsr(ThreadContext *tc) { return 0; } + virtual void setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg); }; template @@ -126,14 +212,38 @@ class ArmFaultVals : public ArmFault static FaultVals vals; public: + ArmFaultVals(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + ArmFault(_machInst, _iss) {} FaultName name() const { return vals.name; } - FaultStat & countStat() {return vals.count;} - 
FaultOffset offset() { return vals.offset; } + FaultStat & countStat() { return vals.count; } + FaultOffset offset(ThreadContext *tc); + + FaultOffset + offset64() + { + if (toEL == fromEL) { + if (opModeIsT(fromMode)) + return vals.currELTOffset; + return vals.currELHOffset; + } else { + if (from64) + return vals.lowerEL64Offset; + return vals.lowerEL32Offset; + } + } + OperatingMode nextMode() { return vals.nextMode; } - uint8_t armPcOffset() { return vals.armPcOffset; } - uint8_t thumbPcOffset() { return vals.thumbPcOffset; } - bool abortDisable() { return vals.abortDisable; } - bool fiqDisable() { return vals.fiqDisable; } + virtual bool routeToMonitor(ThreadContext *tc) const { return false; } + uint8_t armPcOffset(bool isHyp) { return isHyp ? vals.armPcElrOffset + : vals.armPcOffset; } + uint8_t thumbPcOffset(bool isHyp) { return isHyp ? vals.thumbPcElrOffset + : vals.thumbPcOffset; } + uint8_t armPcElrOffset() { return vals.armPcElrOffset; } + uint8_t thumbPcElrOffset() { return vals.thumbPcElrOffset; } + virtual bool abortDisable(ThreadContext* tc) { return vals.abortDisable; } + virtual bool fiqDisable(ThreadContext* tc) { return vals.fiqDisable; } + virtual ExceptionClass ec(ThreadContext *tc) const { return vals.ec; } + virtual uint32_t iss() const { return issRaw; } }; class Reset : public ArmFaultVals @@ -146,87 +256,283 @@ class Reset : public ArmFaultVals class UndefinedInstruction : public ArmFaultVals { protected: - ExtMachInst machInst; bool unknown; const char *mnemonic; bool disabled; + ExceptionClass overrideEc; public: UndefinedInstruction(ExtMachInst _machInst, bool _unknown, const char *_mnemonic = NULL, bool _disabled = false) : - machInst(_machInst), unknown(_unknown), - mnemonic(_mnemonic), disabled(_disabled) - { - } - UndefinedInstruction() : - machInst(0), unknown(false), mnemonic("undefined"), disabled(false) + ArmFaultVals(_machInst), + unknown(_unknown), mnemonic(_mnemonic), disabled(_disabled), + overrideEc(EC_INVALID) + {} + UndefinedInstruction(ExtMachInst _machInst, uint32_t _iss, ExceptionClass _overrideEc) : + ArmFaultVals(_machInst, _iss), + overrideEc(_overrideEc) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; }; class SupervisorCall : public ArmFaultVals { protected: - ExtMachInst machInst; - + ExceptionClass overrideEc; public: - SupervisorCall(ExtMachInst _machInst) : machInst(_machInst) + SupervisorCall(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals(_machInst, _iss), + overrideEc(_overrideEc) {} - SupervisorCall() : machInst(0) + + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SecureMonitorCall : public ArmFaultVals +{ + public: + SecureMonitorCall(ExtMachInst _machInst) : + ArmFaultVals(_machInst) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SupervisorTrap : public ArmFaultVals +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + SupervisorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext 
*tc) const;
+};
+
+class SecureMonitorTrap : public ArmFaultVals<SecureMonitorTrap>
+{
+  protected:
+    ExtMachInst machInst;
+    ExceptionClass overrideEc;
+
+  public:
+    SecureMonitorTrap(ExtMachInst _machInst, uint32_t _iss,
+                      ExceptionClass _overrideEc = EC_INVALID) :
+        ArmFaultVals<SecureMonitorTrap>(_machInst, _iss),
+        overrideEc(_overrideEc)
+    {}
+
+    ExceptionClass ec(ThreadContext *tc) const;
+};
+
+class HypervisorCall : public ArmFaultVals<HypervisorCall>
+{
+  public:
+    HypervisorCall(ExtMachInst _machInst, uint32_t _imm);
+};
+
+class HypervisorTrap : public ArmFaultVals<HypervisorTrap>
+{
+  protected:
+    ExtMachInst machInst;
+    ExceptionClass overrideEc;
+
+  public:
+    HypervisorTrap(ExtMachInst _machInst, uint32_t _iss,
+                   ExceptionClass _overrideEc = EC_INVALID) :
+        ArmFaultVals<HypervisorTrap>(_machInst, _iss),
+        overrideEc(_overrideEc)
+    {}
+
+    ExceptionClass ec(ThreadContext *tc) const;
+};
+
 template <class T>
 class AbortFault : public ArmFaultVals<T>
 {
   protected:
+    /**
+     * The virtual address the fault occurred at. If 2 stages of
+     * translation are being used then this is the intermediate
+     * physical address that is the starting point for the second
+     * stage of translation.
+     */
     Addr faultAddr;
+    /**
+     * Original virtual address. If the fault was generated on the
+     * second stage of translation then this variable stores the
+     * virtual address used in the original stage 1 translation.
+     */
+    Addr OVAddr;
     bool write;
-    uint8_t domain;
-    uint8_t status;
+    TlbEntry::DomainType domain;
+    uint8_t source;
+    uint8_t srcEncoded;
+    bool stage2;
+    bool s1ptw;
+    ArmFault::TranMethod tranMethod;
 
   public:
-    AbortFault(Addr _faultAddr, bool _write,
-               uint8_t _domain, uint8_t _status) :
-        faultAddr(_faultAddr), write(_write),
-        domain(_domain), status(_status)
+    AbortFault(Addr _faultAddr, bool _write, TlbEntry::DomainType _domain,
+               uint8_t _source, bool _stage2,
+               ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) :
+        faultAddr(_faultAddr), write(_write), domain(_domain),
+        source(_source), stage2(_stage2), s1ptw(false),
+        tranMethod(_tranMethod)
     {}
 
     void invoke(ThreadContext *tc,
                 StaticInstPtr inst = StaticInst::nullStaticInstPtr);
+
+    FSR getFsr(ThreadContext *tc);
+    bool abortDisable(ThreadContext *tc);
+    uint32_t iss() const;
+    bool isStage2() const { return stage2; }
+    void annotate(ArmFault::AnnotationIDs id, uint64_t val);
+    bool isMMUFault() const;
 };
 
 class PrefetchAbort : public AbortFault<PrefetchAbort>
 {
   public:
-    static const MiscRegIndex FsrIndex = MISCREG_IFSR;
-    static const MiscRegIndex FarIndex = MISCREG_IFAR;
+    static const MiscRegIndex FsrIndex  = MISCREG_IFSR;
+    static const MiscRegIndex FarIndex  = MISCREG_IFAR;
+    static const MiscRegIndex HFarIndex = MISCREG_HIFAR;
 
-    PrefetchAbort(Addr _addr, uint8_t _status) :
-        AbortFault<PrefetchAbort>(_addr, false, 0, _status)
+    PrefetchAbort(Addr _addr, uint8_t _source, bool _stage2 = false,
+                  ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) :
+        AbortFault<PrefetchAbort>(_addr, false, TlbEntry::DomainType::NoAccess,
+                                  _source, _stage2, _tranMethod)
     {}
+
+    ExceptionClass ec(ThreadContext *tc) const;
+    // @todo: external aborts should be routed if SCR.EA == 1
+    bool routeToMonitor(ThreadContext *tc) const;
+    bool routeToHyp(ThreadContext *tc) const;
 };
 
 class DataAbort : public AbortFault<DataAbort>
 {
   public:
-    static const MiscRegIndex FsrIndex = MISCREG_DFSR;
-    static const MiscRegIndex FarIndex = MISCREG_DFAR;
+    static const MiscRegIndex FsrIndex  = MISCREG_DFSR;
+    static const MiscRegIndex FarIndex  = MISCREG_DFAR;
+    static const MiscRegIndex HFarIndex = MISCREG_HDFAR;
+    bool isv;
+    uint8_t sas;
+    uint8_t sse;
+    uint8_t srt;
+
+    // AArch64 only
+    bool sf;
+    bool ar;
+
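+    // (Illustrative: these fields are populated on demand via annotate(),
+    // e.g. a faulting load may annotate(SAS, size) and annotate(SRT, reg)
+    // so that iss() can report a valid instruction syndrome with ISV set.)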
DataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, uint8_t _source, + bool _stage2 = false, ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + AbortFault(_addr, _write, _domain, _source, _stage2, + _tranMethod), + isv(false), sas (0), sse(0), srt(0), sf(false), ar(false) + {} + + ExceptionClass ec(ThreadContext *tc) const; + // @todo: external aborts should be routed if SCR.EA == 1 + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + uint32_t iss() const; + void annotate(AnnotationIDs id, uint64_t val); +}; + +class VirtualDataAbort : public AbortFault +{ + public: + static const MiscRegIndex FsrIndex = MISCREG_DFSR; + static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex HFarIndex = MISCREG_HDFAR; - DataAbort(Addr _addr, uint8_t _domain, bool _write, uint8_t _status) : - AbortFault(_addr, _write, _domain, _status) + VirtualDataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, + uint8_t _source) : + AbortFault(_addr, _write, _domain, _source, false) {} + + void invoke(ThreadContext *tc, StaticInstPtr inst); }; -class Interrupt : public ArmFaultVals {}; -class FastInterrupt : public ArmFaultVals {}; +class Interrupt : public ArmFaultVals +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); +}; + +class VirtualInterrupt : public ArmFaultVals +{ + public: + VirtualInterrupt(); +}; + +class FastInterrupt : public ArmFaultVals +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); + bool fiqDisable(ThreadContext *tc); +}; + +class VirtualFastInterrupt : public ArmFaultVals +{ + public: + VirtualFastInterrupt(); +}; + +/// PC alignment fault (AArch64 only) +class PCAlignmentFault : public ArmFaultVals +{ + protected: + /// The unaligned value of the PC + Addr faultPC; + public: + PCAlignmentFault(Addr _faultPC) : faultPC(_faultPC) + {} + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); +}; + +/// Stack pointer alignment fault (AArch64 only) +class SPAlignmentFault : public ArmFaultVals +{ + public: + SPAlignmentFault(); +}; + +/// System error (AArch64 only) +class SystemError : public ArmFaultVals +{ + public: + SystemError(); + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; +}; // A fault that flushes the pipe, excluding the faulting instructions class FlushPipe : public ArmFaultVals @@ -246,6 +552,13 @@ class ArmSev : public ArmFaultVals StaticInstPtr inst = StaticInst::nullStaticInstPtr); }; +/// Illegal Instruction Set State fault (AArch64 only) +class IllegalInstSetStateFault : public ArmFaultVals +{ + public: + IllegalInstSetStateFault(); +}; + } // namespace ArmISA #endif // __ARM_FAULTS_HH__ diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc new file mode 100644 index 000000000..49ba3402a --- /dev/null +++ b/src/arch/arm/insts/branch64.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * 
licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/branch64.hh" + +namespace ArmISA +{ + +ArmISA::PCState +BranchImm64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm2); + pcs.advance(); + return pcs; +} + +std::string +BranchImmCond64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false, true, condCode); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImm64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchRet64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (op1 != INTREG_X30) + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchEret64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + return ss.str(); +} + +std::string +BranchImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImmImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%#x, ", imm1); + printTarget(ss, pc + imm2, symtab); + return ss.str(); +} + +} // namespace ArmISA diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh new file mode 100644 index 000000000..48881e0c2 --- /dev/null +++ b/src/arch/arm/insts/branch64.hh @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_BRANCH64_HH__ +#define __ARCH_ARM_INSTS_BRANCH64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ +// Branch to a target computed with an immediate +class BranchImm64 : public ArmStaticInst +{ + protected: + int64_t imm; + + public: + BranchImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Conditionally Branch to a target computed with an immediate +class BranchImmCond64 : public BranchImm64 +{ + protected: + ConditionCode condCode; + + public: + BranchImmCond64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, ConditionCode _condCode) : + BranchImm64(mnem, _machInst, __opClass, _imm), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with a register +class BranchReg64 : public ArmStaticInst +{ + protected: + IntRegIndex op1; + + public: + BranchReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Ret instruction +class BranchRet64 : public BranchReg64 +{ + public: + BranchRet64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + BranchReg64(mnem, _machInst, __opClass, _op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Eret instruction +class BranchEret64 : public ArmStaticInst +{ + public: + BranchEret64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + 
{} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with an immediate and a register +class BranchImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm; + IntRegIndex op1; + + public: + BranchImmReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with two immediates +class BranchImmImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm1; + int64_t imm2; + IntRegIndex op1; + + public: + BranchImmImmReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + imm1(_imm1), imm2(_imm2), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_BRANCH_HH__ diff --git a/src/arch/arm/insts/data64.cc b/src/arch/arm/insts/data64.cc new file mode 100644 index 000000000..f65219870 --- /dev/null +++ b/src/arch/arm/insts/data64.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/data64.hh" + +namespace ArmISA +{ + +std::string +DataXImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, true, false, /*XXX not really s*/ false, dest, op1, + INTREG_ZERO, INTREG_ZERO, 0, LSL, imm); + return ss.str(); +} + +std::string +DataXImmOnlyOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataXSRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, shiftType, 0); + return ss.str(); +} + +std::string +DataXERegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, LSL, 0); + return ss.str(); +} + +std::string +DataX1RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + return ss.str(); +} + +std::string +DataX1RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX1Reg2ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +DataX2RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + return ss.str(); +} + +std::string +DataX2RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX3RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printReg(ss, op3); + return ss.str(); +} + +std::string +DataXCondCompImmOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm, defCc); 
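+    // (Illustrative: together these statements print something like
+    // "ccmp x1, #2, #8, eq": operand register, compare immediate,
+    // default NZCV value, then the condition code.)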
+ ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +} diff --git a/src/arch/arm/insts/data64.hh b/src/arch/arm/insts/data64.hh new file mode 100644 index 000000000..8c0677b3d --- /dev/null +++ b/src/arch/arm/insts/data64.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_DATA64_HH__ +#define __ARCH_ARM_INSTS_DATA64_HH__ + +#include "arch/arm/insts/static_inst.hh" +#include "base/trace.hh" + +namespace ArmISA +{ + +class DataXImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataXImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXImmOnlyOp : public ArmStaticInst +{ + protected: + IntRegIndex dest; + uint64_t imm; + + DataXImmOnlyOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXSRegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + int32_t shiftAmt; + ArmShiftType shiftType; + + DataXSRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + shiftAmt(_shiftAmt), shiftType(_shiftType) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXERegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ArmExtendType extendType; + int32_t shiftAmt; + + DataXERegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + extendType(_extendType), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + + DataX1RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataX1RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1Reg2ImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm1, imm2; + + DataX1Reg2ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + + DataX2RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), 
op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + uint64_t imm; + + DataX2RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX3RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2, op3; + + DataX3RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), op3(_op3) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompImmOp : public ArmStaticInst +{ + protected: + IntRegIndex op1; + uint64_t imm; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompImmOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, uint64_t _imm, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), imm(_imm), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompRegOp : public ArmStaticInst +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondSelOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + DataXCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_PREDINST_HH__ diff --git a/src/arch/arm/insts/fplib.cc b/src/arch/arm/insts/fplib.cc new file mode 100644 index 000000000..1f44eed09 --- /dev/null +++ b/src/arch/arm/insts/fplib.cc @@ -0,0 +1,3086 @@ +/* +* Copyright (c) 2012-2013 ARM Limited +* All rights reserved +* +* The license below extends only to copyright in the software and shall +* not be construed as granting a license to any other intellectual +* property including but not limited to intellectual property relating +* to a hardware implementation of the functionality of the software +* licensed hereunder. You may use the software subject to the license +* terms below provided that you ensure that this notice is replicated +* unmodified and in its entirety in all distributions of the software, +* modified or unmodified, in source code or in binary form. 
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met: redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer;
+* redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution;
+* neither the name of the copyright holders nor the names of its
+* contributors may be used to endorse or promote products derived from
+* this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* Authors: Edmund Grimley Evans
+*          Thomas Grocutt
+*/
+
+#include <stdint.h>
+
+#include <cassert>
+
+#include "fplib.hh"
+
+namespace ArmISA
+{
+
+#define FPLIB_RN 0
+#define FPLIB_RP 1
+#define FPLIB_RM 2
+#define FPLIB_RZ 3
+#define FPLIB_FZ 4
+#define FPLIB_DN 8
+#define FPLIB_AHP 16
+
+#define FPLIB_IDC 128 // Input Denormal
+#define FPLIB_IXC 16  // Inexact
+#define FPLIB_UFC 8   // Underflow
+#define FPLIB_OFC 4   // Overflow
+#define FPLIB_DZC 2   // Division by Zero
+#define FPLIB_IOC 1   // Invalid Operation
+
+static inline uint16_t
+lsl16(uint16_t x, uint32_t shift)
+{
+    return shift < 16 ? x << shift : 0;
+}
+
+static inline uint16_t
+lsr16(uint16_t x, uint32_t shift)
+{
+    return shift < 16 ? x >> shift : 0;
+}
+
+static inline uint32_t
+lsl32(uint32_t x, uint32_t shift)
+{
+    return shift < 32 ? x << shift : 0;
+}
+
+static inline uint32_t
+lsr32(uint32_t x, uint32_t shift)
+{
+    return shift < 32 ? x >> shift : 0;
+}
+
+static inline uint64_t
+lsl64(uint64_t x, uint32_t shift)
+{
+    return shift < 64 ? x << shift : 0;
+}
+
+static inline uint64_t
+lsr64(uint64_t x, uint32_t shift)
+{
+    return shift < 64 ?
x >> shift : 0; +} + +static inline void +lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r1 = x1 << shift | x0 >> (64 - shift); + *r0 = x0 << shift; + } else if (shift < 128) { + *r1 = x0 << (shift - 64); + *r0 = 0; + } else { + *r1 = 0; + *r0 = 0; + } +} + +static inline void +lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r0 = x0 >> shift | x1 << (64 - shift); + *r1 = x1 >> shift; + } else if (shift < 128) { + *r0 = x1 >> (shift - 64); + *r1 = 0; + } else { + *r0 = 0; + *r1 = 0; + } +} + +static inline void +mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint32_t mask = ((uint32_t)1 << 31) - 1; + uint64_t a0 = a & mask; + uint64_t a1 = a >> 31 & mask; + uint64_t b0 = b & mask; + uint64_t b1 = b >> 31 & mask; + uint64_t p0 = a0 * b0; + uint64_t p2 = a1 * b1; + uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; + uint64_t s0 = p0; + uint64_t s1 = (s0 >> 31) + p1; + uint64_t s2 = (s1 >> 31) + p2; + *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; + *x1 = s2 >> 2; +} + +static inline +void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) +{ + uint64_t t0 = (uint64_t)(uint32_t)a * b; + uint64_t t1 = (t0 >> 32) + (a >> 32) * b; + *x0 = t1 << 32 | (uint32_t)t0; + *x1 = t1 >> 32; +} + +static inline void +mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint64_t a0 = (uint32_t)a; + uint64_t a1 = a >> 32; + uint64_t b0 = (uint32_t)b; + uint64_t b1 = b >> 32; + uint64_t t1 = (a0 * b0 >> 32) + a1 * b0; + uint64_t t2 = a0 * b1; + uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32; + x += t1 >> 32; + x += t2 >> 32; + x += a1 * b1; + *x0 = a * b; + *x1 = x; +} + +static inline void +add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 + b0; + *x1 = a1 + b1 + (*x0 < a0); +} + +static inline void +sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 - b0; + *x1 = a1 - b1 - (*x0 > a0); +} + +static inline int +cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 
1 : 0); +} + +static inline uint16_t +fp16_normalise(uint16_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 8; shift; shift >>= 1) { + if (!(mnt >> (16 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint32_t +fp32_normalise(uint32_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 16; shift; shift >>= 1) { + if (!(mnt >> (32 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint64_t +fp64_normalise(uint64_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(mnt >> (64 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline void +fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) +{ + uint64_t x0 = *mnt0; + uint64_t x1 = *mnt1; + int shift; + + if (!x0 && !x1) { + return; + } + + if (!x1) { + x1 = x0; + x0 = 0; + *exp -= 64; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(x1 >> (64 - shift))) { + x1 = x1 << shift | x0 >> (64 - shift); + x0 <<= shift; + *exp -= shift; + } + } + + *mnt0 = x0; + *mnt1 = x1; +} + +static inline uint16_t +fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) +{ + return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); +} + +static inline uint32_t +fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) +{ + return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); +} + +static inline uint64_t +fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) +{ + return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); +} + +static inline uint16_t +fp16_zero(int sgn) +{ + return fp16_pack(sgn, 0, 0); +} + +static inline uint32_t +fp32_zero(int sgn) +{ + return fp32_pack(sgn, 0, 0); +} + +static inline uint64_t +fp64_zero(int sgn) +{ + return fp64_pack(sgn, 0, 0); +} + +static inline uint16_t +fp16_max_normal(int sgn) +{ + return fp16_pack(sgn, 30, -1); +} + +static inline uint32_t +fp32_max_normal(int sgn) +{ + return fp32_pack(sgn, 254, -1); +} + +static inline uint64_t +fp64_max_normal(int sgn) +{ + return fp64_pack(sgn, 2046, -1); +} + +static inline uint16_t +fp16_infinity(int sgn) +{ + return fp16_pack(sgn, 31, 0); +} + +static inline uint32_t +fp32_infinity(int sgn) +{ + return fp32_pack(sgn, 255, 0); +} + +static inline uint64_t +fp64_infinity(int sgn) +{ + return fp64_pack(sgn, 2047, 0); +} + +static inline uint16_t +fp16_defaultNaN() +{ + return fp16_pack(0, 31, (uint16_t)1 << 9); +} + +static inline uint32_t +fp32_defaultNaN() +{ + return fp32_pack(0, 255, (uint32_t)1 << 22); +} + +static inline uint64_t +fp64_defaultNaN() +{ + return fp64_pack(0, 2047, (uint64_t)1 << 51); +} + +static inline void +fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, + int *flags) +{ + *sgn = x >> 15; + *exp = x >> 10 & 31; + *mnt = x & (((uint16_t)1 << 10) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint16_t)1 << 10; + } else { + ++*exp; + // There is no flush to zero in this case! 
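+        // (Note: the FPLIB_FZ flush-to-zero mode is only honoured by the
+        // 32- and 64-bit unpack routines below; half-precision denormal
+        // inputs are kept as-is here.)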
+ } +} + +static inline void +fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, + int *flags) +{ + *sgn = x >> 31; + *exp = x >> 23 & 255; + *mnt = x & (((uint32_t)1 << 23) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint32_t)1 << 23; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline void +fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, + int *flags) +{ + *sgn = x >> 63; + *exp = x >> 52 & 2047; + *mnt = x & (((uint64_t)1 << 52) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint64_t)1 << 52; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline uint32_t +fp32_process_NaN(uint32_t a, int mode, int *flags) +{ + if (!(a >> 22 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint32_t)1 << 22; + } + return mode & FPLIB_DN ? fp32_defaultNaN() : a; +} + +static inline uint64_t +fp64_process_NaN(uint64_t a, int mode, int *flags) +{ + if (!(a >> 51 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint64_t)1 << 51; + } + return mode & FPLIB_DN ? fp64_defaultNaN() : a; +} + +static uint32_t +fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + + return 0; +} + +static uint32_t +fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + int c_exp = c >> 23 & 255; + uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) + return fp32_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt) + return fp32_process_NaN(c, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + 
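+    // (NaN selection below follows the same priority order as the fp32
+    // variant above: signalling NaNs from a, b, then c, ahead of any
+    // quiet NaNs.)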
int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + int c_exp = c >> 52 & 2047; + uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) + return fp64_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt) + return fp64_process_NaN(c, mode, flags); + + return 0; +} + +static uint16_t +fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint16_t int_mant; // mantissa for result, less than (1 << 11) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // There is no flush to zero in this case! + + // The bottom 5 bits of mnt are orred together: + mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr16(mnt, 3 - exp); + error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 10) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 11) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (!(mode & FPLIB_AHP)) { + if (biased_exp >= 31) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp16_infinity(sgn); + } else { + return fp16_max_normal(sgn); + } + } + } else { + if (biased_exp >= 32) { + *flags |= FPLIB_IOC; + return fp16_pack(sgn, 31, -1); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp16_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint32_t int_mant; // mantissa for result, less than (1 << 24) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp32_zero(sgn); + } + + // The bottom 8 bits of mnt are orred together: + mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr32(mnt, 3 - exp); + error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && 
sgn)) && error)) {
+ ++int_mant;
+ if (int_mant == (uint32_t)1 << 23) {
+ // Rounded up from denormalized to normalized
+ biased_exp = 1;
+ }
+ if (int_mant == (uint32_t)1 << 24) {
+ // Rounded up to next exponent
+ ++biased_exp;
+ int_mant >>= 1;
+ }
+ }
+
+ // Handle rounding to odd aka Von Neumann rounding:
+ if (error && rm == FPRounding_ODD)
+ int_mant |= 1;
+
+ // Handle overflow:
+ if (biased_exp >= 255) {
+ *flags |= FPLIB_OFC | FPLIB_IXC;
+ if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
+ (rm == FPLIB_RM && sgn)) {
+ return fp32_infinity(sgn);
+ } else {
+ return fp32_max_normal(sgn);
+ }
+ }
+
+ if (error) {
+ *flags |= FPLIB_IXC;
+ }
+
+ return fp32_pack(sgn, biased_exp, int_mant);
+}
+
+static uint32_t
+fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
+{
+ return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
+}
+
+static uint64_t
+fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
+{
+ int biased_exp; // non-negative exponent value for result
+ uint64_t int_mant; // mantissa for result, less than (1 << 53)
+ int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
+
+ assert(rm != FPRounding_TIEAWAY);
+
+ // Flush to zero:
+ if ((mode & FPLIB_FZ) && exp < 1) {
+ *flags |= FPLIB_UFC;
+ return fp64_zero(sgn);
+ }
+
+ // The bottom 11 bits of mnt are orred together:
+ mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);
+
+ if (exp > 0) {
+ biased_exp = exp;
+ int_mant = mnt >> 2;
+ error = mnt & 3;
+ } else {
+ biased_exp = 0;
+ int_mant = lsr64(mnt, 3 - exp);
+ error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
+ }
+
+ if (!biased_exp && error) { // xx should also check fpscr_val<11>
+ *flags |= FPLIB_UFC;
+ }
+
+ // Round up:
+ if ((rm == FPLIB_RN && (error == 3 ||
+ (error == 2 && (int_mant & 1)))) ||
+ (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
+ ++int_mant;
+ if (int_mant == (uint64_t)1 << 52) {
+ // Rounded up from denormalized to normalized
+ biased_exp = 1;
+ }
+ if (int_mant == (uint64_t)1 << 53) {
+ // Rounded up to next exponent
+ ++biased_exp;
+ int_mant >>= 1;
+ }
+ }
+
+ // Handle rounding to odd aka Von Neumann rounding:
+ if (error && rm == FPRounding_ODD)
+ int_mant |= 1;
+
+ // Handle overflow:
+ if (biased_exp >= 2047) {
+ *flags |= FPLIB_OFC | FPLIB_IXC;
+ if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
+ (rm == FPLIB_RM && sgn)) {
+ return fp64_infinity(sgn);
+ } else {
+ return fp64_max_normal(sgn);
+ }
+ }
+
+ if (error) {
+ *flags |= FPLIB_IXC;
+ }
+
+ return fp64_pack(sgn, biased_exp, int_mant);
+}
+
+static uint64_t
+fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
+{
+ return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
+}
+
+static int
+fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
+{
+ int a_sgn, a_exp, b_sgn, b_exp;
+ uint32_t a_mnt, b_mnt;
+
+ fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
+ fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
+
+ if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
+ (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
+ if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
+ (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
+ *flags |= FPLIB_IOC;
+ return 0;
+ }
+ return a == b || (!a_mnt && !b_mnt);
+}
+
+static int
+fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
+{
+ int a_sgn, a_exp, b_sgn, b_exp;
+ uint32_t a_mnt, b_mnt;
+
+ fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
+ fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode,
flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static int +fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static uint32_t +fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x, x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255) { + return fp32_infinity(a_sgn); + } else if (b_exp == 255) { + return fp32_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp32_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr32(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); + 
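// Bits shifted out of the smaller operand are ORed into its bottom bit
+ // (a "sticky" bit), so rounding still sees that the result is inexact. +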
b_exp = a_exp; + } else { + a_mnt = (lsr32(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); +} + +static uint64_t +fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x, x_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047) { + return fp64_infinity(a_sgn); + } else if (b_exp == 2047) { + return fp64_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp64_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr64(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr64(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); +} + +static uint32_t +fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255 || b_exp == 255) { + return fp32_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp32_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 110; + x_mnt = (uint64_t)a_mnt * b_mnt; + x_mnt = fp64_normalise(x_mnt, &x_exp); + + // Convert to 32 bits, collapsing error into bottom bit: + x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x; + uint64_t x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047 || b_exp == 2047) { + return 
fp64_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp64_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 1000; + mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + + // Convert to 64 bits, collapsing error into bottom bit: + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); +} + +static uint32_t +fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint32_t a_mnt, b_mnt, c_mnt, x; + uint64_t x_mnt, y_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp32_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 255 && (a_mnt >> 22 & 1) && + ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || + (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { + x = fp32_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 255 && !c_mnt) || + (c_exp == 255 && !b_mnt) || + (a_exp == 255 && (b_exp == 255 || c_exp == 255) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255) + return fp32_infinity(a_sgn); + if (b_exp == 255 || c_exp == 255) + return fp32_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp32_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 13; + x_mnt = (uint64_t)a_mnt << 27; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 113; + y_mnt = (uint64_t)b_mnt * c_mnt << 3; + if (!y_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + y_mnt = (lsr64(y_mnt, x_exp - y_exp) | + !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); + y_exp = x_exp; + } else { + x_mnt = (lsr64(x_mnt, y_exp - x_exp) | + !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + x_mnt = x_mnt + y_mnt; + } else if (x_mnt >= y_mnt) { + x_mnt = x_mnt - y_mnt; + } else { + x_sgn ^= 1; + x_mnt = y_mnt - x_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); +} + +static uint64_t +fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint64_t a_mnt, b_mnt, c_mnt, x; + uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp64_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 2047 && (a_mnt >> 51 & 1) && + ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || + (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { + x = fp64_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 2047 && !c_mnt) || + (c_exp == 2047 && !b_mnt) || + (a_exp == 2047 && (b_exp == 
2047 || c_exp == 2047) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047) + return fp64_infinity(a_sgn); + if (b_exp == 2047 || c_exp == 2047) + return fp64_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp64_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 11; + x0_mnt = 0; + x1_mnt = a_mnt; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 1003; + mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); + if (!y0_mnt && !y1_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + uint64_t t0, t1; + lsl128(&t0, &t1, y0_mnt, y1_mnt, + x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); + lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); + y0_mnt |= !!(t0 | t1); + y_exp = x_exp; + } else { + uint64_t t0, t1; + lsl128(&t0, &t1, x0_mnt, x1_mnt, + y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); + x0_mnt |= !!(t0 | t1); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { + sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else { + x_sgn ^= 1; + sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); + } + + if (!x0_mnt && !x1_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + // Normalise and convert to 64 bits, collapsing error into bottom bit: + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); +} + +static uint32_t +fp32_div(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255 || !b_mnt) { + if (a_exp != 255) + *flags |= FPLIB_DZC; + return fp32_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 255) + return fp32_zero(a_sgn ^ b_sgn); + + // Divide, setting bottom bit if inexact: + a_mnt = fp32_normalise(a_mnt, &a_exp); + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 172; + x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; + x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_div(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; + uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047 || !b_mnt) { + if (a_exp != 2047) + *flags |= FPLIB_DZC; + return fp64_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 2047) + return fp64_zero(a_sgn ^ b_sgn); + + // Find 
reciprocal of divisor with Newton-Raphson: + a_mnt = fp64_normalise(a_mnt, &a_exp); + b_mnt = fp64_normalise(b_mnt, &b_exp); + x_mnt = ~(uint64_t)0 / (b_mnt >> 31); + mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); + sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); + mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); + + // Multiply by dividend: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 1031; + mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); + x_mnt = x1_mnt; + + // This is an underestimate, so try adding one: + mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough + c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); + if (c <= 0) { + ++x_mnt; + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static void +set_fpscr0(FPSCR &fpscr, int flags) +{ + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + fpscr.ufc = 1; + } + if (flags & FPLIB_IXC) { + fpscr.ixc = 1; + } +} + +static uint32_t +fp32_sqrt(uint32_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp; + uint32_t a_mnt, x, x_mnt; + uint64_t t0, t1; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 255 && (uint32_t)(a_mnt << 9)) + return fp32_process_NaN(a, mode, flags); + + // Handle infinities and zeroes: + if (!a_mnt) { + return fp32_zero(a_sgn); + } + if (a_exp == 255 && !a_sgn) { + return fp32_infinity(a_sgn); + } + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + + a_mnt = fp32_normalise(a_mnt, &a_exp); + if (!(a_exp & 1)) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); + + x_sgn = 0; + x_exp = (a_exp + 147) >> 1; + x_mnt = ((x - (1 << 5)) >> 6) + 1; + t1 = (uint64_t)x_mnt * x_mnt; + t0 = (uint64_t)a_mnt << 19; + if (t1 > t0) { + --x_mnt; + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); +} + +static uint64_t +fp64_sqrt(uint64_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp, c; + uint64_t a_mnt, x_mnt, r, x0, x1; + uint32_t x; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { + return fp64_process_NaN(a, mode, flags); + } + + // Handle infinities and zeroes: + if (!a_mnt) + return fp64_zero(a_sgn); + if (a_exp == 2047 && !a_sgn) + return fp64_infinity(a_sgn); + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + + a_mnt = fp64_normalise(a_mnt, &a_exp); + if (a_exp & 1) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = 
((a_mnt / x) >> 2) + (x >> 1); + + // r = 1 / x; // 32-bit accuracy + r = ((uint64_t)1 << 62) / x; + + // r = r * (2 - x * r); // 64-bit accuracy + mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); + lsr128(&x0, &x1, x0, x1, 31); + + // x = (x + a * r) / 2; // 64-bit accuracy + mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); + lsl128(&x0, &x1, x0, x1, 5); + lsr128(&x0, &x1, x0, x1, 56); + + x0 = ((uint64_t)x << 31) + (x0 >> 1); + + x_sgn = 0; + x_exp = (a_exp + 1053) >> 1; + x_mnt = x0; + x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; + mul62x62(&x0, &x1, x_mnt, x_mnt); + lsl128(&x0, &x1, x0, x1, 19); + c = cmp128(x0, x1, 0, a_mnt); + if (c > 0) + --x_mnt; + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static int +modeConv(FPSCR fpscr) +{ + return (((int) fpscr) >> 22) & 0xF; +} + +static void +set_fpscr(FPSCR &fpscr, int flags) +{ + // translate back to FPSCR + bool underflow = false; + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + underflow = true; //xx Why is this required? + fpscr.ufc = 1; + } + if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { + fpscr.ixc = 1; + } +} + +template <> +bool +fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +uint32_t +fplibAbs(uint32_t op) +{ + return op & ~((uint32_t)1 << 31); +} + +template <> +uint64_t +fplibAbs(uint64_t op) +{ + return op & ~((uint64_t)1 << 63); +} + +template <> +uint32_t +fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +int +fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint32_t mnt1, mnt2; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9))) { + result = 3; + if ((exp1 == 255 && 
(uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +int +fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint64_t mnt1, mnt2; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { + result = 3; + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint16_t +fp16_FPConvertNaN_32(uint32_t op) +{ + return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); +} + +static uint16_t +fp16_FPConvertNaN_64(uint64_t op) +{ + return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); +} + +static uint32_t +fp32_FPConvertNaN_16(uint16_t op) +{ + return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); +} + +static uint32_t +fp32_FPConvertNaN_64(uint64_t op) +{ + return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); +} + +static uint64_t +fp64_FPConvertNaN_16(uint16_t op) +{ + return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); +} + +static uint64_t +fp64_FPConvertNaN_32(uint32_t op) +{ + return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); +} + +static uint32_t +fp32_FPOnePointFive(int sgn) +{ + return fp32_pack(sgn, 127, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPOnePointFive(int sgn) +{ + return fp64_pack(sgn, 1023, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPThree(int sgn) +{ + return fp32_pack(sgn, 128, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPThree(int sgn) +{ + return fp64_pack(sgn, 1024, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPTwo(int sgn) +{ + return fp32_pack(sgn, 128, 0); +} + +static uint64_t +fp64_FPTwo(int sgn) +{ + return fp64_pack(sgn, 1024, 0); +} + +template <> +uint16_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 127 + 15, + mnt >> 
7 | !!(uint32_t)(mnt << 25), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint16_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 1023 + 15, + mnt >> 36 | !!(uint64_t)(mnt << 28), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + result = fp32_round_(sgn, exp - 1023 + 127, + mnt >> 20 | !!(uint64_t)(mnt << 44), + rounding, mode, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t 
+fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp32_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint32_t +fp32_repack(int sgn, int exp, uint32_t mnt) +{ + return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); +} + +static uint64_t +fp64_repack(int sgn, int exp, uint64_t mnt) +{ + return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); +} + +static void +fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) + *op1 = fp32_infinity(sgn); + if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) + *op2 = fp32_infinity(sgn); +} + +static void +fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) + *op1 = fp64_infinity(sgn); + if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) + *op2 = fp64_infinity(sgn); +} + +template <> +uint32_t +fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? 
sgn2 : sgn1 ^ (op1 > op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 1); + return fplibMax(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 1); + return fplibMax(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 0); + return fplibMin(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 0); + return fplibMin(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp32_zero(sgn1 ^ sgn2); + } else { + result = fp32_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 2047 || exp2 
== 2047) {
+ result = fp64_infinity(sgn1 ^ sgn2);
+ } else if (!mnt1 || !mnt2) {
+ result = fp64_zero(sgn1 ^ sgn2);
+ } else {
+ result = fp64_mul(op1, op2, mode, &flags);
+ }
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint32_t
+fplibNeg(uint32_t op)
+{
+ return op ^ (uint32_t)1 << 31;
+}
+
+template <>
+uint64_t
+fplibNeg(uint64_t op)
+{
+ return op ^ (uint64_t)1 << 63;
+}
+
+static const uint8_t recip_sqrt_estimate[256] = {
+ 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
+ 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
+ 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
+ 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
+ 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
+ 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
+ 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
+ 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
+ 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
+ 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
+ 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
+ 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
+ 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
+ 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
+ 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
+ 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
+};
+
+template <>
+uint32_t
+fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn, exp;
+ uint32_t mnt, result;
+
+ fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
+
+ if (exp == 255 && (uint32_t)(mnt << 9)) {
+ result = fp32_process_NaN(op, mode, &flags);
+ } else if (!mnt) {
+ result = fp32_infinity(sgn);
+ flags |= FPLIB_DZC;
+ } else if (sgn) {
+ result = fp32_defaultNaN();
+ flags |= FPLIB_IOC;
+ } else if (exp == 255) {
+ result = fp32_zero(0);
+ } else {
+ exp += 8;
+ mnt = fp32_normalise(mnt, &exp);
+ mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
+ result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint64_t
+fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn, exp;
+ uint64_t mnt, result;
+
+ fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
+
+ if (exp == 2047 && (uint64_t)(mnt << 12)) {
+ result = fp64_process_NaN(op, mode, &flags);
+ } else if (!mnt) {
+ result = fp64_infinity(sgn);
+ flags |= FPLIB_DZC;
+ } else if (sgn) {
+ result = fp64_defaultNaN();
+ flags |= FPLIB_IOC;
+ } else if (exp == 2047) {
+ result = fp64_zero(0);
+ } else {
+ exp += 11;
+ mnt = fp64_normalise(mnt, &exp);
+ mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
+ result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint32_t
+fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn1, exp1, sgn2, exp2;
+ uint32_t mnt1, mnt2, result;
+
+ op1 = fplibNeg(op1);
+ fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
+ fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
+
+ result = fp32_process_NaNs(op1, op2, mode, &flags);
+ if (!result) {
+ if ((exp1 == 255 && !mnt2)
|| (exp2 == 255 && !mnt1)) { + result = fp32_FPOnePointFive(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPOnePointFive(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_zero(sgn); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint32_t)(op << 1) >> 22)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? 
fp32_infinity(sgn) : fp32_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 253) { + result = fp32_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + int result_exp = 253 - exp; + uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; + fraction <<= 15; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp32_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_zero(sgn); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint64_t)(op << 1) >> 51)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 2045) { + result = fp64_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + int result_exp = 2045 - exp; + uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; + fraction <<= 44; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp64_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecpX(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp32_pack(sgn, 254, 0); + } else { // Infinities and normals + result = fp32_pack(sgn, exp ^ 255, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecpX(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and 
denormals + result = fp64_pack(sgn, 2046, 0); + } else { // Infinities and normals + result = fp64_pack(sgn, exp ^ 2047, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else if (exp >= 150) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); + int err = exp < 118 ? 1 : + (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp32_zero(sgn); + } else { + exp = 150; + mnt = fp32_normalise(x, &exp); + result = fp32_pack(sgn, exp + 8, mnt >> 8); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else if (exp >= 1075) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); + int err = exp < 1011 ? 
1 : + (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp64_zero(sgn); + } else { + exp = 1075; + mnt = fp64_normalise(x, &exp); + result = fp64_pack(sgn, exp + 11, mnt >> 11); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibSqrt(uint32_t op, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSqrt(uint64_t op, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint64_t +FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x; + int err; + + if (exp > 1023 + 63) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + x = lsr64(mnt << 11, 1023 + 63 - exp); + err = (exp > 1023 + 63 - 2 ? 0 : + (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | + !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); + + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + if (err) { + *flags = FPLIB_IXC; + } + + return sgn ? -x : x; +} + +static uint32_t +FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); + if (u ? 
x >= (uint64_t)1 << 32 : + !(x < (uint64_t)1 << 31 || + (uint64_t)-x <= (uint64_t)1 << 31)) { + *flags = FPLIB_IOC; + x = ((uint32_t)!u << 31) - !sgn; + } + return x; +} + +template <> +uint32_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint32_t +fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 190 - fbits; + uint64_t x_mnt = x_sgn ? -a : a; + + // Handle zero: + if (!x_mnt) { + return fp32_zero(0); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 1024 + 62 - fbits; + uint64_t x_mnt = x_sgn ? 
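+ /* work on the magnitude of the two's-complement input; x_sgn is reapplied when the result is rounded and packed */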
-a : a; + + // Handle zero: + if (!x_mnt) { + return fp64_zero(0); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); +} + +template <> +uint32_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint32_t res = fp32_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +template <> +uint64_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint64_t res = fp64_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +} diff --git a/src/arch/arm/insts/fplib.hh b/src/arch/arm/insts/fplib.hh new file mode 100644 index 000000000..6263687fc --- /dev/null +++ b/src/arch/arm/insts/fplib.hh @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Edmund Grimley Evans + * Thomas Grocutt + */ + +/** + * @file + * Floating-point library code, which will gradually replace vfp.hh. For + * portability, this library does not use floating-point data types. Currently, + * C's standard integer types are used in the API, though this could be changed + * to something like class Fp32 { uint32_t x; }, etc. 
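+ * + * As an illustrative sketch of the intended use (operand values chosen + * for this example, not taken from the patch), adding 1.0f and 2.0f + * through the single-precision entry point looks like: + * + * FPSCR fpscr = 0; // cumulative flag bits land here + * uint32_t a = 0x3f800000; // 1.0f as raw bits + * uint32_t b = 0x40000000; // 2.0f as raw bits + * uint32_t r = fplibAdd(a, b, fpscr); // 0x40400000, i.e. 3.0f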
+ */ + +#ifndef __ARCH_ARM_INSTS_FPLIB_HH__ +#define __ARCH_ARM_INSTS_FPLIB_HH__ + +#include <stdint.h> + +#include "arch/arm/miscregs.hh" + +namespace ArmISA +{ + +enum FPRounding { + FPRounding_TIEEVEN = 0, + FPRounding_POSINF = 1, + FPRounding_NEGINF = 2, + FPRounding_ZERO = 3, + FPRounding_TIEAWAY = 4, + FPRounding_ODD = 5 +}; + +static inline FPRounding +FPCRRounding(FPSCR &fpscr) +{ + return (FPRounding)((uint32_t)fpscr >> 22 & 3); +} + +/** Floating-point absolute value. */ +template <class T> +T fplibAbs(T op); +/** Floating-point add. */ +template <class T> +T fplibAdd(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare (quiet and signaling). */ +template <class T> +int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr); +/** Floating-point compare equal. */ +template <class T> +bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than or equal. */ +template <class T> +bool fplibCompareGE(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than. */ +template <class T> +bool fplibCompareGT(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert precision. */ +template <class T1, class T2> +T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr); +/** Floating-point division. */ +template <class T> +T fplibDiv(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum. */ +template <class T> +T fplibMax(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum number. */ +template <class T> +T fplibMaxNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum. */ +template <class T> +T fplibMin(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum number. */ +template <class T> +T fplibMinNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply. */ +template <class T> +T fplibMul(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply-add. */ +template <class T> +T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply extended. */ +template <class T> +T fplibMulX(T op1, T op2, FPSCR &fpscr); +/** Floating-point negate. */ +template <class T> +T fplibNeg(T op); +/** Floating-point reciprocal square root estimate. */ +template <class T> +T fplibRSqrtEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal square root step. */ +template <class T> +T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal estimate. */ +template <class T> +T fplibRecipEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal step. */ +template <class T> +T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal exponent. */ +template <class T> +T fplibRecpX(T op, FPSCR &fpscr); +/** Floating-point convert to integer. */ +template <class T> +T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr); +/** Floating-point square root. */ +template <class T> +T fplibSqrt(T op, FPSCR &fpscr); +/** Floating-point subtract. */ +template <class T> +T fplibSub(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert to fixed-point. */ +template <class T1, class T2> +T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr); +/** Floating-point convert from fixed-point. */ +template <class T> +T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); + +/* Function specializations... 
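(only 32- and 64-bit widths are instantiated; the 16-bit + half-precision type appears solely in the fplibConvert conversions) 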
*/ +template <> +uint32_t fplibAbs(uint32_t op); +template <> +uint64_t fplibAbs(uint64_t op); +template <> +uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr); +template <> +int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, + FPSCR &fpscr); +template <> +uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, + FPSCR &fpscr); +template <> +uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibNeg(uint32_t op); +template <> +uint64_t fplibNeg(uint64_t op); +template <> +uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr); +template<> +uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRoundInt(uint32_t op, 
FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +} + +#endif diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 26a916fc7..42cb98a7c 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,7 +43,9 @@ #include <sstream> + +#include "arch/arm/insts/macromem.hh" + #include "arch/arm/generated/decoder.hh" +#include "arch/arm/insts/neon64_mem.hh" using namespace std; using namespace ArmISAInst; @@ -177,6 +179,212 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, } } +PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, + bool signExt, bool exclusive, bool acrel, + int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : + PredMacroOp(mnem, machInst, __opClass) +{ + bool writeback = (mode != AddrMd_Offset); + numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); + microOps = new StaticInstPtr[numMicroops]; + + StaticInstPtr *uop = microOps; + + bool post = (mode == AddrMd_PostIndex); + + rn = makeSP(rn); + + *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 
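+ /* the scratch register INTREG_UREG0 gets the unmodified base for post-index, or base + imm for offset and pre-index modes */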
0 : imm); + + if (fp) { + if (size == 16) { + if (load) { + *++uop = new MicroLdrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } + } else if (size == 8) { + if (load) { + *++uop = new MicroLdrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } else { + if (size == 8) { + if (load) { + *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + if (signExt) { + *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } else { + *++uop = new MicroStrDXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } + + if (writeback) { + *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, + post ? 
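+ /* post-index applies the immediate at writeback; the other modes already folded it into the address computation above */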
imm : 0); + } + + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } +} + +BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, IntRegIndex offset, + ArmExtendType type, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } else { + microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex dest, + int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); + microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned 
elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : @@ -193,7 +401,7 @@ VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, if (deinterleave) numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; uint32_t noAlign = TLB::MustBeOne; @@ -295,7 +503,7 @@ VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (loadSize) { @@ -556,7 +764,7 @@ VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t noAlign = TLB::MustBeOne; - RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; unsigned uopIdx = 0; if (interleave) { @@ -657,7 +865,7 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (elems) { @@ -834,6 +1042,285 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, microOps[numMicroops - 1]->setLastMicroop(); } +VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 
8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for (int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroDeintNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; ++i) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroIntNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 
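+ /* start the count with the optional writeback microop; the memory and marshalling microops are added below */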
1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for (; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroUnpackNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroPackNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? 
residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex rn, RegIndex vd, bool single, bool up, @@ -846,14 +1333,14 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, // to be functionally identical except that fldmx is deprecated. For now // we'll assume they're otherwise interchangable. int count = (single ? offset : (offset / 2)); - if (count == 0 || count > NumFloatArchRegs) + if (count == 0 || count > NumFloatV7ArchRegs) warn_once("Bad offset field for VFP load/store multiple.\n"); if (count == 0) { // Force there to be at least one microop so the macroop makes sense. writeback = true; } - if (count > NumFloatArchRegs) - count = NumFloatArchRegs; + if (count > NumFloatV7ArchRegs) + count = NumFloatV7ArchRegs; numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); microOps = new StaticInstPtr[numMicroops]; @@ -933,6 +1420,19 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ss << ", "; + printReg(ss, urb); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + std::string MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -942,6 +1442,18 @@ MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ccprintf(ss, ", "); + printReg(ss, urb); + printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); + return ss.str(); +} + std::string MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const { diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 4933a1e7c..fc8e3e1b7 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -85,6 +85,27 @@ class MicroOp : public PredOp } }; +class MicroOpX : public ArmStaticInst +{ + protected: + MicroOpX(const char *mnem, ExtMachInst machInst, OpClass __opClass) + : ArmStaticInst(mnem, machInst, __opClass) + {} + + public: + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + /** * Microops for Neon loads/stores */ @@ -135,6 +156,96 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp } }; +/** + * Microops for AArch64 NEON load/store (de)interleaving + */ +class MicroNeonMixOp64 : 
public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, numRegs, step; + + MicroNeonMixOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _dest, RegIndex _op1, uint8_t _eSize, + uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, uint8_t _step) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + numRegs(_numRegs), step(_step) + { + } +}; + +class MicroNeonMixLaneOp64 : public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, lane, step; + bool replicate; + + MicroNeonMixLaneOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + lane(_lane), step(_step), replicate(_replicate) + { + } +}; + +/** + * Base classes for microcoded AArch64 NEON memory instructions. + */ +class VldMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VstMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VldSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + +class VstSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + /** * Microops of the form * PC = IntRegA @@ -180,10 +291,10 @@ class MicroIntImmOp : public MicroOp { protected: RegIndex ura, urb; - uint32_t imm; + int32_t imm; MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, - RegIndex _ura, RegIndex _urb, uint32_t _imm) + RegIndex _ura, RegIndex _urb, int32_t _imm) : MicroOp(mnem, machInst, __opClass), ura(_ura), urb(_urb), imm(_imm) { @@ -192,6 +303,22 @@ class MicroIntImmOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntImmXOp : public MicroOpX +{ + protected: + RegIndex ura, urb; + int64_t imm; + + MicroIntImmXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, int64_t _imm) + : MicroOpX(mnem, machInst, __opClass), + ura(_ura), urb(_urb), imm(_imm) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op IntRegC */ @@ -210,6 
+337,25 @@ class MicroIntOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntRegXOp : public MicroOp +{ + protected: + RegIndex ura, urb, urc; + ArmExtendType type; + uint32_t shiftAmt; + + MicroIntRegXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : MicroOp(mnem, machInst, __opClass), + ura(_ura), urb(_urb), urc(_urc), + type(_type), shiftAmt(_shiftAmt) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op shifted IntRegC */ @@ -260,6 +406,61 @@ class MacroMemOp : public PredMacroOp bool writeback, bool load, uint32_t reglist); }; +/** + * Base class for pair load/store instructions. + */ +class PairMemOp : public PredMacroOp +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2); +}; + +class BigFpMemImmOp : public PredMacroOp +{ + protected: + BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPostOp : public PredMacroOp +{ + protected: + BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPreOp : public PredMacroOp +{ + protected: + BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemRegOp : public PredMacroOp +{ + protected: + BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); +}; + +class BigFpMemLitOp : public PredMacroOp +{ + protected: + BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + IntRegIndex dest, int64_t imm); +}; + /** * Base classes for microcoded integer memory instructions. */ diff --git a/src/arch/arm/insts/mem.cc b/src/arch/arm/insts/mem.cc index 552803b6a..15702ff83 100644 --- a/src/arch/arm/insts/mem.cc +++ b/src/arch/arm/insts/mem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -157,6 +157,9 @@ SrsOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const case MODE_ABORT: ss << "abort"; break; + case MODE_HYP: + ss << "hyp"; + break; case MODE_UNDEFINED: ss << "undefined"; break; diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc new file mode 100644 index 000000000..4d1fdd302 --- /dev/null +++ b/src/arch/arm/insts/mem64.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/mem64.hh" +#include "arch/arm/tlb.hh" +#include "base/loader/symtab.hh" +#include "mem/request.hh" + +using namespace std; + +namespace ArmISA +{ + +std::string +SysDC64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + + + +void +Memory64::startDisassembly(std::ostream &os) const +{ + printMnemonic(os, "", false); + printReg(os, dest); + ccprintf(os, ", ["); + printReg(os, base); +} + +void +Memory64::setExcAcRel(bool exclusive, bool acrel) +{ + if (exclusive) + memAccessFlags |= Request::LLSC; + else + memAccessFlags |= ArmISA::TLB::AllowUnaligned; + if (acrel) { + flags[IsMemBarrier] = true; + flags[IsWriteBarrier] = true; + flags[IsReadBarrier] = true; + } +} + +std::string +MemoryImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImmEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, result); + ccprintf(ss, ", "); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + 
ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryPreIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, ", #%d]!", imm); + return ss.str(); +} + +std::string +MemoryPostIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, "], #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryReg64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + printExtendOperand(false, ss, offset, type, shiftAmt); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryRaw64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, result); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryLiteral64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", pc + imm); + return ss.str(); +} +} diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh new file mode 100644 index 000000000..21c1e1ea8 --- /dev/null +++ b/src/arch/arm/insts/mem64.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_MEM64_HH__ +#define __ARCH_ARM_MEM64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ + +class SysDC64 : public ArmStaticInst +{ + protected: + IntRegIndex base; + IntRegIndex dest; + uint64_t imm; + + SysDC64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : ArmStaticInst(mnem, _machInst, __opClass), base(_base), dest(_dest), + imm(_imm) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MightBeMicro64 : public ArmStaticInst +{ + protected: + MightBeMicro64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) + : ArmStaticInst(mnem, _machInst, __opClass) + {} + + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + +class Memory64 : public MightBeMicro64 +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + + IntRegIndex dest; + IntRegIndex base; + /// True if the base register is SP (used for SP alignment checking). + bool baseIsSP; + static const unsigned numMicroops = 3; + + StaticInstPtr *uops; + + Memory64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base) + : MightBeMicro64(mnem, _machInst, __opClass), + dest(_dest), base(_base), uops(NULL) + { + baseIsSP = isSP(_base); + } + + virtual + ~Memory64() + { + delete [] uops; + } + + StaticInstPtr + fetchMicroop(MicroPC microPC) const + { + assert(uops != NULL && microPC < numMicroops); + return uops[microPC]; + } + + void startDisassembly(std::ostream &os) const; + + unsigned memAccessFlags; + + void setExcAcRel(bool exclusive, bool acrel); +}; + +class MemoryImm64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, _base), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImm64 : public MemoryImm64 +{ + protected: + IntRegIndex dest2; + + MemoryDImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm), + dest2(_dest2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImmEx64 : public MemoryDImm64 +{ + protected: + IntRegIndex result; + + MemoryDImmEx64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int32_t _imm) + : MemoryDImm64(mnem, _machInst, __opClass, _dest, _dest2, + _base, _imm), result(_result) + {} + + std::string 
generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPreIndex64 : public MemoryImm64 +{ + protected: + MemoryPreIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPostIndex64 : public MemoryImm64 +{ + protected: + MemoryPostIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryReg64 : public Memory64 +{ + protected: + IntRegIndex offset; + ArmExtendType type; + uint64_t shiftAmt; + + MemoryReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _offset, ArmExtendType _type, + uint64_t _shiftAmt) + : Memory64(mnem, _machInst, __opClass, _dest, _base), + offset(_offset), type(_type), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryRaw64 : public Memory64 +{ + protected: + MemoryRaw64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base) + : Memory64(mnem, _machInst, __opClass, _dest, _base) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryEx64 : public Memory64 +{ + protected: + IntRegIndex result; + + MemoryEx64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _result) + : Memory64(mnem, _machInst, __opClass, _dest, _base), result(_result) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryLiteral64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryLiteral64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, INTREG_ZERO), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; +} + +#endif // __ARCH_ARM_MEM64_HH__ diff --git a/src/arch/arm/insts/misc.cc b/src/arch/arm/insts/misc.cc index 6320bb6da..efc334c4b 100644 --- a/src/arch/arm/insts/misc.cc +++ b/src/arch/arm/insts/misc.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -145,6 +145,32 @@ MsrRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MrrcOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, dest2); + ss << ", "; + printReg(ss, op1); + return ss.str(); +} + +std::string +McrrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + return ss.str(); +} + std::string ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -229,6 +255,16 @@ RegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +RegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + std::string RegRegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { diff --git a/src/arch/arm/insts/misc.hh b/src/arch/arm/insts/misc.hh index c9e114f85..3d947a272 100644 --- a/src/arch/arm/insts/misc.hh +++ b/src/arch/arm/insts/misc.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -94,6 +94,42 @@ class MsrRegOp : public MsrBase std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MrrcOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex dest; + IntRegIndex dest2; + uint32_t imm; + + MrrcOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _dest, IntRegIndex _dest2, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), dest(_dest), + dest2(_dest2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class McrrOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex dest; + uint32_t imm; + + McrrOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _dest, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), op2(_op2), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class ImmOp : public PredOp { protected: @@ -220,6 +256,23 @@ class RegRegImmOp : public PredOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class RegImmImmOp : public PredOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegImmImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2) : + PredOp(mnem, _machInst, __opClass), + dest(_dest), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class RegRegImmImmOp : public PredOp { protected: diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc new file mode 100644 index 000000000..3553020da --- /dev/null +++ b/src/arch/arm/insts/misc64.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a 
license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/misc64.hh" + +std::string +RegRegImmImmOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +RegRegRegImmOp64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +UnknownOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + return csprintf("%-10s (inst %#08x)", "unknown", machInst); +} diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh new file mode 100644 index 000000000..5a0e18224 --- /dev/null +++ b/src/arch/arm/insts/misc64.hh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_ARM_INSTS_MISC64_HH__ +#define __ARCH_ARM_INSTS_MISC64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +class RegRegImmImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegRegImmImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class RegRegRegImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + uint64_t imm; + + RegRegRegImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class UnknownOp64 : public ArmStaticInst +{ + protected: + + UnknownOp64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +#endif diff --git a/src/arch/arm/insts/neon64_mem.hh b/src/arch/arm/insts/neon64_mem.hh new file mode 100644 index 000000000..01ce1b624 --- /dev/null +++ b/src/arch/arm/insts/neon64_mem.hh @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other 
intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Mbou Eyole + * Giacomo Gabrielli + */ + +/// @file +/// Utility functions and datatypes used by AArch64 NEON memory instructions. + +#ifndef __ARCH_ARM_INSTS_NEON64_MEM_HH__ +#define __ARCH_ARM_INSTS_NEON64_MEM_HH__ + +namespace ArmISA +{ + +typedef uint64_t XReg; + +/// 128-bit NEON vector register. +struct VReg { + XReg hi; + XReg lo; +}; + +/// Write a single NEON vector element leaving the others untouched. +inline void +writeVecElem(VReg *dest, XReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + XReg sMask = maskBits; + maskBits = sMask << shiftAmt; + + if (lsbPos < 64) { + dest->lo = (dest->lo & (~maskBits)) | ((src & sMask) << shiftAmt); + } else { + dest->hi = (dest->hi & (~maskBits)) | ((src & sMask) << shiftAmt); + } +} + +/// Read a single NEON vector element. 
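+/// Worked example (grounded in the code below): with eSize = 1 (16-bit
+/// elements) and index = 5, eBits = 16 and lsbPos = 80, so the element
+/// lives in src.hi and is returned as (src.hi >> 16) & mask(16), i.e.
+/// bits [95:80] of the 128-bit register.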
+inline XReg +readVecElem(VReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + XReg data; + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + if (lsbPos < 64) { + data = (src.lo >> shiftAmt) & maskBits; + } else { + data = (src.hi >> shiftAmt) & maskBits; + } + return data; +} + +} // namespace ArmISA + +#endif // __ARCH_ARM_INSTS_NEON64_MEM_HH__ diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh index c441d1f32..c5e2ab386 100644 --- a/src/arch/arm/insts/pred_inst.hh +++ b/src/arch/arm/insts/pred_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -78,7 +78,8 @@ modified_imm(uint8_t ctrlImm, uint8_t dataImm) } static inline uint64_t -simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) +simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid, + bool isAarch64 = false) { uint64_t bigData = data; immValid = true; @@ -133,12 +134,20 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) } break; case 0xf: - if (!op) { - uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20); - bigData = (bits(bigData, 5, 0) << 19) | - (bVal << 25) | (bits(bigData, 7) << 31); - bigData |= (bigData << 32); - break; + { + uint64_t bVal = 0; + if (!op) { + bVal = bits(bigData, 6) ? (0x1F) : (0x20); + bigData = (bits(bigData, 5, 0) << 19) | + (bVal << 25) | (bits(bigData, 7) << 31); + bigData |= (bigData << 32); + break; + } else if (isAarch64) { + bVal = bits(bigData, 6) ? (0x0FF) : (0x100); + bigData = (bits(bigData, 5, 0) << 48) | + (bVal << 54) | (bits(bigData, 7) << 63); + break; + } } // Fall through, immediate encoding is invalid. default: @@ -179,11 +188,14 @@ class PredOp : public ArmStaticInst /// Constructor PredOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : - ArmStaticInst(mnem, _machInst, __opClass), - condCode(machInst.itstateMask ? - (ConditionCode)(uint8_t)machInst.itstateCond : - (ConditionCode)(unsigned)machInst.condCode) + ArmStaticInst(mnem, _machInst, __opClass) { + if (machInst.aarch64) + condCode = COND_UC; + else if (machInst.itstateMask) + condCode = (ConditionCode)(uint8_t)machInst.itstateCond; + else + condCode = (ConditionCode)(unsigned)machInst.condCode; } }; diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 2a8dee162..260c29a84 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -86,6 +86,90 @@ ArmStaticInst::shift_rm_imm(uint32_t base, uint32_t shamt, return 0; } +int64_t +ArmStaticInst::shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const +{ + shiftAmt = shiftAmt % width; + ArmShiftType shiftType; + shiftType = (ArmShiftType)type; + + switch (shiftType) + { + case LSL: + return base << shiftAmt; + case LSR: + if (shiftAmt == 0) + return base; + else + return (base & mask(width)) >> shiftAmt; + case ASR: + if (shiftAmt == 0) { + return base; + } else { + int sign_bit = bits(base, intWidth - 1); + base >>= shiftAmt; + base = sign_bit ? (base | ~mask(intWidth - shiftAmt)) : base; + return base & mask(intWidth); + } + case ROR: + if (shiftAmt == 0) + return base; + else + return (base << (width - shiftAmt)) | (base >> shiftAmt); + default: + ccprintf(std::cerr, "Unhandled shift type\n"); + exit(1); + break; + } + return 0; +} + +int64_t +ArmStaticInst::extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const +{ + bool sign_extend = false; + int len = 0; + switch (type) { + case UXTB: + len = 8; + break; + case UXTH: + len = 16; + break; + case UXTW: + len = 32; + break; + case UXTX: + len = 64; + break; + case SXTB: + len = 8; + sign_extend = true; + break; + case SXTH: + len = 16; + sign_extend = true; + break; + case SXTW: + len = 32; + sign_extend = true; + break; + case SXTX: + len = 64; + sign_extend = true; + break; + } + len = len <= width - shiftAmt ? len : width - shiftAmt; + uint64_t tmp = (uint64_t) bits(base, len - 1, 0) << shiftAmt; + if (sign_extend) { + int sign_bit = bits(tmp, len + shiftAmt - 1); + tmp = sign_bit ? (tmp | ~mask(len + shiftAmt)) : tmp; + } + return tmp & mask(width); +} + // Shift Rm by Rs int32_t ArmStaticInst::shift_rm_rs(uint32_t base, uint32_t shamt, @@ -214,22 +298,33 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const switch (regIdxToClass(reg, &rel_reg)) { case IntRegClass: - switch (rel_reg) { - case PCReg: - ccprintf(os, "pc"); - break; - case StackPointerReg: - ccprintf(os, "sp"); - break; - case FramePointerReg: - ccprintf(os, "fp"); - break; - case ReturnAddressReg: - ccprintf(os, "lr"); - break; - default: - ccprintf(os, "r%d", reg); - break; + if (aarch64) { + if (reg == INTREG_UREG0) + ccprintf(os, "ureg0"); + else if (reg == INTREG_SPX) + ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp"); + else if (reg == INTREG_X31) + ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x"); + else + ccprintf(os, "%s%d", (intWidth == 32) ? 
"w" : "x", reg); + } else { + switch (rel_reg) { + case PCReg: + ccprintf(os, "pc"); + break; + case StackPointerReg: + ccprintf(os, "sp"); + break; + case FramePointerReg: + ccprintf(os, "fp"); + break; + case ReturnAddressReg: + ccprintf(os, "lr"); + break; + default: + ccprintf(os, "r%d", reg); + break; + } } break; case FloatRegClass: @@ -247,67 +342,102 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const void ArmStaticInst::printMnemonic(std::ostream &os, const std::string &suffix, - bool withPred) const + bool withPred, + bool withCond64, + ConditionCode cond64) const { os << " " << mnemonic; - if (withPred) { - unsigned condCode = machInst.condCode; - switch (condCode) { - case COND_EQ: - os << "eq"; - break; - case COND_NE: - os << "ne"; - break; - case COND_CS: - os << "cs"; - break; - case COND_CC: - os << "cc"; - break; - case COND_MI: - os << "mi"; - break; - case COND_PL: - os << "pl"; - break; - case COND_VS: - os << "vs"; - break; - case COND_VC: - os << "vc"; - break; - case COND_HI: - os << "hi"; - break; - case COND_LS: - os << "ls"; - break; - case COND_GE: - os << "ge"; - break; - case COND_LT: - os << "lt"; - break; - case COND_GT: - os << "gt"; - break; - case COND_LE: - os << "le"; - break; - case COND_AL: - // This one is implicit. - break; - case COND_UC: - // Unconditional. - break; - default: - panic("Unrecognized condition code %d.\n", condCode); - } + if (withPred && !aarch64) { + printCondition(os, machInst.condCode); + os << suffix; + } else if (withCond64) { + os << "."; + printCondition(os, cond64); os << suffix; - if (machInst.bigThumb) - os << ".w"; - os << " "; + } + if (machInst.bigThumb) + os << ".w"; + os << " "; +} + +void +ArmStaticInst::printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const +{ + Addr symbolAddr; + std::string symbol; + + if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) { + ccprintf(os, "<%s", symbol); + if (symbolAddr != target) + ccprintf(os, "+%d>", target - symbolAddr); + else + ccprintf(os, ">"); + } else { + ccprintf(os, "%#x", target); + } +} + +void +ArmStaticInst::printCondition(std::ostream &os, + unsigned code, + bool noImplicit) const +{ + switch (code) { + case COND_EQ: + os << "eq"; + break; + case COND_NE: + os << "ne"; + break; + case COND_CS: + os << "cs"; + break; + case COND_CC: + os << "cc"; + break; + case COND_MI: + os << "mi"; + break; + case COND_PL: + os << "pl"; + break; + case COND_VS: + os << "vs"; + break; + case COND_VC: + os << "vc"; + break; + case COND_HI: + os << "hi"; + break; + case COND_LS: + os << "ls"; + break; + case COND_GE: + os << "ge"; + break; + case COND_LT: + os << "lt"; + break; + case COND_GT: + os << "gt"; + break; + case COND_LE: + os << "le"; + break; + case COND_AL: + // This one is implicit. + if (noImplicit) + os << "al"; + break; + case COND_UC: + // Unconditional. 
+ if (noImplicit) + os << "uc"; + break; + default: + panic("Unrecognized condition code %d.\n", code); } } @@ -392,6 +522,38 @@ ArmStaticInst::printShiftOperand(std::ostream &os, } } +void +ArmStaticInst::printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const +{ + if (!firstOperand) + ccprintf(os, ", "); + printReg(os, rm); + if (type == UXTX && shiftAmt == 0) + return; + switch (type) { + case UXTB: ccprintf(os, ", UXTB"); + break; + case UXTH: ccprintf(os, ", UXTH"); + break; + case UXTW: ccprintf(os, ", UXTW"); + break; + case UXTX: ccprintf(os, ", LSL"); + break; + case SXTB: ccprintf(os, ", SXTB"); + break; + case SXTH: ccprintf(os, ", SXTH"); + break; + case SXTW: ccprintf(os, ", SXTW"); + break; + case SXTX: ccprintf(os, ", SXTX"); + break; + } + if (type == UXTX || shiftAmt) + ccprintf(os, " #%d", shiftAmt); +} + void ArmStaticInst::printDataInst(std::ostream &os, bool withImm, bool immShift, bool s, IntRegIndex rd, IntRegIndex rn, diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index c36024ecd..aeec67ec2 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/utility.hh" +#include "arch/arm/system.hh" #include "base/trace.hh" #include "cpu/static_inst.hh" #include "sim/byteswap.hh" @@ -55,6 +56,9 @@ namespace ArmISA class ArmStaticInst : public StaticInst { protected: + bool aarch64; + uint8_t intWidth; + int32_t shift_rm_imm(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; int32_t shift_rm_rs(uint32_t base, uint32_t shamt, @@ -65,6 +69,11 @@ class ArmStaticInst : public StaticInst bool shift_carry_rs(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; + int64_t shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const; + int64_t extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const; + template<int width> static inline bool saturateOp(int32_t &res, int64_t op1, int64_t op2, bool sub=false) @@ -135,6 +144,11 @@ class ArmStaticInst : public StaticInst OpClass __opClass) : StaticInst(mnem, _machInst, __opClass) { + aarch64 = machInst.aarch64; + if (bits(machInst, 28, 24) == 0x10) + intWidth = 64; // Force 64-bit width for ADR/ADRP + else + intWidth = (aarch64 && bits(machInst, 31)) ?
64 : 32; } /// Print a register name for disassembly given the unique @@ -142,13 +156,22 @@ class ArmStaticInst : public StaticInst void printReg(std::ostream &os, int reg) const; void printMnemonic(std::ostream &os, const std::string &suffix = "", - bool withPred = true) const; + bool withPred = true, + bool withCond64 = false, + ConditionCode cond64 = COND_UC) const; + void printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const; + void printCondition(std::ostream &os, unsigned code, + bool noImplicit=false) const; void printMemSymbol(std::ostream &os, const SymbolTable *symtab, const std::string &prefix, const Addr addr, const std::string &suffix) const; void printShiftOperand(std::ostream &os, IntRegIndex rm, bool immShift, uint32_t shiftAmt, IntRegIndex rs, ArmShiftType type) const; + void printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const; void printDataInst(std::ostream &os, bool withImm) const; @@ -166,10 +189,13 @@ class ArmStaticInst : public StaticInst std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; static inline uint32_t - cpsrWriteByInstr(CPSR cpsr, uint32_t val, - uint8_t byteMask, bool affectState, bool nmfi) + cpsrWriteByInstr(CPSR cpsr, uint32_t val, SCR scr, NSACR nsacr, + uint8_t byteMask, bool affectState, bool nmfi, ThreadContext *tc) { - bool privileged = (cpsr.mode != MODE_USER); + bool privileged = (cpsr.mode != MODE_USER); + bool haveVirt = ArmSystem::haveVirtualization(tc); + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool isSecure = inSecureState(scr, cpsr) || !haveSecurity; uint32_t bitMask = 0; @@ -182,14 +208,53 @@ class ArmStaticInst : public StaticInst } if (bits(byteMask, 1)) { unsigned highIdx = affectState ? 15 : 9; - unsigned lowIdx = privileged ? 8 : 9; + unsigned lowIdx = (privileged && (isSecure || scr.aw || haveVirt)) + ? 8 : 9; bitMask = bitMask | mask(highIdx, lowIdx); } if (bits(byteMask, 0)) { if (privileged) { - bitMask = bitMask | mask(7, 6); - if (!badMode((OperatingMode)(val & mask(5)))) { - bitMask = bitMask | mask(5); + bitMask |= 1 << 7; + if ( (!nmfi || !((val >> 6) & 0x1)) && + (isSecure || scr.fw || haveVirt) ) { + bitMask |= 1 << 6; + } + // Now check the new mode is allowed + OperatingMode newMode = (OperatingMode) (val & mask(5)); + OperatingMode oldMode = (OperatingMode)(uint32_t)cpsr.mode; + if (!badMode(newMode)) { + bool validModeChange = true; + // Check for attempts to enter modes only permitted in + // Secure state from Non-secure state. These are Monitor + // mode ('10110'), and FIQ mode ('10001') if the Security + // Extensions have reserved it. 
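+                        // Note that none of these checks raise an exception;
+                        // if the change is invalid, the M[4:0] bits are
+                        // simply left out of bitMask below, so the mode field
+                        // keeps its old value and a warning is printed
+                        // instead.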
+ if (!isSecure && newMode == MODE_MON) + validModeChange = false; + if (!isSecure && newMode == MODE_FIQ && nsacr.rfr == 1) + validModeChange = false; + // There is no Hyp mode ('11010') in Secure state, so that + // is UNPREDICTABLE + if (scr.ns == 0 && newMode == MODE_HYP) + validModeChange = false; + // Cannot move into Hyp mode directly from a Non-secure + // PL1 mode + if (!isSecure && oldMode != MODE_HYP && newMode == MODE_HYP) + validModeChange = false; + // Cannot move out of Hyp mode with this function except + // on an exception return + if (oldMode == MODE_HYP && newMode != MODE_HYP && !affectState) + validModeChange = false; + // Must not change to 64 bit when running in 32 bit mode + if (!opModeIs64(oldMode) && opModeIs64(newMode)) + validModeChange = false; + + // If we passed all of the above then set the bit mask to + // copy the mode across + if (validModeChange) { + bitMask = bitMask | mask(5); + } else { + warn_once("Illegal change to CPSR mode attempted\n"); + } } else { warn_once("Ignoring write of bad mode to CPSR.\n"); } @@ -198,11 +263,7 @@ class ArmStaticInst : public StaticInst bitMask = bitMask | (1 << 5); } - bool cpsr_f = cpsr.f; - uint32_t new_cpsr = ((uint32_t)cpsr & ~bitMask) | (val & bitMask); - if (nmfi && !cpsr_f) - new_cpsr &= ~(1 << 6); - return new_cpsr; + return ((uint32_t)cpsr & ~bitMask) | (val & bitMask); } static inline uint32_t @@ -296,12 +357,12 @@ class ArmStaticInst : public StaticInst inline Fault disabledFault() const { - if (FullSystem) { - return new UndefinedInstruction(); - } else { - return new UndefinedInstruction(machInst, false, mnemonic, true); - } + return new UndefinedInstruction(machInst, false, mnemonic, true); } + + public: + virtual void + annotateFault(ArmFault *fault) {} }; } diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc index ca0f58226..03fdc83fa 100644 --- a/src/arch/arm/insts/vfp.cc +++ b/src/arch/arm/insts/vfp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,37 @@ * exception bits read before it, etc.
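+ * The empty __asm__ __volatile__ statements with "m" constraints that
+ * appear throughout this file are compiler barriers for exactly that
+ * reason: they pin each floating point operation between the
+ * fesetround() and fetestexcept() calls that bracket it.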
*/ +std::string +FpCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +FpCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + std::string FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -91,6 +122,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest + FP_Reg_Base); + ss << ", "; + printReg(ss, op1 + FP_Reg_Base); + ss << ", "; + printReg(ss, op2 + FP_Reg_Base); + ss << ", "; + printReg(ss, op3 + FP_Reg_Base); + return ss.str(); +} + std::string FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -131,24 +177,25 @@ prepFpState(uint32_t rMode) } void -finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) +finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask) { int exceptions = fetestexcept(FeAllExceptions); bool underflow = false; - if (exceptions & FeInvalid) { + if ((exceptions & FeInvalid) && mask.ioc) { fpscr.ioc = 1; } - if (exceptions & FeDivByZero) { + if ((exceptions & FeDivByZero) && mask.dzc) { fpscr.dzc = 1; } - if (exceptions & FeOverflow) { + if ((exceptions & FeOverflow) && mask.ofc) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { underflow = true; - fpscr.ufc = 1; + if (mask.ufc) + fpscr.ufc = 1; } - if ((exceptions & FeInexact) && !(underflow && flush)) { + if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) { fpscr.ixc = 1; } fesetround(state); @@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val) return mid; } -uint16_t -vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, - uint32_t rMode, bool ahp, float op) +static inline uint16_t +vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble) { - uint32_t opBits = fpToBits(op); + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the operand. - bool neg = bits(opBits, 31); - uint32_t exponent = bits(opBits, 30, 23); - uint32_t oldMantissa = bits(opBits, 22, 0); - uint32_t mantissa = oldMantissa >> (23 - 10); + bool neg = bits(opBits, sBitPos); + uint32_t exponent = bits(opBits, sBitPos-1, mWidth); + uint64_t oldMantissa = bits(opBits, mWidth-1, 0); + uint32_t mantissa = oldMantissa >> (mWidth - 10); // Do the conversion. - uint32_t extra = oldMantissa & mask(23 - 10); - if (exponent == 0xff) { + uint64_t extra = oldMantissa & mask(mWidth - 10); + if (exponent == mask(eWidth)) { if (oldMantissa != 0) { // Nans. if (bits(mantissa, 9) == 0) { @@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, if (exponent == 0) { // Denormalized. - // If flush to zero is on, this shouldn't happen. 
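+        // A denormal can only reach this point when flush-to-zero is off:
+        // with FZ set, flushToZero() has already replaced the operand with
+        // a signed zero, which is what the assert below relies on.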
assert(!flush); @@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, // We need to track the dropped bits differently since // more can be dropped by denormalizing. - bool topOne = bits(extra, 12); - bool restZeros = bits(extra, 11, 0) == 0; + bool topOne = bits(extra, mWidth - 10 - 1); + bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0; - if (exponent <= (127 - 15)) { + if (exponent <= (eHalfRange - 15)) { // The result is too small. Denormalize. mantissa |= (1 << 10); - while (mantissa && exponent <= (127 - 15)) { + while (mantissa && exponent <= (eHalfRange - 15)) { restZeros = restZeros && !topOne; topOne = bits(mantissa, 0); mantissa = mantissa >> 1; @@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, exponent = 0; } else { // Change bias. - exponent -= (127 - 15); + exponent -= (eHalfRange - 15); } if (exponent == 0 && (inexact || fpscr.ufe)) { @@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, return result; } -float -vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +uint16_t +vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, float op) { - float junk = 0.0; + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); +} + +uint16_t +vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op) +{ + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); +} + +static inline uint64_t +vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) +{ + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the bitfields. bool neg = bits(op, 15); uint32_t exponent = bits(op, 14, 10); - uint32_t mantissa = bits(op, 9, 0); + uint64_t mantissa = bits(op, 9, 0); // Do the conversion. if (exponent == 0) { if (mantissa != 0) { // Normalize the value. - exponent = exponent + (127 - 15) + 1; + exponent = exponent + (eHalfRange - 15) + 1; while (mantissa < (1 << 10)) { mantissa = mantissa << 1; exponent--; } } - mantissa = mantissa << (23 - 10); + mantissa = mantissa << (mWidth - 10); } else if (exponent == 0x1f && !ahp) { // Infinities and nans. - exponent = 0xff; + exponent = mask(eWidth); if (mantissa != 0) { // Nans. - mantissa = mantissa << (23 - 10); - if (bits(mantissa, 22) == 0) { + mantissa = mantissa << (mWidth - 10); + if (bits(mantissa, mWidth-1) == 0) { // Signalling nan. fpscr.ioc = 1; - mantissa |= (1 << 22); + mantissa |= (((uint64_t) 1) << (mWidth-1)); } if (defaultNan) { - mantissa &= ~mask(22); + mantissa &= ~mask(mWidth-1); neg = false; } } } else { - exponent = exponent + (127 - 15); - mantissa = mantissa << (23 - 10); + exponent = exponent + (eHalfRange - 15); + mantissa = mantissa << (mWidth - 10); } // Reassemble the result. 
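+    // Pack sign | exponent | mantissa using the mWidth/eWidth geometry so
+    // the same code can build either a single- or double-precision bit
+    // pattern.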
- uint32_t result = bits(mantissa, 22, 0); - replaceBits(result, 30, 23, exponent); - if (neg) - result |= (1 << 31); + uint64_t result = bits(mantissa, mWidth-1, 0); + replaceBits(result, sBitPos-1, mWidth, exponent); + if (neg) { + result |= (((uint64_t) 1) << sBitPos); + } + return result; +} + +double +vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +{ + double junk = 0.0; + uint64_t result; + + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true); return bitsToFp(result, junk); } -uint64_t -vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero) +float +vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) { - int rmode = rzero ? FeRoundZero : fegetround(); - __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); - fesetround(FeRoundNearest); - val = val * powf(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - float origVal = val; - val = rintf(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } + float junk = 0.0; + uint64_t result; - if (isSigned) { - if (half) { - if ((double)val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if ((double)val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if ((double)val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if ((double)val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(16))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(32))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); + return bitsToFp(result, junk); } float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan, float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + 
val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan, return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); } -uint64_t -vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero) -{ - int rmode = rzero ? FeRoundZero : fegetround(); - fesetround(FeRoundNearest); - val = val * pow(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - double origVal = val; - val = rint(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } - if (isSigned) { - if (half) { - if (val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if (val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if (val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if (val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(16)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(32)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } -} double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan, double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -976,6 +919,85 @@ template double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, double op1, double op2) const; +// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB +template +fpType 
+FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const +{ + const bool single = (sizeof(fpType) == sizeof(float)); + fpType junk = 0.0; + + if (flush && (flushToZero(op1, op2) || flushToZero(op3))) + fpscr.idc = 1; + VfpSavedState state = prepFpState(rMode); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) + : "m" (op1), "m" (op2), "m" (op3), "m" (state)); + fpType dest = func(op1, op2, op3); + __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); + + int fpClass = std::fpclassify(dest); + // Get NAN behavior right. This varies between x86 and ARM. + if (fpClass == FP_NAN) { + const uint64_t qnan = + single ? 0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan1 = std::isnan(op1); + const bool nan2 = std::isnan(op2); + const bool nan3 = std::isnan(op3); + const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); + const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); + const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); + if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) { + dest = bitsToFp(qnan, junk); + } else if (signal1) { + dest = bitsToFp(fpToBits(op1) | qnan, junk); + } else if (signal2) { + dest = bitsToFp(fpToBits(op2) | qnan, junk); + } else if (signal3) { + dest = bitsToFp(fpToBits(op3) | qnan, junk); + } else if (nan1) { + dest = op1; + } else if (nan2) { + dest = op2; + } else if (nan3) { + dest = op3; + } + } else if (flush && flushToZero(dest)) { + feraiseexcept(FeUnderflow); + } else if (( + (single && (dest == bitsToFp(0x00800000, junk) || + dest == bitsToFp(0x80800000, junk))) || + (!single && + (dest == bitsToFp(ULL(0x0010000000000000), junk) || + dest == bitsToFp(ULL(0x8010000000000000), junk))) + ) && rMode != VfpRoundZero) { + /* + * Correct for the fact that underflow is detected -before- rounding + * in ARM and -after- rounding in x86. 
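+         * To compensate, the operation is redone under round-to-zero; if
+         * the truncated result flushes to zero, the pre-rounding magnitude
+         * was below the minimum normal, i.e. the case where ARM reports
+         * underflow but x86 has already rounded up.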
+ */ + fesetround(FeRoundZero); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) + : "m" (op1), "m" (op2), "m" (op3)); + fpType temp = func(op1, op2, op3); + __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); + if (flush && flushToZero(temp)) { + dest = temp; + } + } + finishVfp(fpscr, state, flush); + return dest; +} + +template +float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, + float (*func)(float, float, float), + bool flush, bool defaultNan, uint32_t rMode) const; +template +double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, + double (*func)(double, double, double), + bool flush, bool defaultNan, uint32_t rMode) const; + template <class fpType> fpType FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 9babaae04..f17f90973 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -104,7 +104,8 @@ enum VfpRoundingMode VfpRoundNearest = 0, VfpRoundUpward = 1, VfpRoundDown = 2, - VfpRoundZero = 3 + VfpRoundZero = 3, + VfpRoundAway = 4 }; static inline float bitsToFp(uint64_t, float); @@ -212,7 +213,7 @@ isSnan(fpType val) typedef int VfpSavedState; VfpSavedState prepFpState(uint32_t rMode); -void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush); +void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask); template <class fpType> fpType fixDest(FPSCR fpscr, fpType val, fpType op1); @@ -228,7 +229,11 @@ double fixFpSFpDDest(FPSCR fpscr, float val); uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op); -float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op); + +float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); static inline double makeDouble(uint32_t low, uint32_t high) @@ -249,19 +254,192 @@ highFromDouble(double val) return fpToBits(val) >> 32; } -uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); +static inline void +setFPExceptions(int exceptions) { + feclearexcept(FeAllExceptions); + feraiseexcept(exceptions); +} + +template <typename T> +uint64_t +vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool + useRmode = true, VfpRoundingMode roundMode = VfpRoundZero, + bool aarch64 = false) +{ + int rmode; + bool roundAwayFix = false; + + if (!useRmode) { + rmode = fegetround(); + } else { + switch (roundMode) + { + case VfpRoundNearest: + rmode = FeRoundNearest; + break; + case VfpRoundUpward: + rmode = FeRoundUpward; + break; + case VfpRoundDown: + rmode = FeRoundDown; + break; + case VfpRoundZero: + rmode = FeRoundZero; + break; + case VfpRoundAway: + // There is no equivalent host rounding mode; use round down + // and fix it up later + rmode = FeRoundDown; + roundAwayFix = true; + break; + default: + panic("Unsupported roundMode %d\n", roundMode); + } + } + __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); + fesetround(FeRoundNearest); + val = val * pow(2.0, imm); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(rmode); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + T origVal = val;
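+    // origVal keeps the scaled value: comparing it with the rounded result
+    // below both flags the conversion as inexact and drives the fix-ups
+    // for cases the host rounding handles differently (notably the
+    // FeRoundDown-based emulation of VfpRoundAway).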
+ val = rint(val); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + int exceptions = fetestexcept(FeAllExceptions); + + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + exceptions |= FeInvalid; + } + val = 0.0; + } else if (origVal != val) { + switch (rmode) { + case FeRoundNearest: + if (origVal - val > 0.5) + val += 1.0; + else if (val - origVal > 0.5) + val -= 1.0; + break; + case FeRoundDown: + if (roundAwayFix) { + // The ordering on the subtraction looks a bit odd in that we + // don't do the obvious origVal - val, instead we do + // -(val - origVal). This is required to get the correct + // bit-exact behaviour when very close to the 0.5 threshold. + volatile T error = val; + error -= origVal; + error = -error; + if ((error > 0.5) || + ((error == 0.5) && (val >= 0))) + val += 1.0; + } else { + if (origVal < val) + val -= 1.0; + } + break; + case FeRoundUpward: + if (origVal > val) + val += 1.0; + break; + } + exceptions |= FeInexact; + } + + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + if (isSigned) { + bool outOfRange = false; + int64_t result = (int64_t) val; + uint64_t finalVal; + + if (!aarch64) { + if (width == 16) { + finalVal = (int16_t)val; + } else if (width == 32) { + finalVal = (int32_t)val; + } else if (width == 64) { + finalVal = result; + } else { + panic("Unsupported width %d\n", width); + } + + // Check if value is in range + int64_t minVal = ~mask(width-1); + if ((double)val < minVal) { + outOfRange = true; + finalVal = minVal; + } + int64_t maxVal = mask(width-1); + if ((double)val > maxVal) { + outOfRange = true; + finalVal = maxVal; + } + } else { + bool isNeg = val < 0; + finalVal = result & mask(width); + // If the result is supposed to be less than 64 bits check that the + // upper bits that got thrown away are just sign extension bits + if (width != 64) { + outOfRange = ((uint64_t) result >> (width - 1)) != + (isNeg ? mask(64-width+1) : 0); + } + // If the original floating point value doesn't match the + // integer version, we are also out of range, so create a + // saturated result.
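+            // e.g. for width == 32 a negative out-of-range value saturates
+            // to 0x80000000 (1LL << 31) and a positive one to 0x7fffffff
+            // (mask(31)).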
+ if (isNeg) { + outOfRange |= val < result; + if (outOfRange) { + finalVal = 1LL << (width-1); + } + } else { + outOfRange |= val > result; + if (outOfRange) { + finalVal = mask(width-1); + } + } + } + + // Raise an exception if the value was out of range + if (outOfRange) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + } + setFPExceptions(exceptions); + return finalVal; + } else { + if ((double)val < 0) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return 0; + } + + uint64_t result = ((uint64_t) val) & mask(width); + if (val > result) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return mask(width); + } + + setFPExceptions(exceptions); + return result; + } +}; + + float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); -uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); float fprSqrtEstimate(FPSCR &fpscr, float op); uint32_t unsignedRSqrtEstimate(uint32_t op); @@ -292,6 +470,20 @@ class VfpMacroOp : public PredMacroOp void nextIdxs(IntRegIndex &dest); }; +template <typename T> +static inline T +fpAdd(T a, T b) +{ + return a + b; +}; + +template <typename T> +static inline T +fpSub(T a, T b) +{ + return a - b; +}; + static inline float fpAddS(float a, float b) { @@ -328,6 +520,54 @@ fpDivD(double a, double b) return a / b; } +template <typename T> +static inline T +fpDiv(T a, T b) +{ + return a / b; +}; + +template <typename T> +static inline T +fpMulX(T a, T b) +{ + uint64_t opData; + uint32_t sign1; + uint32_t sign2; + const bool single = (sizeof(T) == sizeof(float)); + if (single) { + opData = (fpToBits(a)); + sign1 = opData>>31; + opData = (fpToBits(b)); + sign2 = opData>>31; + } else { + opData = (fpToBits(a)); + sign1 = opData>>63; + opData = (fpToBits(b)); + sign2 = opData>>63; + } + bool inf1 = (std::fpclassify(a) == FP_INFINITE); + bool inf2 = (std::fpclassify(b) == FP_INFINITE); + bool zero1 = (std::fpclassify(a) == FP_ZERO); + bool zero2 = (std::fpclassify(b) == FP_ZERO); + if ((inf1 && zero2) || (zero1 && inf2)) { + if (sign1 ^ sign2) + return (T)(-2.0); + else + return (T)(2.0); + } else { + return (a * b); + } +}; + + +template <typename T> +static inline T +fpMul(T a, T b) +{ + return a * b; +}; + static inline float fpMulS(float a, float b) { @@ -340,23 +580,140 @@ fpMulD(double a, double b) return a * b; } -static inline float -fpMaxS(float a, float b) +// @todo remove this when all calls to it have been replaced with the new +// fplib implementation +template <typename T> +static inline T +fpMulAdd(T op1, T op2, T addend) +{ + T result; + + if (sizeof(T) == sizeof(float)) + result = fmaf(op1, op2, addend); + else + result = fma(op1, op2, addend); + + // ARM doesn't generate signed NaNs from this operation, so fix up the result + if (std::isnan(result) && !std::isnan(op1) && + !std::isnan(op2) && !std::isnan(addend)) + { + uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1); + result = bitsToFp(fpToBits(result) & ~bitMask, op1); + } + return result; +} + +template <typename T> +static inline T +fpRIntX(T a, FPSCR &fpscr) +{ + T rVal; + + rVal =
rint(a); + if (rVal != a && !std::isnan(a)) + fpscr.ixc = 1; + return (rVal); +}; + +template <typename T> +static inline T +fpMaxNum(T a, T b) { + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (!std::signbit(a) && std::signbit(b)) return a; - return fmaxf(a, b); -} + return fmax(a, b); +}; -static inline float -fpMinS(float a, float b) +template <typename T> +static inline T +fpMax(T a, T b) { + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMaxNum(a, b); +}; + +template <typename T> +static inline T +fpMinNum(T a, T b) +{ + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (std::signbit(a) && !std::signbit(b)) return a; - return fminf(a, b); -} + return fmin(a, b); +}; + +template <typename T> +static inline T +fpMin(T a, T b) +{ + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMinNum(a, b); +}; + +template <typename T> +static inline T +fpRSqrts(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 1.5; + } + aXb = a * b; + fpClassAxB = std::fpclassify(aXb); + if (fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 1.5; + } + return (3.0 - (a * b)) / 2.0; +}; + +template <typename T> +static inline T +fpRecps(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 2.0; + } + aXb = a * b; + fpClassAxB = std::fpclassify(aXb); + if (fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 2.0; + } + return 2.0 - (a * b); +}; + static inline float fpRSqrtsS(float a, float b) @@ -400,6 +757,23 @@ fpRecpsS(float a, float b) return 2.0 - (a * b); } +template <typename T> +static inline T +roundNEven(T a) { + T val; + + val = round(a); + if (a - val == 0.5) { + if ((((int) a) & 1) == 0) val += 1.0; + } + else if (a - val == -0.5) { + if ((((int) a) & 1) == 0) val -= 1.0; + } + return val; +} + + + class FpOp : public PredOp { protected: @@ -455,6 +829,12 @@ class FpOp : public PredOp processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const; + template <class fpType> + fpType + ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const; + template <class fpType> fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, @@ -478,6 +858,55 @@ class FpOp : public PredOp pcState.advance(); } } + + float + fpSqrt(FPSCR fpscr, float x) const + { + return unaryOp(fpscr, x, sqrtf, fpscr.fz, fpscr.rMode); + } + + double + fpSqrt(FPSCR fpscr, double x) const + { + return unaryOp(fpscr, x, sqrt, fpscr.fz, fpscr.rMode); + } +}; + +class FpCondCompRegOp : public FpOp +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, +
ConditionCode _condCode, uint8_t _defCc) : + FpOp(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class FpCondSelOp : public FpOp +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + FpOp(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegRegOp : public FpOp @@ -550,6 +979,26 @@ class FpRegRegRegOp : public FpOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class FpRegRegRegRegOp : public FpOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex op3; + + FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) : + FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2), + op3(_op3) + { + setVfpMicroFlags(mode, flags); + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class FpRegRegRegImmOp : public FpOp { protected: diff --git a/src/arch/arm/interrupts.cc b/src/arch/arm/interrupts.cc index c05ae984e..6682b75a0 100644 --- a/src/arch/arm/interrupts.cc +++ b/src/arch/arm/interrupts.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 ARM Limited + * Copyright (c) 2009, 2012-2013 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -38,9 +38,128 @@ */ #include "arch/arm/interrupts.hh" +#include "arch/arm/system.hh" ArmISA::Interrupts * ArmInterruptsParams::create() { return new ArmISA::Interrupts(this); } + +bool +ArmISA::Interrupts::takeInt(ThreadContext *tc, InterruptTypes int_type) const +{ + // Table G1-17~19 of ARM V8 ARM + InterruptMask mask; + bool highest_el_is_64 = ArmSystem::highestELIs64(tc); + + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr; + HCR hcr; + hcr = tc->readMiscReg(MISCREG_HCR); + ExceptionLevel el = (ExceptionLevel) ((uint32_t) cpsr.el); + bool cpsr_mask_bit, scr_routing_bit, scr_fwaw_bit, hcr_mask_override_bit; + + if (!highest_el_is_64) + scr = tc->readMiscReg(MISCREG_SCR); + else + scr = tc->readMiscReg(MISCREG_SCR_EL3); + + bool is_secure = inSecureState(scr, cpsr); + + switch(int_type) { + case INT_FIQ: + cpsr_mask_bit = cpsr.f; + scr_routing_bit = scr.fiq; + scr_fwaw_bit = scr.fw; + hcr_mask_override_bit = hcr.fmo; + break; + case INT_IRQ: + cpsr_mask_bit = cpsr.i; + scr_routing_bit = scr.irq; + scr_fwaw_bit = 1; + hcr_mask_override_bit = hcr.imo; + break; + case INT_ABT: + cpsr_mask_bit = cpsr.a; + scr_routing_bit = scr.ea; + scr_fwaw_bit = scr.aw; + hcr_mask_override_bit = hcr.amo; + break; + default: + panic("Unhandled interrupt type!"); + } + + if (hcr.tge) + hcr_mask_override_bit = 1; + + if (!highest_el_is_64) { + // AArch32 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!hcr_mask_override_bit) + mask = INT_MASK_M; + else { + if (!is_secure && (el == EL0 || el == EL1)) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // SCR IRQ == 1 + if ((!is_secure) && + (hcr_mask_override_bit || + (!scr_fwaw_bit && !hcr_mask_override_bit))) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // 
AArch64 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!scr.rw) { + // SCR RW == 0 + if (!hcr_mask_override_bit) { + if (el == EL3) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_T; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } else { + // SCR RW == 1 + if (!hcr_mask_override_bit) { + if (el == EL3 || el == EL2) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_P; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + } else { + // SCR IRQ == 1 + if (el == EL3) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + + return ((mask == INT_MASK_T) || + ((mask == INT_MASK_M) && !cpsr_mask_bit)) && + (mask != INT_MASK_P); +} + diff --git a/src/arch/arm/interrupts.hh b/src/arch/arm/interrupts.hh index 7def6ddd6..8e6c2b261 100644 --- a/src/arch/arm/interrupts.hh +++ b/src/arch/arm/interrupts.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010,2012 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ #include "arch/arm/isa_traits.hh" #include "arch/arm/miscregs.hh" #include "arch/arm/registers.hh" +#include "arch/arm/utility.hh" #include "cpu/thread_context.hh" #include "debug/Interrupt.hh" #include "params/ArmInterrupts.hh" @@ -123,31 +124,79 @@ class Interrupts : public SimObject memset(interrupts, 0, sizeof(interrupts)); } + enum InterruptMask { + INT_MASK_M, // masked (subject to PSTATE.{A,I,F} mask bit + INT_MASK_T, // taken regardless of mask + INT_MASK_P // pending + }; + + bool takeInt(ThreadContext *tc, InterruptTypes int_type) const; + bool checkInterrupts(ThreadContext *tc) const { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + + if (!(intStatus || hcr.va || hcr.vi || hcr.vf)) return false; CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); - - return ((interrupts[INT_IRQ] && !cpsr.i) || - (interrupts[INT_FIQ] && !cpsr.f) || - (interrupts[INT_ABT] && !cpsr.a) || - (interrupts[INT_RST]) || - (interrupts[INT_SEV])); + SCR scr = tc->readMiscReg(MISCREG_SCR); + + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + + return ((interrupts[INT_IRQ] && take_irq) || + (interrupts[INT_FIQ] && take_fiq) || + (interrupts[INT_ABT] && take_ea) || + ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) || + ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) || + (hcr.va && allowVAbort) || + (interrupts[INT_RST]) || + (interrupts[INT_SEV]) + ); } /** - * Check the raw interrupt state. * This function is used to check if a wfi operation should sleep. If there * is an interrupt pending, even if it's masked, wfi doesn't sleep. 
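The nested conditionals above condense the routing tables from the ARMv8 ARM into a three-way classification: an interrupt is either maskable at the current level (subject to the PSTATE.A/I/F bit), taken regardless of that bit, or held pending for a higher exception level. The final decision then reduces to (sketch, mirroring the return expression of takeInt()):

    enum IntMask { MASK_M, MASK_T, MASK_P };

    // MASK_T fires unconditionally, MASK_M only when the PSTATE
    // mask bit is clear, MASK_P never fires at this level.
    static bool fires(IntMask mask, bool pstateMaskBit)
    {
        return mask == MASK_T || (mask == MASK_M && !pstateMaskBit);
    }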
* @return any interrupts pending */ bool - checkRaw() const + checkWfiWake(HCR hcr, CPSR cpsr, SCR scr) const + { + uint64_t maskedIntStatus; + bool virtWake; + + maskedIntStatus = intStatus & ~((1 << INT_VIRT_IRQ) | + (1 << INT_VIRT_FIQ)); + virtWake = (hcr.vi || interrupts[INT_VIRT_IRQ]) && hcr.imo; + virtWake |= (hcr.vf || interrupts[INT_VIRT_FIQ]) && hcr.fmo; + virtWake |= hcr.va && hcr.amo; + virtWake &= (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr); + return maskedIntStatus || virtWake; + } + + uint32_t + getISR(HCR hcr, CPSR cpsr, SCR scr) { - return intStatus; + bool useHcrMux; + CPSR isr = 0; // ARM ARM states ISR reg uses same bit possitions as CPSR + + useHcrMux = (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr); + isr.i = (useHcrMux & hcr.imo) ? (interrupts[INT_VIRT_IRQ] || hcr.vi) + : interrupts[INT_IRQ]; + isr.f = (useHcrMux & hcr.fmo) ? (interrupts[INT_VIRT_FIQ] || hcr.vf) + : interrupts[INT_FIQ]; + isr.a = (useHcrMux & hcr.amo) ? hcr.va : interrupts[INT_ABT]; + return isr; } /** @@ -172,22 +221,45 @@ class Interrupts : public SimObject Fault getInterrupt(ThreadContext *tc) { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); + + // Calculate a few temp vars so we can work out if there's a pending + // virtual interrupt, and if its allowed to happen + // ARM ARM Issue C section B1.9.9, B1.9.11, and B1.9.13 + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + if ( !(intStatus || (hcr.vi && allowVIrq) || (hcr.vf && allowVFiq) || + (hcr.va && allowVAbort)) ) return NoFault; - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + - if (interrupts[INT_IRQ] && !cpsr.i) + if (interrupts[INT_IRQ] && take_irq) return new Interrupt; - if (interrupts[INT_FIQ] && !cpsr.f) + if ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) + return new VirtualInterrupt; + if (interrupts[INT_FIQ] && take_fiq) return new FastInterrupt; - if (interrupts[INT_ABT] && !cpsr.a) - return new DataAbort(0, false, 0, - ArmFault::AsynchronousExternalAbort); + if ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) + return new VirtualFastInterrupt; + if (interrupts[INT_ABT] && take_ea) + return new SystemError; + if (hcr.va && allowVAbort) + return new VirtualDataAbort(0, TlbEntry::DomainType::NoAccess, false, + ArmFault::AsynchronousExternalAbort); if (interrupts[INT_RST]) - return new Reset; + return new Reset; if (interrupts[INT_SEV]) - return new ArmSev; + return new ArmSev; panic("intStatus and interrupts not in sync\n"); } diff --git a/src/arch/arm/intregs.hh b/src/arch/arm/intregs.hh index 3fe00b765..fa18aa68d 100644 --- a/src/arch/arm/intregs.hh +++ b/src/arch/arm/intregs.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -83,6 +83,9 @@ enum IntRegIndex INTREG_R14_MON, INTREG_LR_MON = INTREG_R14_MON, + INTREG_R13_HYP, + INTREG_SP_HYP = INTREG_R13_HYP, + INTREG_R13_ABT, INTREG_SP_ABT = INTREG_R13_ABT, INTREG_R14_ABT, @@ -108,7 +111,7 @@ enum IntRegIndex INTREG_R14_FIQ, INTREG_LR_FIQ = INTREG_R14_FIQ, - INTREG_ZERO, 
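checkWfiWake() and getISR() below both apply the same virtualization mux: below EL2 in non-secure state, each of the I/F/A views reports the virtual line (the pin or the HCR.VI/VF/VA bit) whenever the matching HCR route-to-hyp bit (IMO/FMO/AMO) is set, and the physical line otherwise. One bit of that mux, schematically (hypothetical types):

    struct Lines { bool physIrq, virtIrq, hcrVi, hcrImo; };

    // ISR.I for a non-secure reader below Hyp: HCR.IMO selects the
    // virtual source (pin or HCR.VI) over the physical pin.
    static bool isrIBit(const Lines &l, bool nonSecureBelowHyp)
    {
        bool useVirt = nonSecureBelowHyp && l.hcrImo;
        return useVirt ? (l.virtIrq || l.hcrVi) : l.physIrq;
    }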
// Dummy zero reg since there has to be one. + INTREG_ZERO, INTREG_UREG0, INTREG_UREG1, INTREG_UREG2, @@ -117,12 +120,54 @@ enum IntRegIndex INTREG_CONDCODES_V, INTREG_CONDCODES_GE, INTREG_FPCONDCODES, + INTREG_DUMMY, // Dummy reg used to throw away int reg results + + INTREG_SP0, + INTREG_SP1, + INTREG_SP2, + INTREG_SP3, NUM_INTREGS, - NUM_ARCH_INTREGS = INTREG_PC + 1, + NUM_ARCH_INTREGS = 32, + + /* AArch64 registers */ + INTREG_X0 = 0, + INTREG_X1, + INTREG_X2, + INTREG_X3, + INTREG_X4, + INTREG_X5, + INTREG_X6, + INTREG_X7, + INTREG_X8, + INTREG_X9, + INTREG_X10, + INTREG_X11, + INTREG_X12, + INTREG_X13, + INTREG_X14, + INTREG_X15, + INTREG_X16, + INTREG_X17, + INTREG_X18, + INTREG_X19, + INTREG_X20, + INTREG_X21, + INTREG_X22, + INTREG_X23, + INTREG_X24, + INTREG_X25, + INTREG_X26, + INTREG_X27, + INTREG_X28, + INTREG_X29, + INTREG_X30, + INTREG_X31, + + INTREG_SPX = NUM_INTREGS, /* All the aliased indexes. */ - + /* USR mode */ INTREG_R0_USR = INTREG_R0, INTREG_R1_USR = INTREG_R1, @@ -195,6 +240,25 @@ enum IntRegIndex INTREG_PC_ABT = INTREG_PC, INTREG_R15_ABT = INTREG_R15, + /* HYP mode */ + INTREG_R0_HYP = INTREG_R0, + INTREG_R1_HYP = INTREG_R1, + INTREG_R2_HYP = INTREG_R2, + INTREG_R3_HYP = INTREG_R3, + INTREG_R4_HYP = INTREG_R4, + INTREG_R5_HYP = INTREG_R5, + INTREG_R6_HYP = INTREG_R6, + INTREG_R7_HYP = INTREG_R7, + INTREG_R8_HYP = INTREG_R8, + INTREG_R9_HYP = INTREG_R9, + INTREG_R10_HYP = INTREG_R10, + INTREG_R11_HYP = INTREG_R11, + INTREG_R12_HYP = INTREG_R12, + INTREG_LR_HYP = INTREG_LR, + INTREG_R14_HYP = INTREG_R14, + INTREG_PC_HYP = INTREG_PC, + INTREG_R15_HYP = INTREG_R15, + /* UND mode */ INTREG_R0_UND = INTREG_R0, INTREG_R1_UND = INTREG_R1, @@ -244,11 +308,26 @@ enum IntRegIndex typedef IntRegIndex IntRegMap[NUM_ARCH_INTREGS]; +const IntRegMap IntReg64Map = { + INTREG_R0, INTREG_R1, INTREG_R2, INTREG_R3, + INTREG_R4, INTREG_R5, INTREG_R6, INTREG_R7, + INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R13_HYP, + INTREG_R14_IRQ, INTREG_R13_IRQ, INTREG_R14_SVC, INTREG_R13_SVC, + INTREG_R14_ABT, INTREG_R13_ABT, INTREG_R14_UND, INTREG_R13_UND, + INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_ZERO +}; + const IntRegMap IntRegUsrMap = { INTREG_R0_USR, INTREG_R1_USR, INTREG_R2_USR, INTREG_R3_USR, INTREG_R4_USR, INTREG_R5_USR, INTREG_R6_USR, INTREG_R7_USR, INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, - INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -258,11 +337,33 @@ INTREG_USR(unsigned index) return IntRegUsrMap[index]; } +const IntRegMap IntRegHypMap = { + INTREG_R0_HYP, INTREG_R1_HYP, INTREG_R2_HYP, INTREG_R3_HYP, + INTREG_R4_HYP, INTREG_R5_HYP, INTREG_R6_HYP, INTREG_R7_HYP, + INTREG_R8_HYP, INTREG_R9_HYP, INTREG_R10_HYP, INTREG_R11_HYP, + INTREG_R12_HYP, INTREG_R13_HYP, INTREG_R14_HYP, INTREG_R15_HYP, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO +}; + +static inline IntRegIndex +INTREG_HYP(unsigned index) +{ + assert(index < 
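The IntReg64Map above encodes the ARMv8 interprocessing overlay of the 31 AArch64 general registers onto the AArch32 banked file: X0-X7 sit on R0-R7, X8-X15 on the USR bank plus the Hyp SP, X16-X23 on the IRQ/SVC/ABT/UND LR/SP pairs, X24-X30 on the FIQ bank, and X31 on the zero register. The same pairing, condensed (placeholder enumerators; the real targets are the INTREG_* names in the map above):

    // Which AArch32 bank each Xn lands in, per IntReg64Map.
    enum Bank { LOW = 0, USR, HYP, IRQ, SVC, ABT, UND, FIQ, ZERO };

    static const Bank kX2Bank[32] = {
        LOW, LOW, LOW, LOW, LOW, LOW, LOW, LOW,  // X0-X7  -> R0-R7
        USR, USR, USR, USR, USR, USR, USR, HYP,  // X8-X15 (X15 -> SP_hyp)
        IRQ, IRQ, SVC, SVC, ABT, ABT, UND, UND,  // X16-X23 -> LR/SP pairs
        FIQ, FIQ, FIQ, FIQ, FIQ, FIQ, FIQ, ZERO  // X24-X31 (X31 -> zero)
    };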
NUM_ARCH_INTREGS); + return IntRegHypMap[index]; +} + const IntRegMap IntRegSvcMap = { INTREG_R0_SVC, INTREG_R1_SVC, INTREG_R2_SVC, INTREG_R3_SVC, INTREG_R4_SVC, INTREG_R5_SVC, INTREG_R6_SVC, INTREG_R7_SVC, INTREG_R8_SVC, INTREG_R9_SVC, INTREG_R10_SVC, INTREG_R11_SVC, - INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC + INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -276,7 +377,11 @@ const IntRegMap IntRegMonMap = { INTREG_R0_MON, INTREG_R1_MON, INTREG_R2_MON, INTREG_R3_MON, INTREG_R4_MON, INTREG_R5_MON, INTREG_R6_MON, INTREG_R7_MON, INTREG_R8_MON, INTREG_R9_MON, INTREG_R10_MON, INTREG_R11_MON, - INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON + INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -290,7 +395,11 @@ const IntRegMap IntRegAbtMap = { INTREG_R0_ABT, INTREG_R1_ABT, INTREG_R2_ABT, INTREG_R3_ABT, INTREG_R4_ABT, INTREG_R5_ABT, INTREG_R6_ABT, INTREG_R7_ABT, INTREG_R8_ABT, INTREG_R9_ABT, INTREG_R10_ABT, INTREG_R11_ABT, - INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT + INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -304,7 +413,11 @@ const IntRegMap IntRegUndMap = { INTREG_R0_UND, INTREG_R1_UND, INTREG_R2_UND, INTREG_R3_UND, INTREG_R4_UND, INTREG_R5_UND, INTREG_R6_UND, INTREG_R7_UND, INTREG_R8_UND, INTREG_R9_UND, INTREG_R10_UND, INTREG_R11_UND, - INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND + INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -318,7 +431,11 @@ const IntRegMap IntRegIrqMap = { INTREG_R0_IRQ, INTREG_R1_IRQ, INTREG_R2_IRQ, INTREG_R3_IRQ, INTREG_R4_IRQ, INTREG_R5_IRQ, INTREG_R6_IRQ, INTREG_R7_IRQ, INTREG_R8_IRQ, INTREG_R9_IRQ, INTREG_R10_IRQ, INTREG_R11_IRQ, - INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ + INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -332,7 +449,11 @@ const IntRegMap IntRegFiqMap = { INTREG_R0_FIQ, INTREG_R1_FIQ, INTREG_R2_FIQ, INTREG_R3_FIQ, INTREG_R4_FIQ, INTREG_R5_FIQ, INTREG_R6_FIQ, INTREG_R7_FIQ, INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, - INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, 
INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -351,6 +472,51 @@ intRegInMode(OperatingMode mode, int reg) return mode * intRegsPerMode + reg; } +static inline int +flattenIntRegModeIndex(int reg) +{ + int mode = reg / intRegsPerMode; + reg = reg % intRegsPerMode; + switch (mode) { + case MODE_USER: + case MODE_SYSTEM: + return INTREG_USR(reg); + case MODE_FIQ: + return INTREG_FIQ(reg); + case MODE_IRQ: + return INTREG_IRQ(reg); + case MODE_SVC: + return INTREG_SVC(reg); + case MODE_MON: + return INTREG_MON(reg); + case MODE_ABORT: + return INTREG_ABT(reg); + case MODE_HYP: + return INTREG_HYP(reg); + case MODE_UNDEFINED: + return INTREG_UND(reg); + default: + panic("%d: Flattening into an unknown mode: reg:%#x mode:%#x\n", + curTick(), reg, mode); + } +} + + +static inline IntRegIndex +makeSP(IntRegIndex reg) +{ + if (reg == INTREG_X31) + reg = INTREG_SPX; + return reg; +} + + +static inline bool +isSP(IntRegIndex reg) +{ + return reg == INTREG_SPX; +} + } #endif diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 86be2803d..4f1ef91ec 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -51,12 +51,111 @@ namespace ArmISA { + +/** + * Some registers aliase with others, and therefore need to be translated. + * For each entry: + * The first value is the misc register that is to be looked up + * the second value is the lower part of the translation + * the third the upper part + */ +const struct ISA::MiscRegInitializerEntry + ISA::MiscRegSwitch[miscRegTranslateMax] = { + {MISCREG_CSSELR_EL1, {MISCREG_CSSELR, 0}}, + {MISCREG_SCTLR_EL1, {MISCREG_SCTLR, 0}}, + {MISCREG_SCTLR_EL2, {MISCREG_HSCTLR, 0}}, + {MISCREG_ACTLR_EL1, {MISCREG_ACTLR, 0}}, + {MISCREG_ACTLR_EL2, {MISCREG_HACTLR, 0}}, + {MISCREG_CPACR_EL1, {MISCREG_CPACR, 0}}, + {MISCREG_CPTR_EL2, {MISCREG_HCPTR, 0}}, + {MISCREG_HCR_EL2, {MISCREG_HCR, 0}}, + {MISCREG_MDCR_EL2, {MISCREG_HDCR, 0}}, + {MISCREG_HSTR_EL2, {MISCREG_HSTR, 0}}, + {MISCREG_HACR_EL2, {MISCREG_HACR, 0}}, + {MISCREG_TTBR0_EL1, {MISCREG_TTBR0, 0}}, + {MISCREG_TTBR1_EL1, {MISCREG_TTBR1, 0}}, + {MISCREG_TTBR0_EL2, {MISCREG_HTTBR, 0}}, + {MISCREG_VTTBR_EL2, {MISCREG_VTTBR, 0}}, + {MISCREG_TCR_EL1, {MISCREG_TTBCR, 0}}, + {MISCREG_TCR_EL2, {MISCREG_HTCR, 0}}, + {MISCREG_VTCR_EL2, {MISCREG_VTCR, 0}}, + {MISCREG_AFSR0_EL1, {MISCREG_ADFSR, 0}}, + {MISCREG_AFSR1_EL1, {MISCREG_AIFSR, 0}}, + {MISCREG_AFSR0_EL2, {MISCREG_HADFSR, 0}}, + {MISCREG_AFSR1_EL2, {MISCREG_HAIFSR, 0}}, + {MISCREG_ESR_EL2, {MISCREG_HSR, 0}}, + {MISCREG_FAR_EL1, {MISCREG_DFAR, MISCREG_IFAR}}, + {MISCREG_FAR_EL2, {MISCREG_HDFAR, MISCREG_HIFAR}}, + {MISCREG_HPFAR_EL2, {MISCREG_HPFAR, 0}}, + {MISCREG_PAR_EL1, {MISCREG_PAR, 0}}, + {MISCREG_MAIR_EL1, {MISCREG_PRRR, MISCREG_NMRR}}, + {MISCREG_MAIR_EL2, {MISCREG_HMAIR0, MISCREG_HMAIR1}}, + {MISCREG_AMAIR_EL1, {MISCREG_AMAIR0, MISCREG_AMAIR1}}, + {MISCREG_VBAR_EL1, {MISCREG_VBAR, 0}}, + {MISCREG_VBAR_EL2, {MISCREG_HVBAR, 0}}, + {MISCREG_CONTEXTIDR_EL1, {MISCREG_CONTEXTIDR, 0}}, + {MISCREG_TPIDR_EL0, {MISCREG_TPIDRURW, 0}}, + {MISCREG_TPIDRRO_EL0, {MISCREG_TPIDRURO, 0}}, + {MISCREG_TPIDR_EL1, {MISCREG_TPIDRPRW, 0}}, + {MISCREG_TPIDR_EL2, {MISCREG_HTPIDR, 0}}, + {MISCREG_TEECR32_EL1, {MISCREG_TEECR, 0}}, + {MISCREG_CNTFRQ_EL0, {MISCREG_CNTFRQ, 0}}, + {MISCREG_CNTPCT_EL0, {MISCREG_CNTPCT, 0}}, + {MISCREG_CNTVCT_EL0, {MISCREG_CNTVCT, 0}}, + {MISCREG_CNTVOFF_EL2, {MISCREG_CNTVOFF, 0}}, + {MISCREG_CNTKCTL_EL1, {MISCREG_CNTKCTL, 0}}, + 
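makeSP() and isSP() above capture the AArch64 rule that register number 31 is context-dependent: it reads as the zero register (XZR) in most data-processing encodings but names the stack pointer in address-generating ones, so those decoders remap it to the dedicated INTREG_SPX slot. Decode-side, the check is just (sketch):

    // Register 31 is XZR by default; operands defined to use SP
    // remap it to the dedicated flat index (INTREG_SPX above).
    enum { X31 = 31, SPX = 32 };

    static int decodeReg(int reg, bool operandUsesSp)
    {
        return (operandUsesSp && reg == X31) ? SPX : reg;
    }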
{MISCREG_CNTHCTL_EL2, {MISCREG_CNTHCTL, 0}}, + {MISCREG_CNTP_TVAL_EL0, {MISCREG_CNTP_TVAL, 0}}, + {MISCREG_CNTP_CTL_EL0, {MISCREG_CNTP_CTL, 0}}, + {MISCREG_CNTP_CVAL_EL0, {MISCREG_CNTP_CVAL, 0}}, + {MISCREG_CNTV_TVAL_EL0, {MISCREG_CNTV_TVAL, 0}}, + {MISCREG_CNTV_CTL_EL0, {MISCREG_CNTV_CTL, 0}}, + {MISCREG_CNTV_CVAL_EL0, {MISCREG_CNTV_CVAL, 0}}, + {MISCREG_CNTHP_TVAL_EL2, {MISCREG_CNTHP_TVAL, 0}}, + {MISCREG_CNTHP_CTL_EL2, {MISCREG_CNTHP_CTL, 0}}, + {MISCREG_CNTHP_CVAL_EL2, {MISCREG_CNTHP_CVAL, 0}}, + {MISCREG_DACR32_EL2, {MISCREG_DACR, 0}}, + {MISCREG_IFSR32_EL2, {MISCREG_IFSR, 0}}, + {MISCREG_TEEHBR32_EL1, {MISCREG_TEEHBR, 0}}, + {MISCREG_SDER32_EL3, {MISCREG_SDER, 0}} +}; + + ISA::ISA(Params *p) - : SimObject(p) + : SimObject(p), system(NULL), lookUpMiscReg(NUM_MISCREGS, {0,0}) { SCTLR sctlr; sctlr = 0; miscRegs[MISCREG_SCTLR_RST] = sctlr; + + system = dynamic_cast(p->system); + DPRINTFN("ISA system set to: %p %p\n", system, p->system); + + // Cache system-level properties + if (FullSystem && system) { + haveSecurity = system->haveSecurity(); + haveLPAE = system->haveLPAE(); + haveVirtualization = system->haveVirtualization(); + haveLargeAsid64 = system->haveLargeAsid64(); + physAddrRange64 = system->physAddrRange64(); + } else { + haveSecurity = haveLPAE = haveVirtualization = false; + haveLargeAsid64 = false; + physAddrRange64 = 32; // dummy value + } + + /** Fill in the miscReg translation table */ + for (uint32_t i = 0; i < miscRegTranslateMax; i++) { + struct MiscRegLUTEntry new_entry; + + uint32_t select = MiscRegSwitch[i].index; + new_entry = MiscRegSwitch[i].entry; + + lookUpMiscReg[select] = new_entry; + } + + preUnflattenMiscReg(); + clear(); } @@ -73,27 +172,42 @@ ISA::clear() SCTLR sctlr_rst = miscRegs[MISCREG_SCTLR_RST]; memset(miscRegs, 0, sizeof(miscRegs)); + + // Initialize configurable default values + miscRegs[MISCREG_MIDR] = p->midr; + miscRegs[MISCREG_MIDR_EL1] = p->midr; + miscRegs[MISCREG_VPIDR] = p->midr; + + if (FullSystem && system->highestELIs64()) { + // Initialize AArch64 state + clear64(p); + return; + } + + // Initialize AArch32 state... + CPSR cpsr = 0; cpsr.mode = MODE_USER; miscRegs[MISCREG_CPSR] = cpsr; updateRegMap(cpsr); SCTLR sctlr = 0; - sctlr.te = (bool)sctlr_rst.te; - sctlr.nmfi = (bool)sctlr_rst.nmfi; - sctlr.v = (bool)sctlr_rst.v; - sctlr.u = 1; + sctlr.te = (bool) sctlr_rst.te; + sctlr.nmfi = (bool) sctlr_rst.nmfi; + sctlr.v = (bool) sctlr_rst.v; + sctlr.u = 1; sctlr.xp = 1; sctlr.rao2 = 1; sctlr.rao3 = 1; - sctlr.rao4 = 1; - miscRegs[MISCREG_SCTLR] = sctlr; + sctlr.rao4 = 0xf; // SCTLR[6:3] + miscRegs[MISCREG_SCTLR_NS] = sctlr; miscRegs[MISCREG_SCTLR_RST] = sctlr_rst; + miscRegs[MISCREG_HCPTR] = 0; - /* Start with an event in the mailbox */ + // Start with an event in the mailbox miscRegs[MISCREG_SEV_MAILBOX] = 1; - // Separate Instruction and Data TLBs. + // Separate Instruction and Data TLBs miscRegs[MISCREG_TLBTR] = 1; MVFR0 mvfr0 = 0; @@ -119,7 +233,8 @@ ISA::clear() // Reset values of PRRR and NMRR are implementation dependent - miscRegs[MISCREG_PRRR] = + // @todo: PRRR and NMRR in secure state? 
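Each entry in this table pairs an AArch64 system register with the AArch32 register (or register pair) that backs it, so reads of a 64-bit alias such as MAIR_EL1 or FAR_EL1 are composed from the two 32-bit halves. The composition rule used by readMiscRegNoEffect() further down is, in essence:

    #include <cstdint>

    struct LutEntry { int lower, upper; };  // upper == 0: no pairing

    // 64-bit architectural value from the backing 32-bit registers.
    static uint64_t composeRead(const LutEntry &e, const uint64_t *regs)
    {
        if (e.upper > 0)
            return (regs[e.lower] & 0xffffffffULL)
                 | (regs[e.upper] << 32);
        return regs[e.lower];
    }

Writes run the same mapping in reverse: setMiscRegNoEffect() splits bits [31:0] and [63:32] back into the lower and upper registers.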
+ miscRegs[MISCREG_PRRR_NS] = (1 << 19) | // 19 (0 << 18) | // 18 (0 << 17) | // 17 @@ -132,7 +247,7 @@ ISA::clear() (2 << 4) | // 5:4 (1 << 2) | // 3:2 0; // 1:0 - miscRegs[MISCREG_NMRR] = + miscRegs[MISCREG_NMRR_NS] = (1 << 30) | // 31:30 (0 << 26) | // 27:26 (0 << 24) | // 25:24 @@ -151,8 +266,6 @@ ISA::clear() miscRegs[MISCREG_CPACR] = 0; - // Initialize configurable default values - miscRegs[MISCREG_MIDR] = p->midr; miscRegs[MISCREG_ID_PFR0] = p->id_pfr0; miscRegs[MISCREG_ID_PFR1] = p->id_pfr1; @@ -169,27 +282,132 @@ ISA::clear() miscRegs[MISCREG_ID_ISAR4] = p->id_isar4; miscRegs[MISCREG_ID_ISAR5] = p->id_isar5; - miscRegs[MISCREG_FPSID] = p->fpsid; + if (haveLPAE) { + TTBCR ttbcr = miscRegs[MISCREG_TTBCR_NS]; + ttbcr.eae = 0; + miscRegs[MISCREG_TTBCR_NS] = ttbcr; + // Enforce consistency with system-level settings + miscRegs[MISCREG_ID_MMFR0] = (miscRegs[MISCREG_ID_MMFR0] & ~0xf) | 0x5; + } + + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_S] = sctlr; + miscRegs[MISCREG_SCR] = 0; + miscRegs[MISCREG_VBAR_S] = 0; + } else { + // we're always non-secure + miscRegs[MISCREG_SCR] = 1; + } //XXX We need to initialize the rest of the state. } +void +ISA::clear64(const ArmISAParams *p) +{ + CPSR cpsr = 0; + Addr rvbar = system->resetAddr64(); + switch (system->highestEL()) { + // Set initial EL to highest implemented EL using associated stack + // pointer (SP_ELx); set RVBAR_ELx to implementation defined reset + // value + case EL3: + cpsr.mode = MODE_EL3H; + miscRegs[MISCREG_RVBAR_EL3] = rvbar; + break; + case EL2: + cpsr.mode = MODE_EL2H; + miscRegs[MISCREG_RVBAR_EL2] = rvbar; + break; + case EL1: + cpsr.mode = MODE_EL1H; + miscRegs[MISCREG_RVBAR_EL1] = rvbar; + break; + default: + panic("Invalid highest implemented exception level"); + break; + } + + // Initialize rest of CPSR + cpsr.daif = 0xf; // Mask all interrupts + cpsr.ss = 0; + cpsr.il = 0; + miscRegs[MISCREG_CPSR] = cpsr; + updateRegMap(cpsr); + + // Initialize other control registers + miscRegs[MISCREG_MPIDR_EL1] = 0x80000000; + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_EL3] = 0x30c50870; + miscRegs[MISCREG_SCR_EL3] = 0x00000030; // RES1 fields + // @todo: uncomment this to enable Virtualization + // } else if (haveVirtualization) { + // miscRegs[MISCREG_SCTLR_EL2] = 0x30c50870; + } else { + miscRegs[MISCREG_SCTLR_EL1] = 0x30c50870; + // Always non-secure + miscRegs[MISCREG_SCR_EL3] = 1; + } + + // Initialize configurable id registers + miscRegs[MISCREG_ID_AA64AFR0_EL1] = p->id_aa64afr0_el1; + miscRegs[MISCREG_ID_AA64AFR1_EL1] = p->id_aa64afr1_el1; + miscRegs[MISCREG_ID_AA64DFR0_EL1] = p->id_aa64dfr0_el1; + miscRegs[MISCREG_ID_AA64DFR1_EL1] = p->id_aa64dfr1_el1; + miscRegs[MISCREG_ID_AA64ISAR0_EL1] = p->id_aa64isar0_el1; + miscRegs[MISCREG_ID_AA64ISAR1_EL1] = p->id_aa64isar1_el1; + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = p->id_aa64mmfr0_el1; + miscRegs[MISCREG_ID_AA64MMFR1_EL1] = p->id_aa64mmfr1_el1; + miscRegs[MISCREG_ID_AA64PFR0_EL1] = p->id_aa64pfr0_el1; + miscRegs[MISCREG_ID_AA64PFR1_EL1] = p->id_aa64pfr1_el1; + + // Enforce consistency with system-level settings... + + // EL3 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 15, 12, + haveSecurity ? 0x1 : 0x0); + // EL2 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 11, 8, + haveVirtualization ? 
0x1 : 0x0); + // Large ASID support + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 7, 4, + haveLargeAsid64 ? 0x2 : 0x0); + // Physical address size + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 3, 0, + encodePhysAddrRange64(physAddrRange64)); +} + MiscReg ISA::readMiscRegNoEffect(int misc_reg) const { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - MiscReg val = miscRegs[flat_idx]; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + MiscReg val; + + if (lookUpMiscReg[flat_idx].lower == 0 || flat_idx == MISCREG_SPSR + || flat_idx == MISCREG_SCTLR_EL1) { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + val = miscRegs[flat_idx]; + } else + if (lookUpMiscReg[flat_idx].upper > 0) + val = ((miscRegs[lookUpMiscReg[flat_idx].lower] & mask(32)) + | (miscRegs[lookUpMiscReg[flat_idx].upper] << 32)); + else + val = miscRegs[lookUpMiscReg[flat_idx].lower]; - DPRINTF(MiscRegs, "Reading From misc reg %d (%d) : %#x\n", - misc_reg, flat_idx, val); return val; } @@ -197,33 +415,98 @@ ISA::readMiscRegNoEffect(int misc_reg) const MiscReg ISA::readMiscReg(int misc_reg, ThreadContext *tc) { - ArmSystem *arm_sys; + CPSR cpsr = 0; + PCState pc = 0; + SCR scr = 0; if (misc_reg == MISCREG_CPSR) { - CPSR cpsr = miscRegs[misc_reg]; - PCState pc = tc->pcState(); + cpsr = miscRegs[misc_reg]; + pc = tc->pcState(); cpsr.j = pc.jazelle() ? 1 : 0; cpsr.t = pc.thumb() ? 1 : 0; return cpsr; } - if (misc_reg >= MISCREG_CP15_UNIMP_START) - panic("Unimplemented CP15 register %s read.\n", - miscRegName[misc_reg]); - switch (misc_reg) { - case MISCREG_MPIDR: - arm_sys = dynamic_cast(tc->getSystemPtr()); - assert(arm_sys); +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + else + panic("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + } +#endif - if (arm_sys->multiProc) { - return 0x80000000 | // multiprocessor extensions available - tc->cpuId(); + switch (unflattenMiscReg(misc_reg)) { + case MISCREG_HCR: + { + if (!haveVirtualization) + return 0; + else + return readMiscRegNoEffect(MISCREG_HCR); + } + case MISCREG_CPACR: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + // Only cp10, cp11, and ase are implemented, nothing else should + // be readable? 
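The insertBits() calls in clear64() above stitch the system-level configuration into the read-only ID registers: EL3 support into ID_AA64PFR0_EL1[15:12], EL2 into [11:8], the ASID width into ID_AA64MMFR0_EL1[7:4], and the encoded physical address range into [3:0]. For reference, an insertBits() with the semantics assumed here, replacing bits [hi:lo] of a value (gem5 keeps its own in base/bitfield.hh):

    #include <cstdint>

    // Replace bits [hi:lo] of 'val' with 'field'.
    static uint64_t insertBits(uint64_t val, int hi, int lo,
                               uint64_t field)
    {
        const uint64_t m = ((~0ULL >> (63 - hi)) >> lo) << lo;
        return (val & ~m) | ((field << lo) & m);
    }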
(straight copy from the write code) + cpacrMask.cp10 = ones; + cpacrMask.cp11 = ones; + cpacrMask.asedis = ones; + + // Security Extensions may limit the readability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + MiscReg val = readMiscRegNoEffect(MISCREG_CPACR); + val &= cpacrMask; + DPRINTF(MiscRegs, "Reading misc reg %s: %#x\n", + miscRegName[misc_reg], val); + return val; + } + case MISCREG_MPIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return getMPIDR(system, tc); + } else { + return readMiscReg(MISCREG_VMPIDR, tc); + } + break; + case MISCREG_MPIDR_EL1: + // @todo in the absence of v8 virtualization support just return MPIDR_EL1 + return getMPIDR(system, tc) & 0xffffffff; + case MISCREG_VMPIDR: + // top bit defined as RES1 + return readMiscRegNoEffect(misc_reg) | 0x80000000; + case MISCREG_ID_AFR0: // not implemented, so alias MIDR + case MISCREG_ID_DFR0: // not implemented, so alias MIDR + case MISCREG_REVIDR: // not implemented, so alias MIDR + case MISCREG_MIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return readMiscRegNoEffect(misc_reg); } else { - return 0x80000000 | // multiprocessor extensions available - 0x40000000 | // in up system - tc->cpuId(); + return readMiscRegNoEffect(MISCREG_VPIDR); } break; + case MISCREG_JOSCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JMCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JIDR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_AIDR: // AUX ID set to 0 + case MISCREG_TCMTR: // No TCM's + return 0; + case MISCREG_CLIDR: warn_once("The clidr register always reports 0 caches.\n"); warn_once("clidr LoUIS field of 0b001 to match current " @@ -276,6 +559,75 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrQcMask; case MISCREG_FPSCR_EXC: return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrExcMask; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_NZCV: + { + CPSR cpsr = 0; + cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); + cpsr.v = tc->readIntReg(INTREG_CONDCODES_V); + return cpsr; + } + case MISCREG_DAIF: + { + CPSR cpsr = 0; + cpsr.daif = (uint8_t) ((CPSR) miscRegs[MISCREG_CPSR]).daif; + return 
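The MIDR/MPIDR handling above is the standard ID-register virtualization mux: Hyp mode and secure state read the real registers, while non-secure EL1/EL0 read the VPIDR/VMPIDR shadows that a hypervisor programs (VMPIDR additionally forcing its RES1 top bit on reads). Schematically:

    struct IdRegs { unsigned midr, vpidr; };

    // Non-secure readers below Hyp see the hypervisor's shadow copy.
    static unsigned readMidr(const IdRegs &r, bool inHyp, bool secure)
    {
        return (inHyp || secure) ? r.midr : r.vpidr;
    }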
cpsr; + } + case MISCREG_SP_EL0: + { + return tc->readIntReg(INTREG_SP0); + } + case MISCREG_SP_EL1: + { + return tc->readIntReg(INTREG_SP1); + } + case MISCREG_SP_EL2: + { + return tc->readIntReg(INTREG_SP2); + } + case MISCREG_SPSEL: + { + return miscRegs[MISCREG_CPSR] & 0x1; + } + case MISCREG_CURRENTEL: + { + return miscRegs[MISCREG_CPSR] & 0xc; + } case MISCREG_L2CTLR: { // mostly unimplemented, just set NumCPUs field from sim and return @@ -289,8 +641,120 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) * Return 0 as we don't support debug architecture yet. */ return 0; - case MISCREG_DBGDSCR_INT: + case MISCREG_DBGDSCRint: return 0; + case MISCREG_ISR: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR)); + case MISCREG_ISR_EL1: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR_EL2), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR_EL3)); + case MISCREG_DCZID_EL0: + return 0x04; // DC ZVA clear 64-byte chunks + case MISCREG_HCPTR: + { + MiscReg val = readMiscRegNoEffect(misc_reg); + // The trap bit associated with CP14 is defined as RAZ + val &= ~(1 << 14); + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI + bool secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg mask = readMiscRegNoEffect(MISCREG_NSACR); + val |= (mask ^ 0x7FFF) & 0xBFFF; + } + // Set the bits for unimplemented coprocessors to RAO/WI + val |= 0x33FF; + return (val); + } + case MISCREG_HDFAR: // alias for secure DFAR + return readMiscRegNoEffect(MISCREG_DFAR_S); + case MISCREG_HIFAR: // alias for secure IFAR + return readMiscRegNoEffect(MISCREG_IFAR_S); + case MISCREG_HVBAR: // bottom bits reserved + return readMiscRegNoEffect(MISCREG_HVBAR) & 0xFFFFFFE0; + case MISCREG_SCTLR: // Some bits hardwired + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x72DD39FF) | 0x00C00818; // V8 SCTLR + case MISCREG_SCTLR_EL1: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x37DDDBFF) | 0x30D00800; // V8 SCTLR_EL1 + case MISCREG_SCTLR_EL3: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x32CD183F) | 0x30C50830; // V8 SCTLR_EL3 + case MISCREG_HSCTLR: // FI comes from SCTLR + { + uint32_t mask = 1 << 27; + return (readMiscRegNoEffect(MISCREG_HSCTLR) & ~mask) | + (readMiscRegNoEffect(MISCREG_SCTLR) & mask); + } + case MISCREG_SCR: + { + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (cpsr.width) { + return readMiscRegNoEffect(MISCREG_SCR); + } else { + return readMiscRegNoEffect(MISCREG_SCR_EL3); + } + } + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + inform_once("Read CNTFREQ_EL0 frequency\n"); + return getSystemCounter(tc)->freq(); + case MISCREG_CNTPCT: + case MISCREG_CNTPCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + return 
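SPSEL and CurrentEL above are plain views of PSTATE fields held in the stored CPSR: the SP selection is bit 0 and the exception level is bits [3:2], which is all the 0x1 and 0xc masks extract:

    #include <cstdint>

    static uint32_t readSpsel(uint32_t cpsr)     { return cpsr & 0x1; }
    static uint32_t readCurrentEl(uint32_t cpsr) { return cpsr & 0xc; }
    // The EL number itself would be (cpsr >> 2) & 0x3.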
getArchTimer(tc, tc->cpuId())->compareValue(); + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + return getArchTimer(tc, tc->cpuId())->timerValue(); + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + return getArchTimer(tc, tc->cpuId())->control(); + // PL1 phys. timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys. timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; + } return readMiscRegNoEffect(misc_reg); } @@ -300,15 +764,28 @@ ISA::setMiscRegNoEffect(int misc_reg, const MiscReg &val) { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - miscRegs[flat_idx] = val; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + + int flat_idx2 = lookUpMiscReg[flat_idx].upper; - DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", misc_reg, - flat_idx, val); + if (flat_idx2 > 0) { + miscRegs[lookUpMiscReg[flat_idx].lower] = bits(val, 31, 0); + miscRegs[flat_idx2] = bits(val, 63, 32); + DPRINTF(MiscRegs, "Writing to misc reg %d (%d:%d) : %#x\n", + misc_reg, flat_idx, flat_idx2, val); + } else { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + else if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + else + flat_idx = (lookUpMiscReg[flat_idx].lower > 0) ? 
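For the generic-timer registers it helps to recall the architected relationship the ArchTimer calls above rely on: the timer condition is met once the system counter reaches the 64-bit compare value (CVAL), and TVAL is merely the signed 32-bit downcounter view TVAL = CVAL - count. A sketch of that equivalence (assumed model, following the ARM ARM definition rather than gem5 internals):

    #include <cstdint>

    struct Timer { uint64_t cval; };

    // Writing TVAL re-derives CVAL from the current count; reading
    // reports the distance to the compare value (negative once past).
    static void writeTval(Timer &t, uint64_t count, int32_t tval)
    {
        t.cval = count + tval;
    }

    static int32_t readTval(const Timer &t, uint64_t count)
    {
        return static_cast<int32_t>(t.cval - count);
    }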
+ lookUpMiscReg[flat_idx].lower : flat_idx; + miscRegs[flat_idx] = val; + DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", + misc_reg, flat_idx, val); + } } void @@ -317,8 +794,13 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) MiscReg newVal = val; int x; + bool secure_lookup; + bool hyp; System *sys; ThreadContext *oc; + uint8_t target_el; + uint16_t asid; + SCR scr; if (misc_reg == MISCREG_CPSR) { updateRegMap(val); @@ -346,12 +828,18 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) } else { tc->pcState(pc); } - } else if (misc_reg >= MISCREG_CP15_UNIMP_START && - misc_reg < MISCREG_CP15_END) { - panic("Unimplemented CP15 register %s wrote with %#x.\n", - miscRegName[misc_reg], val); } else { - switch (misc_reg) { +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s write with %#x.\n", + miscRegName[misc_reg], val); + else + panic("Unimplemented system register %s write with %#x.\n", + miscRegName[misc_reg], val); + } +#endif + switch (unflattenMiscReg(misc_reg)) { case MISCREG_CPACR: { @@ -362,7 +850,61 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) cpacrMask.cp10 = ones; cpacrMask.cp11 = ones; cpacrMask.asedis = ones; + + // Security Extensions may limit the writability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + + MiscReg old_val = readMiscRegNoEffect(MISCREG_CPACR); newVal &= cpacrMask; + newVal |= old_val & ~cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPACR_EL1: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + cpacrMask.tta = ones; + cpacrMask.fpen = ones; + newVal &= cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPTR_EL2: + { + const uint32_t ones = (uint32_t)(-1); + CPTR cptrMask = 0; + cptrMask.tcpac = ones; + cptrMask.tta = ones; + cptrMask.tfp = ones; + newVal &= cptrMask; + cptrMask = 0; + cptrMask.res1_13_12_el2 = ones; + cptrMask.res1_9_0_el2 = ones; + newVal |= cptrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPTR_EL3: + { + const uint32_t ones = (uint32_t)(-1); + CPTR cptrMask = 0; + cptrMask.tcpac = ones; + cptrMask.tta = ones; + cptrMask.tfp = ones; + newVal &= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[misc_reg], newVal); } @@ -370,6 +912,11 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) case MISCREG_CSSELR: warn_once("The csselr register isn't implemented.\n"); return; + + case MISCREG_DC_ZVA_Xt: + warn("Calling DC ZVA! Not Implemeted! 
Expect WEIRD results\n"); + return; + case MISCREG_FPSCR: { const uint32_t ones = (uint32_t)(-1); @@ -380,6 +927,12 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.ufc = ones; fpscrMask.ixc = ones; fpscrMask.idc = ones; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; fpscrMask.len = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; @@ -392,26 +945,72 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.z = ones; fpscrMask.n = ones; newVal = (newVal & (uint32_t)fpscrMask) | - (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask); + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); tc->getDecoderPtr()->setContext(newVal); } break; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; case MISCREG_CPSR_Q: { assert(!(newVal & ~CpsrMaskQ)); - newVal = miscRegs[MISCREG_CPSR] | newVal; + newVal = readMiscRegNoEffect(MISCREG_CPSR) | newVal; misc_reg = MISCREG_CPSR; } break; case MISCREG_FPSCR_QC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrQcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrQcMask); misc_reg = MISCREG_FPSCR; } break; case MISCREG_FPSCR_EXC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrExcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrExcMask); misc_reg = MISCREG_FPSCR; } break; @@ -421,16 +1020,63 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) // bit 29 - valid only if fpexc[31] is 0 const uint32_t fpexcMask = 0x60000000; newVal = (newVal & fpexcMask) | - (miscRegs[MISCREG_FPEXC] & ~fpexcMask); + (readMiscRegNoEffect(MISCREG_FPEXC) & ~fpexcMask); + } + break; + case MISCREG_HCR: + { + if (!haveVirtualization) + return; + } + break; + case MISCREG_IFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.96 + const uint32_t ifsrMask = + mask(31, 13) | mask(11, 11) | mask(8, 6); + newVal = newVal & ~ifsrMask; + } + break; + case MISCREG_DFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.52 + const uint32_t dfsrMask = mask(31, 14) | mask(8, 8); + newVal = newVal & ~dfsrMask; + } + break; + case MISCREG_AMAIR0: + case MISCREG_AMAIR1: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.5 + // Valid only with LPAE + if (!haveLPAE) + return; + DPRINTF(MiscRegs, "Writing AMAIR: %#x\n", newVal); } break; + case MISCREG_SCR: + tc->getITBPtr()->invalidateMiscReg(); + tc->getDTBPtr()->invalidateMiscReg(); + break; case MISCREG_SCTLR: { DPRINTF(MiscRegs, "Writing SCTLR: %#x\n", newVal); - SCTLR sctlr = 
miscRegs[MISCREG_SCTLR]; + MiscRegIndex sctlr_idx; + scr = readMiscRegNoEffect(MISCREG_SCR); + if (haveSecurity && !scr.ns) { + sctlr_idx = MISCREG_SCTLR_S; + } else { + sctlr_idx = MISCREG_SCTLR_NS; + // The FI field (bit 21) is common between S/NS versions + // of the register, we store this in the secure copy of + // the reg + miscRegs[MISCREG_SCTLR_S] &= ~(1 << 21); + miscRegs[MISCREG_SCTLR_S] |= newVal & (1 << 21); + } + SCTLR sctlr = miscRegs[sctlr_idx]; SCTLR new_sctlr = newVal; - new_sctlr.nmfi = (bool)sctlr.nmfi; - miscRegs[MISCREG_SCTLR] = (MiscReg)new_sctlr; + new_sctlr.nmfi = ((bool)sctlr.nmfi) && !haveVirtualization; + miscRegs[sctlr_idx] = (MiscReg)new_sctlr; tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); @@ -440,6 +1086,7 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); + // @todo: double check this for security SCTLR other_sctlr = oc->readMiscRegNoEffect(MISCREG_SCTLR); if (!other_sctlr.c && oc->status() != ThreadContext::Halted) return; @@ -479,96 +1126,317 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) case MISCREG_TLBTR: case MISCREG_MVFR0: case MISCREG_MVFR1: + + case MISCREG_ID_AA64AFR0_EL1: + case MISCREG_ID_AA64AFR1_EL1: + case MISCREG_ID_AA64DFR0_EL1: + case MISCREG_ID_AA64DFR1_EL1: + case MISCREG_ID_AA64ISAR0_EL1: + case MISCREG_ID_AA64ISAR1_EL1: + case MISCREG_ID_AA64MMFR0_EL1: + case MISCREG_ID_AA64MMFR1_EL1: + case MISCREG_ID_AA64PFR0_EL1: + case MISCREG_ID_AA64PFR1_EL1: // ID registers are constants. return; + // TLBI all entries, EL0&1 inner sharable (ignored) case MISCREG_TLBIALLIS: - case MISCREG_TLBIALL: + case MISCREG_TLBIALL: // TLBI all entries, EL0&1, + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushAll(); - oc->getDTBPtr()->flushAll(); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); // If CheckerCPU is connected, need to notify it of a flush CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { - checker->getITBPtr()->flushAll(); - checker->getDTBPtr()->flushAll(); + checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); } } return; + // TLBI all entries, EL0&1, instruction side case MISCREG_ITLBIALL: - tc->getITBPtr()->flushAll(); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); return; + // TLBI all entries, EL0&1, data side case MISCREG_DTLBIALL: - tc->getDTBPtr()->flushAll(); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); return; + // TLBI based on VA, EL0&1 inner sharable (ignored) case MISCREG_TLBIMVAIS: case MISCREG_TLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < 
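The SCTLR write above shows the Security Extensions banking pattern used throughout this patch: SCR.NS selects which physical copy (secure or non-secure) a register access touches, while the architecturally common FI field (bit 21) is kept in the secure copy regardless of which side wrote it. In outline:

    #include <cstdint>

    struct Banked { uint32_t s, ns; };

    // Banked SCTLR write: NS picks the copy, bit 21 (FI) is shared
    // and lives in the secure copy, as in the case above.
    static void writeSctlr(Banked &r, bool scrNs, bool haveSecurity,
                           uint32_t val)
    {
        if (haveSecurity && !scrNs) {
            r.s = val;
        } else {
            r.ns = val;
            r.s = (r.s & ~(1u << 21)) | (val & (1u << 21));
        }
    }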
sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); oc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), + secure_lookup, target_el); oc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), + secure_lookup, target_el); CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { checker->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); checker->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); } } return; + // TLBI by ASID, EL0&1, inner sharable case MISCREG_TLBIASIDIS: case MISCREG_TLBIASID: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushAsid(bits(newVal, 7,0)); - oc->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + oc->getITBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); + oc->getDTBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { - checker->getITBPtr()->flushAsid(bits(newVal, 7,0)); - checker->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + checker->getITBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); + checker->getDTBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); } } return; + // TLBI by address, EL0&1, inner sharable (ignored) case MISCREG_TLBIMVAAIS: case MISCREG_TLBIMVAA: - sys = tc->getSystemPtr(); - for (x = 0; x < sys->numContexts(); x++) { - oc = sys->getThreadContext(x); - assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushMva(mbits(newVal, 31,12)); - oc->getDTBPtr()->flushMva(mbits(newVal, 31,12)); - - CheckerCPU *checker = oc->getCheckerCpuPtr(); - if (checker) { - checker->getITBPtr()->flushMva(mbits(newVal, 31,12)); - checker->getDTBPtr()->flushMva(mbits(newVal, 31,12)); - } - } + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + hyp = 0; + tlbiMVA(tc, newVal, secure_lookup, hyp, target_el); + return; + // TLBI by address, EL2, hypervisor mode + case MISCREG_TLBIMVAH: + case MISCREG_TLBIMVAHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + hyp = 1; + tlbiMVA(tc, newVal, secure_lookup, hyp, target_el); return; + // TLBI by address and asid, EL0&1, instruction side only case MISCREG_ITLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; tc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); return; + // TLBI by address and asid, EL0&1, data side only case MISCREG_DTLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; tc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); return; + // TLBI by ASID, EL0&1, instrution side only case MISCREG_ITLBIASID: - 
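Every TLBI case in this block repeats one broadcast idiom: walk all thread contexts in the system, flush the instruction and the data TLB with the security state and target exception level attached, then do the same for the CheckerCPU shadow TLBs when one is connected. The skeleton (hypothetical interfaces standing in for gem5's TLB API):

    #include <cstdio>

    struct Tlb {
        const char *name;
        void flushAsid(unsigned asid, bool secure, int el)
        {
            std::printf("%s: flush asid=%u secure=%d el=%d\n",
                        name, asid, secure, el);
        }
    };

    struct Ctx { Tlb *itb, *dtb; };

    // Broadcast an ASID invalidate to every context, both TLB sides;
    // a checker CPU, when present, would get the same pair of calls.
    static void tlbiAsidBroadcast(Ctx *ctxs, int n, unsigned asid,
                                  bool secure, int targetEl)
    {
        for (int i = 0; i < n; ++i) {
            ctxs[i].itb->flushAsid(asid, secure, targetEl);
            ctxs[i].dtb->flushAsid(asid, secure, targetEl);
        }
    }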
tc->getITBPtr()->flushAsid(bits(newVal, 7,0)); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getITBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup, + target_el); return; + // TLBI by ASID EL0&1 data size only case MISCREG_DTLBIASID: - tc->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getDTBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup, + target_el); + return; + // Invalidate entire Non-secure Hyp/Non-Hyp Unified TLB + case MISCREG_TLBIALLNSNH: + case MISCREG_TLBIALLNSNHIS: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + hyp = 0; + tlbiALLN(tc, hyp, target_el); + return; + // TLBI all entries, EL2, hyp, + case MISCREG_TLBIALLH: + case MISCREG_TLBIALLHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + hyp = 1; + tlbiALLN(tc, hyp, target_el); + return; + // AArch64 TLBI: invalidate all entries EL3 + case MISCREG_TLBI_ALLE3IS: + case MISCREG_TLBI_ALLE3: + assert64(tc); + target_el = 3; + secure_lookup = true; + tlbiALL(tc, secure_lookup, target_el); + return; + // @todo: uncomment this to enable Virtualization + // case MISCREG_TLBI_ALLE2IS: + // case MISCREG_TLBI_ALLE2: + // TLBI all entries, EL0&1 + case MISCREG_TLBI_ALLE1IS: + case MISCREG_TLBI_ALLE1: + // AArch64 TLBI: invalidate all entries, stage 1, current VMID + case MISCREG_TLBI_VMALLE1IS: + case MISCREG_TLBI_VMALLE1: + // AArch64 TLBI: invalidate all entries, stages 1 & 2, current VMID + case MISCREG_TLBI_VMALLS12E1IS: + case MISCREG_TLBI_VMALLS12E1: + // @todo: handle VMID and stage 2 to enable Virtualization + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiALL(tc, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by VA and ASID, stage 1, current VMID + // VAEx(IS) and VALEx(IS) are the same because TLBs only store entries + // from the last level of translation table walks + // @todo: handle VMID to enable Virtualization + // TLBI all entries, EL0&1 + case MISCREG_TLBI_VAE3IS_Xt: + case MISCREG_TLBI_VAE3_Xt: + // TLBI by VA, EL3 regime stage 1, last level walk + case MISCREG_TLBI_VALE3IS_Xt: + case MISCREG_TLBI_VALE3_Xt: + assert64(tc); + target_el = 3; + asid = 0xbeef; // does not matter, tlbi is global + secure_lookup = true; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA, EL2 + case MISCREG_TLBI_VAE2IS_Xt: + case MISCREG_TLBI_VAE2_Xt: + // TLBI by VA, EL2, stage1 last level walk + case MISCREG_TLBI_VALE2IS_Xt: + case MISCREG_TLBI_VALE2_Xt: + assert64(tc); + target_el = 2; + asid = 0xbeef; // does not matter, tlbi is global + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA EL1 & 0, stage1, ASID, current VMID + case MISCREG_TLBI_VAE1IS_Xt: + case MISCREG_TLBI_VAE1_Xt: + case MISCREG_TLBI_VALE1IS_Xt: + case MISCREG_TLBI_VALE1_Xt: + assert64(tc); + asid = bits(newVal, 63, 48); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by ASID, stage 1, current VMID + // @todo: handle VMID to enable 
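The AArch64 TLBI-by-VA operations below all decode the Xt operand the same way: the page-aligned address sits in bits [43:0] (shifted up by 12) and the ASID, for the ASID-qualified forms, in bits [63:48], truncated to 8 bits when only the smaller ASID size is configured. Decoding sketch (architectural layout per the ARM ARM; the masking direction here is the architectural rule, hedged rather than a claim about this exact code):

    #include <cstdint>

    struct TlbiVa { uint64_t va; uint16_t asid; };

    static TlbiVa decodeTlbiVa(uint64_t xt, bool largeAsid)
    {
        TlbiVa r;
        r.va   = (xt & ((1ULL << 44) - 1)) << 12;  // bits [43:0]
        r.asid = static_cast<uint16_t>(xt >> 48);  // bits [63:48]
        if (!largeAsid)
            r.asid &= 0xff;  // 8-bit ASIDs configured
        return r;
    }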
Virtualization + case MISCREG_TLBI_ASIDE1IS_Xt: + case MISCREG_TLBI_ASIDE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + sys = tc->getSystemPtr(); + for (x = 0; x < sys->numContexts(); x++) { + oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + asid = bits(newVal, 63, 48); + if (haveLargeAsid64) + asid &= mask(8); + oc->getITBPtr()->flushAsid(asid, secure_lookup, target_el); + oc->getDTBPtr()->flushAsid(asid, secure_lookup, target_el); + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAsid(asid, + secure_lookup, target_el); + checker->getDTBPtr()->flushAsid(asid, + secure_lookup, target_el); + } + } + return; + // AArch64 TLBI: invalidate by VA, ASID, stage 1, current VMID + // VAAE1(IS) and VAALE1(IS) are the same because TLBs only store + // entries from the last level of translation table walks + // @todo: handle VMID to enable Virtualization + case MISCREG_TLBI_VAAE1IS_Xt: + case MISCREG_TLBI_VAAE1_Xt: + case MISCREG_TLBI_VAALE1IS_Xt: + case MISCREG_TLBI_VAALE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + sys = tc->getSystemPtr(); + for (x = 0; x < sys->numContexts(); x++) { + // @todo: extra controls on TLBI broadcast? + oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + oc->getITBPtr()->flushMva(va, + secure_lookup, false, target_el); + oc->getDTBPtr()->flushMva(va, + secure_lookup, false, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(va, + secure_lookup, false, target_el); + checker->getDTBPtr()->flushMva(va, + secure_lookup, false, target_el); + } + } + return; + // AArch64 TLBI: invalidate by IPA, stage 2, current VMID + case MISCREG_TLBI_IPAS2LE1IS_Xt: + case MISCREG_TLBI_IPAS2LE1_Xt: + case MISCREG_TLBI_IPAS2E1IS_Xt: + case MISCREG_TLBI_IPAS2E1_Xt: + assert64(tc); + // @todo: implement these as part of Virtualization + warn("Not doing anything for write of miscreg ITLB_IPAS2\n"); return; case MISCREG_ACTLR: warn("Not doing anything for write of miscreg ACTLR\n"); @@ -591,77 +1459,566 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) warn("Not doing anything for write to miscreg %s\n", miscRegName[misc_reg]); break; - case MISCREG_V2PCWPR: - case MISCREG_V2PCWPW: - case MISCREG_V2PCWUR: - case MISCREG_V2PCWUW: - case MISCREG_V2POWPR: - case MISCREG_V2POWPW: - case MISCREG_V2POWUR: - case MISCREG_V2POWUW: + case MISCREG_HSTR: // TJDBX, now redifined to be RES0 + { + HSTR hstrMask = 0; + hstrMask.tjdbx = 1; + newVal &= ~((uint32_t) hstrMask); + break; + } + case MISCREG_HCPTR: + { + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI. 
Same applies to NSASEDIS + secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg oldValue = readMiscRegNoEffect(MISCREG_HCPTR); + MiscReg mask = (readMiscRegNoEffect(MISCREG_NSACR) ^ 0x7FFF) & 0xBFFF; + newVal = (newVal & ~mask) | (oldValue & mask); + } + break; + } + case MISCREG_HDFAR: // alias for secure DFAR + misc_reg = MISCREG_DFAR_S; + break; + case MISCREG_HIFAR: // alias for secure IFAR + misc_reg = MISCREG_IFAR_S; + break; + case MISCREG_ATS1CPR: + case MISCREG_ATS1CPW: + case MISCREG_ATS1CUR: + case MISCREG_ATS1CUW: + case MISCREG_ATS12NSOPR: + case MISCREG_ATS12NSOPW: + case MISCREG_ATS12NSOUR: + case MISCREG_ATS12NSOUW: + case MISCREG_ATS1HR: + case MISCREG_ATS1HW: { RequestPtr req = new Request; - unsigned flags; - BaseTLB::Mode mode; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; Fault fault; switch(misc_reg) { - case MISCREG_V2PCWPR: - flags = TLB::MustBeOne; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWPW: - flags = TLB::MustBeOne; - mode = BaseTLB::Write; - break; - case MISCREG_V2PCWUR: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWUW: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Write; - break; - default: - panic("Security Extensions not implemented!"); + case MISCREG_ATS1CPR: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CPW: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1CUR: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CUW: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOPR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPR"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOPW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPW"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOUR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUR"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOUW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUW"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1HR: // only really useful from secure mode. + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1HW: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; } - warn("Translating via MISCREG in atomic mode! Fix Me!\n"); - req->setVirt(0, val, 1, flags, tc->pcState().pc(), - Request::funcMasterId); - fault = tc->getDTBPtr()->translateAtomic(req, tc, mode); + // If we're in timing mode then doing the translation in + // functional mode then we're slightly distorting performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. 
+ warn("Translating via MISCREG(%d) in functional mode! Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, tranType); + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + HCR hcr = readMiscRegNoEffect(MISCREG_HCR); + + MiscReg newVal; if (fault == NoFault) { - miscRegs[MISCREG_PAR] = - (req->getPaddr() & 0xfffff000) | - (tc->getDTBPtr()->getAttr() ); + Addr paddr = req->getPaddr(); + if (haveLPAE && (ttbcr.eae || tranType & TLB::HypMode || + ((tranType & TLB::S1S2NsTran) && hcr.vm) )) { + newVal = (paddr & mask(39, 12)) | + (tc->getDTBPtr()->getAttr()); + } else { + newVal = (paddr & 0xfffff000) | + (tc->getDTBPtr()->getAttr()); + } DPRINTF(MiscRegs, "MISCREG: Translated addr 0x%08x: PAR: 0x%08x\n", - val, miscRegs[MISCREG_PAR]); - } - else { + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast(fault.get()); // Set fault bit and FSR - FSR fsr = miscRegs[MISCREG_DFSR]; - miscRegs[MISCREG_PAR] = - (fsr.ext << 6) | - (fsr.fsHigh << 5) | - (fsr.fsLow << 1) | - 0x1; // F bit + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + if (newVal) { + // LPAE - rearange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + } else { + // VMSA - rearange fault status + newVal |= ((fsr >> 0) & 0xf) << 1; + newVal |= ((fsr >> 10) & 0x1) << 5; + newVal |= ((fsr >> 12) & 0x1) << 6; + } + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ? 0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr 0x%08x fault fsr %#x: PAR: 0x%08x\n", + val, fsr, newVal); } + delete req; + setMiscRegNoEffect(MISCREG_PAR, newVal); return; } + case MISCREG_TTBCR: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + const uint32_t ones = (uint32_t)(-1); + TTBCR ttbcrMask = 0; + TTBCR ttbcrNew = newVal; + + // ARM DDI 0406C.b, ARMv7-32 + ttbcrMask.n = ones; // T0SZ + if (haveSecurity) { + ttbcrMask.pd0 = ones; + ttbcrMask.pd1 = ones; + } + ttbcrMask.epd0 = ones; + ttbcrMask.irgn0 = ones; + ttbcrMask.orgn0 = ones; + ttbcrMask.sh0 = ones; + ttbcrMask.ps = ones; // T1SZ + ttbcrMask.a1 = ones; + ttbcrMask.epd1 = ones; + ttbcrMask.irgn1 = ones; + ttbcrMask.orgn1 = ones; + ttbcrMask.sh1 = ones; + if (haveLPAE) + ttbcrMask.eae = ones; + + if (haveLPAE && ttbcrNew.eae) { + newVal = newVal & ttbcrMask; + } else { + newVal = (newVal & ttbcrMask) | (ttbcr & (~ttbcrMask)); + } + } + case MISCREG_TTBR0: + case MISCREG_TTBR1: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + if (haveLPAE) { + if (ttbcr.eae) { + // ARMv7 bit 63-56, 47-40 reserved, UNK/SBZP + // ARMv8 AArch32 bit 63-56 only + uint64_t ttbrMask = mask(63,56) | mask(47,40); + newVal = (newVal & (~ttbrMask)); + } + } + } case MISCREG_CONTEXTIDR: case MISCREG_PRRR: case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: case MISCREG_DACR: + case MISCREG_VTTBR: + case MISCREG_SCR_EL3: + case MISCREG_SCTLR_EL1: + case MISCREG_SCTLR_EL2: + case MISCREG_SCTLR_EL3: + case MISCREG_TCR_EL1: + case MISCREG_TCR_EL2: + case MISCREG_TCR_EL3: + case MISCREG_TTBR0_EL1: + case MISCREG_TTBR1_EL1: + case MISCREG_TTBR0_EL2: + case MISCREG_TTBR0_EL3: tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); break; + case MISCREG_NZCV: + { + CPSR cpsr = val; + + tc->setIntReg(INTREG_CONDCODES_NZ, cpsr.nz); + tc->setIntReg(INTREG_CONDCODES_C, cpsr.c); + tc->setIntReg(INTREG_CONDCODES_V, cpsr.v); + } + break; + 
+      case MISCREG_DAIF:
+        {
+            CPSR cpsr = miscRegs[MISCREG_CPSR];
+            cpsr.daif = (uint8_t) ((CPSR) newVal).daif;
+            newVal = cpsr;
+            misc_reg = MISCREG_CPSR;
+        }
+        break;
+      case MISCREG_SP_EL0:
+        tc->setIntReg(INTREG_SP0, newVal);
+        break;
+      case MISCREG_SP_EL1:
+        tc->setIntReg(INTREG_SP1, newVal);
+        break;
+      case MISCREG_SP_EL2:
+        tc->setIntReg(INTREG_SP2, newVal);
+        break;
+      case MISCREG_SPSEL:
+        {
+            CPSR cpsr = miscRegs[MISCREG_CPSR];
+            cpsr.sp = (uint8_t) ((CPSR) newVal).sp;
+            newVal = cpsr;
+            misc_reg = MISCREG_CPSR;
+        }
+        break;
+      case MISCREG_CURRENTEL:
+        {
+            CPSR cpsr = miscRegs[MISCREG_CPSR];
+            cpsr.el = (uint8_t) ((CPSR) newVal).el;
+            newVal = cpsr;
+            misc_reg = MISCREG_CPSR;
+        }
+        break;
+      case MISCREG_AT_S1E1R_Xt:
+      case MISCREG_AT_S1E1W_Xt:
+      case MISCREG_AT_S1E0R_Xt:
+      case MISCREG_AT_S1E0W_Xt:
+      case MISCREG_AT_S1E2R_Xt:
+      case MISCREG_AT_S1E2W_Xt:
+      case MISCREG_AT_S12E1R_Xt:
+      case MISCREG_AT_S12E1W_Xt:
+      case MISCREG_AT_S12E0R_Xt:
+      case MISCREG_AT_S12E0W_Xt:
+      case MISCREG_AT_S1E3R_Xt:
+      case MISCREG_AT_S1E3W_Xt:
+        {
+            RequestPtr req = new Request;
+            unsigned flags = 0;
+            BaseTLB::Mode mode = BaseTLB::Read;
+            TLB::ArmTranslationType tranType = TLB::NormalTran;
+            Fault fault;
+            switch(misc_reg) {
+              case MISCREG_AT_S1E1R_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::S1CTran;
+                mode = BaseTLB::Read;
+                break;
+              case MISCREG_AT_S1E1W_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::S1CTran;
+                mode = BaseTLB::Write;
+                break;
+              case MISCREG_AT_S1E0R_Xt:
+                flags = TLB::MustBeOne | TLB::UserMode;
+                tranType = TLB::S1CTran;
+                mode = BaseTLB::Read;
+                break;
+              case MISCREG_AT_S1E0W_Xt:
+                flags = TLB::MustBeOne | TLB::UserMode;
+                tranType = TLB::S1CTran;
+                mode = BaseTLB::Write;
+                break;
+              case MISCREG_AT_S1E2R_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::HypMode;
+                mode = BaseTLB::Read;
+                break;
+              case MISCREG_AT_S1E2W_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::HypMode;
+                mode = BaseTLB::Write;
+                break;
+              case MISCREG_AT_S12E0R_Xt:
+                flags = TLB::MustBeOne | TLB::UserMode;
+                tranType = TLB::S1S2NsTran;
+                mode = BaseTLB::Read;
+                break;
+              case MISCREG_AT_S12E0W_Xt:
+                flags = TLB::MustBeOne | TLB::UserMode;
+                tranType = TLB::S1S2NsTran;
+                mode = BaseTLB::Write;
+                break;
+              case MISCREG_AT_S12E1R_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::S1S2NsTran;
+                mode = BaseTLB::Read;
+                break;
+              case MISCREG_AT_S12E1W_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::S1S2NsTran;
+                mode = BaseTLB::Write;
+                break;
+              case MISCREG_AT_S1E3R_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::HypMode; // There is no TZ mode defined.
+                mode = BaseTLB::Read;
+                break;
+              case MISCREG_AT_S1E3W_Xt:
+                flags = TLB::MustBeOne;
+                tranType = TLB::HypMode; // There is no TZ mode defined.
+                mode = BaseTLB::Write;
+                break;
+            }
+            // If we're in timing mode, then doing the translation in
+            // functional mode slightly distorts the performance results
+            // obtained from simulations. The translation should be done
+            // in the same mode the core is running in. NOTE: This can't
+            // be an atomic translation because that causes problems with
+            // unexpected atomic snoop requests.
+            warn("Translating via MISCREG(%d) in functional mode! Fix Me!\n",
+                 misc_reg);
+            req->setVirt(0, val, 1, flags, Request::funcMasterId,
+                    tc->pcState().pc());
+            req->setThreadContext(tc->contextId(), tc->threadId());
+            fault = tc->getDTBPtr()->translateFunctional(req, tc, mode,
+                                                         tranType);
+
+            MiscReg newVal;
+            if (fault == NoFault) {
+                Addr paddr = req->getPaddr();
+                uint64_t attr = tc->getDTBPtr()->getAttr();
+                uint64_t attr1 = attr >> 56;
+                if (!attr1 || attr1 == 0x44) {
+                    attr |= 0x100;
+                    attr &= ~uint64_t(0x80);
+                }
+                newVal = (paddr & mask(47, 12)) | attr;
+                DPRINTF(MiscRegs,
+                        "MISCREG: Translated addr %#x: PAR_EL1: %#x\n",
+                        val, newVal);
+            } else {
+                ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get());
+                // Set fault bit and FSR
+                FSR fsr = armFault->getFsr(tc);
+
+                newVal = ((fsr >> 9) & 1) << 11;
+                // rearrange fault status
+                newVal |= ((fsr >> 0) & 0x3f) << 1;
+                newVal |= 0x1; // F bit
+                newVal |= ((armFault->iss() >> 7) & 0x1) << 8;
+                newVal |= armFault->isStage2() ? 0x200 : 0;
+                DPRINTF(MiscRegs,
+                        "MISCREG: Translated addr %#x fault fsr %#x: PAR: %#x\n",
+                        val, fsr, newVal);
+            }
+            delete req;
+            setMiscRegNoEffect(MISCREG_PAR_EL1, newVal);
+            return;
+        }
+      case MISCREG_SPSR_EL3:
+      case MISCREG_SPSR_EL2:
+      case MISCREG_SPSR_EL1:
+        // Force bits 23:21 to 0
+        newVal = val & ~(0x7 << 21);
+        break;
       case MISCREG_L2CTLR:
        warn("miscreg L2CTLR (%s) written with %#x. ignored...\n",
             miscRegName[misc_reg], uint32_t(val));
+        break;
+
+      // Generic Timer registers
+      case MISCREG_CNTFRQ:
+      case MISCREG_CNTFRQ_EL0:
+        getSystemCounter(tc)->setFreq(val);
+        break;
+      case MISCREG_CNTP_CVAL:
+      case MISCREG_CNTP_CVAL_EL0:
+        getArchTimer(tc, tc->cpuId())->setCompareValue(val);
+        break;
+      case MISCREG_CNTP_TVAL:
+      case MISCREG_CNTP_TVAL_EL0:
+        getArchTimer(tc, tc->cpuId())->setTimerValue(val);
+        break;
+      case MISCREG_CNTP_CTL:
+      case MISCREG_CNTP_CTL_EL0:
+        getArchTimer(tc, tc->cpuId())->setControl(val);
+        break;
+      // PL1 phys. timer, secure
+      // AArch64
+      case MISCREG_CNTPS_CVAL_EL1:
+      case MISCREG_CNTPS_TVAL_EL1:
+      case MISCREG_CNTPS_CTL_EL1:
+      // PL2 phys.
timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; } } setMiscRegNoEffect(misc_reg, newVal); } +void +ISA::tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, bool secure_lookup, + uint8_t target_el) +{ + if (haveLargeAsid64) + asid &= mask(8); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + oc->getDTBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + checker->getDTBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + } + } +} + +void +ISA::tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); + + // If CheckerCPU is connected, need to notify it of a flush + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); + } + } +} + +void +ISA::tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllNs(hyp, target_el); + oc->getDTBPtr()->flushAllNs(hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllNs(hyp, target_el); + checker->getDTBPtr()->flushAllNs(hyp, target_el); + } + } +} + +void +ISA::tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, bool hyp, + uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + oc->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + checker->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + } + } +} + +::GenericTimer::SystemCounter * +ISA::getSystemCounter(ThreadContext *tc) +{ + ::GenericTimer::SystemCounter *cnt = ((ArmSystem *) tc->getSystemPtr())-> + getSystemCounter(); + if (cnt == NULL) { + panic("System counter not available\n"); + } + return cnt; +} + +::GenericTimer::ArchTimer * 
+ISA::getArchTimer(ThreadContext *tc, int cpu_id) +{ + ::GenericTimer::ArchTimer *timer = ((ArmSystem *) tc->getSystemPtr())-> + getArchTimer(cpu_id); + if (timer == NULL) { + panic("Architected timer not available\n"); + } + return timer; +} + } ArmISA::ISA * diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index c747fc770..c72d5d50f 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,9 +44,11 @@ #define __ARCH_ARM_ISA_HH__ #include "arch/arm/registers.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/types.hh" #include "debug/Checkpoint.hh" +#include "dev/arm/generic_timer.hh" #include "sim/sim_object.hh" struct ArmISAParams; @@ -56,45 +58,174 @@ class EventManager; namespace ArmISA { + + /** + * At the moment there are 57 registers which need to be aliased/ + * translated with other registers in the ISA. This enum helps with that + * translation. + */ + enum translateTable { + miscRegTranslateCSSELR_EL1, + miscRegTranslateSCTLR_EL1, + miscRegTranslateSCTLR_EL2, + miscRegTranslateACTLR_EL1, + miscRegTranslateACTLR_EL2, + miscRegTranslateCPACR_EL1, + miscRegTranslateCPTR_EL2, + miscRegTranslateHCR_EL2, + miscRegTranslateMDCR_EL2, + miscRegTranslateHSTR_EL2, + miscRegTranslateHACR_EL2, + miscRegTranslateTTBR0_EL1, + miscRegTranslateTTBR1_EL1, + miscRegTranslateTTBR0_EL2, + miscRegTranslateVTTBR_EL2, + miscRegTranslateTCR_EL1, + miscRegTranslateTCR_EL2, + miscRegTranslateVTCR_EL2, + miscRegTranslateAFSR0_EL1, + miscRegTranslateAFSR1_EL1, + miscRegTranslateAFSR0_EL2, + miscRegTranslateAFSR1_EL2, + miscRegTranslateESR_EL2, + miscRegTranslateFAR_EL1, + miscRegTranslateFAR_EL2, + miscRegTranslateHPFAR_EL2, + miscRegTranslatePAR_EL1, + miscRegTranslateMAIR_EL1, + miscRegTranslateMAIR_EL2, + miscRegTranslateAMAIR_EL1, + miscRegTranslateVBAR_EL1, + miscRegTranslateVBAR_EL2, + miscRegTranslateCONTEXTIDR_EL1, + miscRegTranslateTPIDR_EL0, + miscRegTranslateTPIDRRO_EL0, + miscRegTranslateTPIDR_EL1, + miscRegTranslateTPIDR_EL2, + miscRegTranslateTEECR32_EL1, + miscRegTranslateCNTFRQ_EL0, + miscRegTranslateCNTPCT_EL0, + miscRegTranslateCNTVCT_EL0, + miscRegTranslateCNTVOFF_EL2, + miscRegTranslateCNTKCTL_EL1, + miscRegTranslateCNTHCTL_EL2, + miscRegTranslateCNTP_TVAL_EL0, + miscRegTranslateCNTP_CTL_EL0, + miscRegTranslateCNTP_CVAL_EL0, + miscRegTranslateCNTV_TVAL_EL0, + miscRegTranslateCNTV_CTL_EL0, + miscRegTranslateCNTV_CVAL_EL0, + miscRegTranslateCNTHP_TVAL_EL2, + miscRegTranslateCNTHP_CTL_EL2, + miscRegTranslateCNTHP_CVAL_EL2, + miscRegTranslateDACR32_EL2, + miscRegTranslateIFSR32_EL2, + miscRegTranslateTEEHBR32_EL1, + miscRegTranslateSDER32_EL3, + miscRegTranslateMax + }; + class ISA : public SimObject { protected: + // Parent system + ArmSystem *system; + + // Cached copies of system-level properties + bool haveSecurity; + bool haveLPAE; + bool haveVirtualization; + bool haveLargeAsid64; + uint8_t physAddrRange64; + + /** Register translation entry used in lookUpMiscReg */ + struct MiscRegLUTEntry { + uint32_t lower; + uint32_t upper; + }; + + struct MiscRegInitializerEntry { + uint32_t index; + struct MiscRegLUTEntry entry; + }; + + /** Register table noting all translations */ + static const struct MiscRegInitializerEntry + MiscRegSwitch[miscRegTranslateMax]; + + /** Translation table accessible via the value of the register */ + std::vector 
<struct MiscRegLUTEntry> lookUpMiscReg;
+
        MiscReg miscRegs[NumMiscRegs];
        const IntRegIndex *intRegMap;

        void updateRegMap(CPSR cpsr)
        {
-           switch (cpsr.mode) {
-             case MODE_USER:
-             case MODE_SYSTEM:
-               intRegMap = IntRegUsrMap;
-               break;
-             case MODE_FIQ:
-               intRegMap = IntRegFiqMap;
-               break;
-             case MODE_IRQ:
-               intRegMap = IntRegIrqMap;
-               break;
-             case MODE_SVC:
-               intRegMap = IntRegSvcMap;
-               break;
-             case MODE_MON:
-               intRegMap = IntRegMonMap;
-               break;
-             case MODE_ABORT:
-               intRegMap = IntRegAbtMap;
-               break;
-             case MODE_UNDEFINED:
-               intRegMap = IntRegUndMap;
-               break;
-             default:
-               panic("Unrecognized mode setting in CPSR.\n");
+           if (cpsr.width == 0) {
+               intRegMap = IntReg64Map;
+           } else {
+               switch (cpsr.mode) {
+                 case MODE_USER:
+                 case MODE_SYSTEM:
+                   intRegMap = IntRegUsrMap;
+                   break;
+                 case MODE_FIQ:
+                   intRegMap = IntRegFiqMap;
+                   break;
+                 case MODE_IRQ:
+                   intRegMap = IntRegIrqMap;
+                   break;
+                 case MODE_SVC:
+                   intRegMap = IntRegSvcMap;
+                   break;
+                 case MODE_MON:
+                   intRegMap = IntRegMonMap;
+                   break;
+                 case MODE_ABORT:
+                   intRegMap = IntRegAbtMap;
+                   break;
+                 case MODE_HYP:
+                   intRegMap = IntRegHypMap;
+                   break;
+                 case MODE_UNDEFINED:
+                   intRegMap = IntRegUndMap;
+                   break;
+                 default:
+                   panic("Unrecognized mode setting in CPSR.\n");
+               }
+           }
        }

+        ::GenericTimer::SystemCounter * getSystemCounter(ThreadContext *tc);
+        ::GenericTimer::ArchTimer * getArchTimer(ThreadContext *tc,
+                                                 int cpu_id);
+
+
+      private:
+        inline void assert32(ThreadContext *tc) {
+            CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc);
+            assert(cpsr.width);
+        }
+
+        inline void assert64(ThreadContext *tc) {
+            CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc);
+            assert(!cpsr.width);
+        }
+
+        void tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid,
+                    bool secure_lookup, uint8_t target_el);
+
+        void tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el);
+
+        void tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el);
+
+        void tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup,
+                     bool hyp, uint8_t target_el);
+
      public:
        void clear();
+        void clear64(const ArmISAParams *p);

        MiscReg readMiscRegNoEffect(int misc_reg) const;
        MiscReg readMiscReg(int misc_reg, ThreadContext *tc);
@@ -109,28 +240,28 @@ namespace ArmISA
                return intRegMap[reg];
            } else if (reg < NUM_INTREGS) {
                return reg;
-           } else {
-               int mode = reg / intRegsPerMode;
-               reg = reg % intRegsPerMode;
-               switch (mode) {
-                 case MODE_USER:
-                 case MODE_SYSTEM:
-                   return INTREG_USR(reg);
-                 case MODE_FIQ:
-                   return INTREG_FIQ(reg);
-                 case MODE_IRQ:
-                   return INTREG_IRQ(reg);
-                 case MODE_SVC:
-                   return INTREG_SVC(reg);
-                 case MODE_MON:
-                   return INTREG_MON(reg);
-                 case MODE_ABORT:
-                   return INTREG_ABT(reg);
-                 case MODE_UNDEFINED:
-                   return INTREG_UND(reg);
+           } else if (reg == INTREG_SPX) {
+               CPSR cpsr = miscRegs[MISCREG_CPSR];
+               ExceptionLevel el = opModeToEL(
+                   (OperatingMode) (uint8_t) cpsr.mode);
+               if (!cpsr.sp && el != EL0)
+                   return INTREG_SP0;
+               switch (el) {
+                 case EL3:
+                   return INTREG_SP3;
+                 // @todo: uncomment this to enable Virtualization
+                 // case EL2:
+                 //   return INTREG_SP2;
+                 case EL1:
+                   return INTREG_SP1;
+                 case EL0:
+                   return INTREG_SP0;
                  default:
-                   panic("Flattening into an unknown mode.\n");
+                   panic("Invalid exception level");
+                   break;
                }
+           } else {
+               return flattenIntRegModeIndex(reg);
            }
        }
@@ -150,47 +281,127 @@ namespace ArmISA

        int flattenMiscIndex(int reg) const
        {
+           int flat_idx = reg;
+
            if (reg == MISCREG_SPSR) {
-               int spsr_idx = NUM_MISCREGS;
                CPSR cpsr = miscRegs[MISCREG_CPSR];
                switch (cpsr.mode) {
+                 case MODE_EL0T:
+                   warn("User mode does not have SPSR\n");
+                   flat_idx = MISCREG_SPSR;
+                   break;
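+                 // [Editorial note, not in the original patch: in AArch64
+                 // the SPSR is banked per exception level, so the
+                 // MODE_EL1T/H, EL2T/H and EL3T/H arms below flatten
+                 // straight onto MISCREG_SPSR_EL1..EL3, while the AArch32
+                 // modes further down keep their classic banked copies
+                 // (FIQ, IRQ, SVC, ...).]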
+ case MODE_EL1T: + case MODE_EL1H: + flat_idx = MISCREG_SPSR_EL1; + break; + case MODE_EL2T: + case MODE_EL2H: + flat_idx = MISCREG_SPSR_EL2; + break; + case MODE_EL3T: + case MODE_EL3H: + flat_idx = MISCREG_SPSR_EL3; + break; case MODE_USER: warn("User mode does not have SPSR\n"); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; case MODE_FIQ: - spsr_idx = MISCREG_SPSR_FIQ; + flat_idx = MISCREG_SPSR_FIQ; break; case MODE_IRQ: - spsr_idx = MISCREG_SPSR_IRQ; + flat_idx = MISCREG_SPSR_IRQ; break; case MODE_SVC: - spsr_idx = MISCREG_SPSR_SVC; + flat_idx = MISCREG_SPSR_SVC; break; case MODE_MON: - spsr_idx = MISCREG_SPSR_MON; + flat_idx = MISCREG_SPSR_MON; break; case MODE_ABORT: - spsr_idx = MISCREG_SPSR_ABT; + flat_idx = MISCREG_SPSR_ABT; + break; + case MODE_HYP: + flat_idx = MISCREG_SPSR_HYP; break; case MODE_UNDEFINED: - spsr_idx = MISCREG_SPSR_UND; + flat_idx = MISCREG_SPSR_UND; break; default: warn("Trying to access SPSR in an invalid mode: %d\n", cpsr.mode); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; } - return spsr_idx; + } else if (miscRegInfo[reg][MISCREG_MUTEX]) { + // Mutually exclusive CP15 register + switch (reg) { + case MISCREG_PRRR_MAIR0: + case MISCREG_PRRR_MAIR0_NS: + case MISCREG_PRRR_MAIR0_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_PRRR_MAIR0; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR0 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_PRRR + + idxOffset); + } + break; + case MISCREG_NMRR_MAIR1: + case MISCREG_NMRR_MAIR1_NS: + case MISCREG_NMRR_MAIR1_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_NMRR_MAIR1; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR1 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_NMRR + + idxOffset); + } + break; + case MISCREG_PMXEVTYPER_PMCCFILTR: + { + PMSELR pmselr = miscRegs[MISCREG_PMSELR]; + if (pmselr.sel == 31) + flat_idx = flattenMiscIndex(MISCREG_PMCCFILTR); + else + flat_idx = flattenMiscIndex(MISCREG_PMXEVTYPER); + } + break; + default: + panic("Unrecognized misc. register.\n"); + break; + } + } else { + if (miscRegInfo[reg][MISCREG_BANKED]) { + bool secureReg = haveSecurity && + inSecureState(miscRegs[MISCREG_SCR], + miscRegs[MISCREG_CPSR]); + flat_idx += secureReg ? 
2 : 1;
+           }
        }
-           return reg;
+           return flat_idx;
        }

        void serialize(std::ostream &os)
        {
            DPRINTF(Checkpoint, "Serializing Arm Misc Registers\n");
            SERIALIZE_ARRAY(miscRegs, NumMiscRegs);
+
+           SERIALIZE_SCALAR(haveSecurity);
+           SERIALIZE_SCALAR(haveLPAE);
+           SERIALIZE_SCALAR(haveVirtualization);
+           SERIALIZE_SCALAR(haveLargeAsid64);
+           SERIALIZE_SCALAR(physAddrRange64);
        }

        void unserialize(Checkpoint *cp, const std::string &section)
        {
@@ -198,6 +409,12 @@ namespace ArmISA
            UNSERIALIZE_ARRAY(miscRegs, NumMiscRegs);
            CPSR tmp_cpsr = miscRegs[MISCREG_CPSR];
            updateRegMap(tmp_cpsr);
+
+           UNSERIALIZE_SCALAR(haveSecurity);
+           UNSERIALIZE_SCALAR(haveLPAE);
+           UNSERIALIZE_SCALAR(haveVirtualization);
+           UNSERIALIZE_SCALAR(haveLargeAsid64);
+           UNSERIALIZE_SCALAR(physAddrRange64);
        }

        void startup(ThreadContext *tc) {}

diff --git a/src/arch/arm/isa/bitfields.isa b/src/arch/arm/isa/bitfields.isa
index 5a8b5db6d..6006cfb2d 100644
--- a/src/arch/arm/isa/bitfields.isa
+++ b/src/arch/arm/isa/bitfields.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-

-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010, 2011 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -73,6 +73,7 @@ def bitfield SEVEN_AND_FOUR sevenAndFour;
 def bitfield THUMB thumb;
 def bitfield BIGTHUMB bigThumb;
+def bitfield AARCH64 aarch64;

 // Other
 def bitfield COND_CODE condCode;

diff --git a/src/arch/arm/isa/decoder/aarch64.isa b/src/arch/arm/isa/decoder/aarch64.isa
new file mode 100644
index 000000000..a6c0fa2df
--- /dev/null
+++ b/src/arch/arm/isa/decoder/aarch64.isa
@@ -0,0 +1,48 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2011 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// The 64 bit ARM decoder +// -------------------------- +// + + +Aarch64::aarch64(); + diff --git a/src/arch/arm/isa/decoder/arm.isa b/src/arch/arm/isa/decoder/arm.isa index 4bd9d5cf4..f0c0dec18 100644 --- a/src/arch/arm/isa/decoder/arm.isa +++ b/src/arch/arm/isa/decoder/arm.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,7 +73,11 @@ format DataOp { 0x9: ArmBlxReg::armBlxReg(); } 0x5: ArmSatAddSub::armSatAddSub(); - 0x7: Breakpoint::bkpt(); + 0x6: ArmERet::armERet(); + 0x7: decode OPCODE_22 { + 0: Breakpoint::bkpt(); + 1: ArmSmcHyp::armSmcHyp(); + } } 0x1: ArmHalfWordMultAndMultAcc::armHalfWordMultAndMultAcc(); } @@ -105,6 +109,10 @@ format DataOp { } 0x6: decode CPNUM { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStore(); + 0xf: decode OPCODE_20 { + 0: Mcrr15::Mcrr15(); + 1: Mrrc15::Mrrc15(); + } } 0x7: decode OPCODE_24 { 0: decode OPCODE_4 { diff --git a/src/arch/arm/isa/decoder/decoder.isa b/src/arch/arm/isa/decoder/decoder.isa index cf7d17871..94685b943 100644 --- a/src/arch/arm/isa/decoder/decoder.isa +++ b/src/arch/arm/isa/decoder/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -41,8 +41,12 @@ // Authors: Gabe Black decode THUMB default Unknown::unknown() { -0: -##include "arm.isa" +0: decode AARCH64 { + 0: + ##include "arm.isa" + 1: + ##include "aarch64.isa" +} 1: ##include "thumb.isa" } diff --git a/src/arch/arm/isa/decoder/thumb.isa b/src/arch/arm/isa/decoder/thumb.isa index f54cc728d..31495793e 100644 --- a/src/arch/arm/isa/decoder/thumb.isa +++ b/src/arch/arm/isa/decoder/thumb.isa @@ -95,8 +95,14 @@ decode BIGTHUMB { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre(); 0xf: decode HTOPCODE_9_4 { 0x00: Unknown::undefined(); - 0x04: WarnUnimpl::mcrr(); // mcrr2 - 0x05: WarnUnimpl::mrrc(); // mrrc2 + 0x04: decode LTCOPROC { + 0xf: Mcrr15::Mcrr15(); + default: WarnUnimpl::mcrr(); // mcrr2 + } + 0x05: decode LTCOPROC { + 0xf: Mrrc15::Mrrc15(); + default: WarnUnimpl::mrrc(); // mrrc2 + } 0x02, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e: WarnUnimpl::stc(); // stc2 diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa new file mode 100644 index 000000000..3ed70ce81 --- /dev/null +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -0,0 +1,2035 @@ +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a 
hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black +// Thomas Grocutt +// Mbou Eyole +// Giacomo Gabrielli + +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeDataProcImm(ExtMachInst machInst); + StaticInstPtr decodeBranchExcSys(ExtMachInst machInst); + StaticInstPtr decodeLoadsStores(ExtMachInst machInst); + StaticInstPtr decodeDataProcReg(ExtMachInst machInst); + + StaticInstPtr decodeFpAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeFp(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMDScalar(ExtMachInst machInst); + + StaticInstPtr decodeGem5Ops(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcImm(ExtMachInst machInst) + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + + uint8_t opc = bits(machInst, 30, 29); + bool sf = bits(machInst, 31); + bool n = bits(machInst, 22); + uint8_t immr = bits(machInst, 21, 16); + uint8_t imms = bits(machInst, 15, 10); + switch (bits(machInst, 25, 23)) { + case 0x0: + case 0x1: + { + uint64_t immlo = bits(machInst, 30, 29); + uint64_t immhi = bits(machInst, 23, 5); + uint64_t imm = (immlo << 0) | (immhi << 2); + if (bits(machInst, 31) == 0) + return new AdrXImm(machInst, rd, INTREG_ZERO, sext<21>(imm)); + else + return new AdrpXImm(machInst, rd, INTREG_ZERO, + sext<33>(imm << 12)); + } + case 0x2: + case 0x3: + { + uint32_t imm12 = bits(machInst, 21, 10); + uint8_t shift = bits(machInst, 23, 22); + uint32_t imm; + if (shift == 0x0) + imm = imm12 << 0; + else if (shift == 0x1) + imm = imm12 << 12; + else + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new AddXImm(machInst, rdsp, rnsp, imm); + case 0x1: + return new AddXImmCc(machInst, rd, rnsp, imm); + case 0x2: + return new SubXImm(machInst, rdsp, rnsp, imm); + case 0x3: + return new SubXImmCc(machInst, rd, rnsp, imm); + } + } + case 0x4: + { + if (!sf && n) + return new Unknown64(machInst); + // len = MSB(n:NOT(imms)), len < 1 is undefined. + uint8_t len = 0; + if (n) { + len = 6; + } else if (imms == 0x3f || imms == 0x3e) { + return new Unknown64(machInst); + } else { + len = findMsbSet(imms ^ 0x3f); + } + // Generate r, s, and size. + uint64_t r = bits(immr, len - 1, 0); + uint64_t s = bits(imms, len - 1, 0); + uint8_t size = 1 << len; + if (s == size - 1) + return new Unknown64(machInst); + // Generate the pattern with s 1s, rotated by r, with size bits. + uint64_t pattern = mask(s + 1); + if (r) { + pattern = (pattern >> r) | (pattern << (size - r)); + pattern &= mask(size); + } + uint8_t width = sf ? 64 : 32; + // Replicate that to fill up the immediate. 
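+            // [Editorial worked example, not in the original patch:
+            // n=0, immr=0b000001, imms=0b111001 gives len=2, so size=4;
+            // s=1 makes pattern=0b0011; rotating right by r=1 within 4
+            // bits gives 0b1001; replication then yields 0x99999999 for
+            // a 32-bit (sf=0) or 0x9999999999999999 for a 64-bit (sf=1)
+            // logical immediate.]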
+ for (unsigned i = 1; i < (width / size); i *= 2) + pattern |= (pattern << (i * size)); + uint64_t imm = pattern; + + switch (opc) { + case 0x0: + return new AndXImm(machInst, rdsp, rn, imm); + case 0x1: + return new OrrXImm(machInst, rdsp, rn, imm); + case 0x2: + return new EorXImm(machInst, rdsp, rn, imm); + case 0x3: + return new AndXImmCc(machInst, rd, rn, imm); + } + } + case 0x5: + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + uint32_t imm16 = bits(machInst, 20, 5); + uint32_t hw = bits(machInst, 22, 21); + switch (opc) { + case 0x0: + return new Movn(machInst, rd, imm16, hw * 16); + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new Movz(machInst, rd, imm16, hw * 16); + case 0x3: + return new Movk(machInst, rd, imm16, hw * 16); + } + } + case 0x6: + if ((sf != n) || (!sf && (bits(immr, 5) || bits(imms, 5)))) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Sbfm64(machInst, rd, rn, immr, imms); + case 0x1: + return new Bfm64(machInst, rd, rn, immr, imms); + case 0x2: + return new Ubfm64(machInst, rd, rn, immr, imms); + case 0x3: + return new Unknown64(machInst); + } + case 0x7: + { + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (opc || bits(machInst, 21)) + return new Unknown64(machInst); + else + return new Extr64(machInst, rd, rn, rm, imms); + } + } + return new FailUnimplemented("Unhandled Case8", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeBranchExcSys(ExtMachInst machInst) + { + switch (bits(machInst, 30, 29)) { + case 0x0: + { + int64_t imm = sext<26>(bits(machInst, 25, 0)) << 2; + if (bits(machInst, 31) == 0) + return new B64(machInst, imm); + else + return new Bl64(machInst, imm); + } + case 0x1: + { + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + if (bits(machInst, 25) == 0) { + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + if (bits(machInst, 24) == 0) + return new Cbz64(machInst, imm, rt); + else + return new Cbnz64(machInst, imm, rt); + } else { + uint64_t bitmask = 0x1; + bitmask <<= bits(machInst, 23, 19); + int64_t imm = sext<14>(bits(machInst, 18, 5)) << 2; + if (bits(machInst, 31)) + bitmask <<= 32; + if (bits(machInst, 24) == 0) + return new Tbz64(machInst, bitmask, imm, rt); + else + return new Tbnz64(machInst, bitmask, imm, rt); + } + } + case 0x2: + // bit 30:26=10101 + if (bits(machInst, 31) == 0) { + if (bits(machInst, 25, 24) || bits(machInst, 4)) + return new Unknown64(machInst); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + ConditionCode condCode = + (ConditionCode)(uint8_t)(bits(machInst, 3, 0)); + return new BCond64(machInst, imm, condCode); + } else if (bits(machInst, 25, 24) == 0x0) { + if (bits(machInst, 4, 2)) + return new Unknown64(machInst); + uint8_t decVal = (bits(machInst, 1, 0) << 0) | + (bits(machInst, 23, 21) << 2); + switch (decVal) { + case 0x01: + return new Svc64(machInst); + case 0x02: + return new FailUnimplemented("hvc", machInst); + case 0x03: + return new Smc64(machInst); + case 0x04: + return new FailUnimplemented("brk", machInst); + case 0x08: + return new FailUnimplemented("hlt", machInst); + case 0x15: + return new FailUnimplemented("dcps1", machInst); + case 0x16: + return new FailUnimplemented("dcps2", machInst); + case 0x17: + return new FailUnimplemented("dcps3", machInst); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 25, 22) == 0x4) { + // bit 31:22=1101010100 + bool l = bits(machInst, 21); + uint8_t op0 = bits(machInst, 20, 
19); + uint8_t op1 = bits(machInst, 18, 16); + uint8_t crn = bits(machInst, 15, 12); + uint8_t crm = bits(machInst, 11, 8); + uint8_t op2 = bits(machInst, 7, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + switch (op0) { + case 0x0: + if (rt != 0x1f || l) + return new Unknown64(machInst); + if (crn == 0x2 && op1 == 0x3) { + switch (op2) { + case 0x0: + return new NopInst(machInst); + case 0x1: + return new YieldInst(machInst); + case 0x2: + return new WfeInst(machInst); + case 0x3: + return new WfiInst(machInst); + case 0x4: + return new SevInst(machInst); + case 0x5: + return new SevlInst(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x3 && op1 == 0x3) { + switch (op2) { + case 0x2: + return new Clrex64(machInst); + case 0x4: + return new Dsb64(machInst); + case 0x5: + return new Dmb64(machInst); + case 0x6: + return new Isb64(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x4) { + // MSR immediate + switch (op1 << 3 | op2) { + case 0x5: + // SP + return new MsrSP64(machInst, + (IntRegIndex) MISCREG_SPSEL, + INTREG_ZERO, + crm & 0x1); + case 0x1e: + // DAIFSet + return new MsrDAIFSet64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + case 0x1f: + // DAIFClr + return new MsrDAIFClr64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + default: + return new Unknown64(machInst); + } + } else { + return new Unknown64(machInst); + } + break; + case 0x1: + case 0x2: + case 0x3: + { + // bit 31:22=1101010100, 20:19=11 + bool read = l; + MiscRegIndex miscReg = + decodeAArch64SysReg(op0, op1, crn, crm, op2); + if (read) { + if ((miscReg == MISCREG_DC_CIVAC_Xt) || + (miscReg == MISCREG_DC_CVAC_Xt) || + (miscReg == MISCREG_DC_ZVA_Xt)) { + return new Unknown64(machInst); + } + } + // Check for invalid registers + if (miscReg == MISCREG_UNKNOWN) { + return new Unknown64(machInst); + } else if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (miscReg == MISCREG_NZCV) { + if (read) + return new MrsNZCV64(machInst, rt, (IntRegIndex) miscReg); + else + return new MsrNZCV64(machInst, (IntRegIndex) miscReg, rt); + } + uint32_t iss = msrMrs64IssBuild(read, op0, op1, crn, crm, op2, rt); + if (miscReg == MISCREG_DC_ZVA_Xt && !read) + return new Dczva(machInst, rt, (IntRegIndex) miscReg, iss); + + if (read) + return new Mrs64(machInst, rt, (IntRegIndex) miscReg, iss); + else + return new Msr64(machInst, (IntRegIndex) miscReg, rt, iss); + } else if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + read ? "mrs" : "msr", miscRegName[miscReg]); + return new WarnUnimplemented(read ? "mrs" : "msr", + machInst, full_mnem); + } else { + return new FailUnimplemented(csprintf("%s %s", + read ? 
"mrs" : "msr", miscRegName[miscReg]).c_str(), + machInst); + } + } + break; + } + } else if (bits(machInst, 25) == 0x1) { + uint8_t opc = bits(machInst, 24, 21); + uint8_t op2 = bits(machInst, 20, 16); + uint8_t op3 = bits(machInst, 15, 10); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + uint8_t op4 = bits(machInst, 4, 0); + if (op2 != 0x1f || op3 != 0x0 || op4 != 0x0) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Br64(machInst, rn); + case 0x1: + return new Blr64(machInst, rn); + case 0x2: + return new Ret64(machInst, rn); + case 0x4: + if (rn != 0x1f) + return new Unknown64(machInst); + return new Eret64(machInst); + case 0x5: + if (rn != 0x1f) + return new Unknown64(machInst); + return new FailUnimplemented("dret", machInst); + } + } + default: + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case7", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeLoadsStores(ExtMachInst machInst) + { + // bit 27,25=10 + switch (bits(machInst, 29, 28)) { + case 0x0: + if (bits(machInst, 26) == 0) { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rs = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + uint8_t opc = (bits(machInst, 15) << 0) | + (bits(machInst, 23, 21) << 1); + uint8_t size = bits(machInst, 31, 30); + switch (opc) { + case 0x0: + switch (size) { + case 0x0: + return new STXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STXRX64(machInst, rt, rnsp, rs); + } + case 0x1: + switch (size) { + case 0x0: + return new STLXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STLXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STLXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STLXRX64(machInst, rt, rnsp, rs); + } + case 0x2: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x3: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STLXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STLXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x4: + switch (size) { + case 0x0: + return new LDXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDXRX64(machInst, rt, rnsp, rs); + } + case 0x5: + switch (size) { + case 0x0: + return new LDAXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDAXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDAXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDAXRX64(machInst, rt, rnsp, rs); + } + case 0x6: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDXPX64(machInst, rt, rt2, rnsp); + } + + case 0x7: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDAXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDAXPX64(machInst, rt, rt2, rnsp); + } + + 
case 0x9: + switch (size) { + case 0x0: + return new STLRB64(machInst, rt, rnsp); + case 0x1: + return new STLRH64(machInst, rt, rnsp); + case 0x2: + return new STLRW64(machInst, rt, rnsp); + case 0x3: + return new STLRX64(machInst, rt, rnsp); + } + case 0xd: + switch (size) { + case 0x0: + return new LDARB64(machInst, rt, rnsp); + case 0x1: + return new LDARH64(machInst, rt, rnsp); + case 0x2: + return new LDARW64(machInst, rt, rnsp); + case 0x3: + return new LDARX64(machInst, rt, rnsp); + } + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 31)) { + return new Unknown64(machInst); + } else { + return decodeNeonMem(machInst); + } + case 0x1: + { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + uint8_t switchVal = (bits(machInst, 26) << 0) | + (bits(machInst, 31, 30) << 1); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + switch (switchVal) { + case 0x0: + return new LDRWL64_LIT(machInst, rt, imm); + case 0x1: + return new LDRSFP64_LIT(machInst, rt, imm); + case 0x2: + return new LDRXL64_LIT(machInst, rt, imm); + case 0x3: + return new LDRDFP64_LIT(machInst, rt, imm); + case 0x4: + return new LDRSWL64_LIT(machInst, rt, imm); + case 0x5: + return new BigFpMemLit("ldr", machInst, rt, imm); + case 0x6: + return new PRFM64_LIT(machInst, rt, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + uint8_t opc = bits(machInst, 31, 30); + if (opc >= 3) + return new Unknown64(machInst); + uint32_t size = 0; + bool fp = bits(machInst, 26); + bool load = bits(machInst, 22); + if (fp) { + size = 4 << opc; + } else { + if ((opc == 1) && !load) + return new Unknown64(machInst); + size = (opc == 0 || opc == 1) ? 4 : 8; + } + uint8_t type = bits(machInst, 24, 23); + int64_t imm = sext<7>(bits(machInst, 21, 15)) * size; + + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + + bool noAlloc = (type == 0); + bool signExt = !noAlloc && !fp && opc == 1; + PairMemOp::AddrMode mode; + const char *mnemonic = NULL; + switch (type) { + case 0x0: + case 0x2: + mode = PairMemOp::AddrMd_Offset; + break; + case 0x1: + mode = PairMemOp::AddrMd_PostIndex; + break; + case 0x3: + mode = PairMemOp::AddrMd_PreIndex; + break; + default: + return new Unknown64(machInst); + } + if (load) { + if (noAlloc) + mnemonic = "ldnp"; + else if (signExt) + mnemonic = "ldpsw"; + else + mnemonic = "ldp"; + } else { + if (noAlloc) + mnemonic = "stnp"; + else + mnemonic = "stp"; + } + + return new LdpStp(mnemonic, machInst, size, fp, load, noAlloc, + signExt, false, false, imm, mode, rn, rt, rt2); + } + // bit 29:27=111, 25=0 + case 0x3: + { + uint8_t switchVal = (bits(machInst, 23, 22) << 0) | + (bits(machInst, 26) << 2) | + (bits(machInst, 31, 30) << 3); + if (bits(machInst, 24) == 1) { + uint64_t imm12 = bits(machInst, 21, 10); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + switch (switchVal) { + case 0x00: + return new STRB64_IMM(machInst, rt, rnsp, imm12); + case 0x01: + return new LDRB64_IMM(machInst, rt, rnsp, imm12); + case 0x02: + return new LDRSBX64_IMM(machInst, rt, rnsp, imm12); + case 0x03: + return new LDRSBW64_IMM(machInst, rt, rnsp, imm12); + case 0x04: + return new STRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x05: + return new 
LDRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x06: + return new BigFpMemImm("str", machInst, false, + rt, rnsp, imm12 << 4); + case 0x07: + return new BigFpMemImm("ldr", machInst, true, + rt, rnsp, imm12 << 4); + case 0x08: + return new STRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x09: + return new LDRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0a: + return new LDRSHX64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0b: + return new LDRSHW64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0c: + return new STRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0d: + return new LDRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x10: + return new STRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x11: + return new LDRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x12: + return new LDRSW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x14: + return new STRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x15: + return new LDRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x18: + return new STRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x19: + return new LDRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1a: + return new PRFM64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1c: + return new STRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1d: + return new LDRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 11, 10) != 0x2) + return new Unknown64(machInst); + if (!bits(machInst, 14)) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ArmExtendType type = + (ArmExtendType)(uint32_t)bits(machInst, 15, 13); + uint8_t s = bits(machInst, 12); + switch (switchVal) { + case 0x00: + return new STRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x01: + return new LDRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x02: + return new LDRSBX64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x03: + return new LDRSBW64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x04: + return new STRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x05: + return new LDRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x6: + return new BigFpMemReg("str", machInst, false, + rt, rnsp, rm, type, s * 4); + case 0x7: + return new BigFpMemReg("ldr", machInst, true, + rt, rnsp, rm, type, s * 4); + case 0x08: + return new STRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x09: + return new LDRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0a: + return new LDRSHX64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0b: + return new LDRSHW64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0c: + return new STRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0d: + return new LDRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x10: + return new STRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x11: + return new LDRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x12: + return new LDRSW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x14: + return new STRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x15: + return new LDRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x18: + return new STRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x19: + return new LDRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1a: + return new 
PRFM64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1c: + return new STRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1d: + return new LDRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + default: + return new Unknown64(machInst); + } + } else { + // bit 29:27=111, 25:24=00, 21=0 + switch (bits(machInst, 11, 10)) { + case 0x0: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STURB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDURB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDURSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDURSBW64_IMM(machInst, rt, rnsp, imm); + case 0x04: + return new STURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x05: + return new LDURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemImm("stur", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemImm("ldur", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STURH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDURH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDURSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDURSHW64_IMM(machInst, rt, rnsp, imm); + case 0x0c: + return new STURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x0d: + return new LDURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STURW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDURW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDURSW64_IMM(machInst, rt, rnsp, imm); + case 0x14: + return new STURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x15: + return new LDURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STURX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDURX64_IMM(machInst, rt, rnsp, imm); + case 0x1a: + return new PRFUM64_IMM(machInst, rt, rnsp, imm); + case 0x1c: + return new STURDFP64_IMM(machInst, rt, rnsp, imm); + case 0x1d: + return new LDURDFP64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + // bit 29:27=111, 25:24=00, 21=0, 11:10=01 + case 0x1: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_POST(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_POST(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_POST(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_POST(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_POST(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_POST(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPost("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPost("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_POST(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_POST(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_POST(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_POST(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_POST(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_POST(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_POST(machInst, rt, rnsp, imm); + case 
0x11: + return new LDRW64_POST(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_POST(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_POST(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_POST(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_POST(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_POST(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_POST(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_POST(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STTRB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDTRB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDTRSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDTRSBW64_IMM(machInst, rt, rnsp, imm); + case 0x08: + return new STTRH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDTRH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDTRSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDTRSHW64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STTRW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDTRW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDTRSW64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STTRX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDTRX64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_PRE(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_PRE(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_PRE(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_PRE(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPre("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPre("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_PRE(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_PRE(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_PRE(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_PRE(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_PRE(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_PRE(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_PRE(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_PRE(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_PRE(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_PRE(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_PRE(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + } + } + } + } + return 
new FailUnimplemented("Unhandled Case1", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcReg(ExtMachInst machInst) + { + uint8_t switchVal = (bits(machInst, 28) << 1) | + (bits(machInst, 24) << 0); + switch (switchVal) { + case 0x0: + { + uint8_t switchVal = (bits(machInst, 21) << 0) | + (bits(machInst, 30, 29) << 1); + ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + uint8_t imm6 = bits(machInst, 15, 10); + bool sf = bits(machInst, 31); + if (!sf && (imm6 & 0x20)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AndXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new BicXSReg(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new OrrXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new OrnXSReg(machInst, rd, rn, rm, imm6, type); + case 0x4: + return new EorXSReg(machInst, rd, rn, rm, imm6, type); + case 0x5: + return new EonXSReg(machInst, rd, rn, rm, imm6, type); + case 0x6: + return new AndXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x7: + return new BicXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } + case 0x1: + { + uint8_t switchVal = bits(machInst, 30, 29); + if (bits(machInst, 21) == 0) { + ArmShiftType type = + (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + if (type == ROR) + return new Unknown64(machInst); + uint8_t imm6 = bits(machInst, 15, 10); + if (!bits(machInst, 31) && bits(imm6, 5)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (switchVal) { + case 0x0: + return new AddXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new AddXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new SubXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new SubXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } else { + if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4) + return new Unknown64(machInst); + ArmExtendType type = + (ArmExtendType)(uint8_t)bits(machInst, 15, 13); + uint8_t imm3 = bits(machInst, 12, 10); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x1: + return new AddXERegCc(machInst, rd, rnsp, rm, type, imm3); + case 0x2: + return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x3: + return new SubXERegCc(machInst, rd, rnsp, rm, type, imm3); + } + } + } + case 0x2: + { + if (bits(machInst, 21) == 1) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 22)) { + case 0x0: + { + if (bits(machInst, 15, 10)) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 30, 29); + switch (switchVal) { + case 0x0: + return new AdcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x1: + return new 
AdcXSRegCc(machInst, rd, rn, rm, 0, LSL); + case 0x2: + return new SbcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x3: + return new SbcXSRegCc(machInst, rd, rn, rm, 0, LSL); + } + } + case 0x1: + { + if ((bits(machInst, 4) == 1) || + (bits(machInst, 10) == 1) || + (bits(machInst, 29) == 0)) { + return new Unknown64(machInst); + } + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + uint8_t flags = bits(machInst, 3, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + if (bits(machInst, 11) == 0) { + IntRegIndex rm = + (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnReg64(machInst, rn, rm, cond, flags); + } else { + return new CcmpReg64(machInst, rn, rm, cond, flags); + } + } else { + uint8_t imm5 = bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnImm64(machInst, rn, imm5, cond, flags); + } else { + return new CcmpImm64(machInst, rn, imm5, cond, flags); + } + } + } + case 0x2: + { + if (bits(machInst, 29) == 1 || + bits(machInst, 11) == 1) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 10) << 0) | + (bits(machInst, 30) << 1); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + switch (switchVal) { + case 0x0: + return new Csel64(machInst, rd, rn, rm, cond); + case 0x1: + return new Csinc64(machInst, rd, rn, rm, cond); + case 0x2: + return new Csinv64(machInst, rd, rn, rm, cond); + case 0x3: + return new Csneg64(machInst, rd, rn, rm, cond); + } + } + case 0x3: + if (bits(machInst, 30) == 0) { + if (bits(machInst, 29) != 0) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x2: + return new Udiv64(machInst, rd, rn, rm); + case 0x3: + return new Sdiv64(machInst, rd, rn, rm); + case 0x8: + return new Lslv64(machInst, rd, rn, rm); + case 0x9: + return new Lsrv64(machInst, rd, rn, rm); + case 0xa: + return new Asrv64(machInst, rd, rn, rm); + case 0xb: + return new Rorv64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } else { + if (bits(machInst, 20, 16) != 0 || + bits(machInst, 29) != 0) { + return new Unknown64(machInst); + } + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x0: + return new Rbit64(machInst, rd, rn); + case 0x1: + return new Rev1664(machInst, rd, rn); + case 0x2: + if (bits(machInst, 31) == 0) + return new Rev64(machInst, rd, rn); + else + return new Rev3264(machInst, rd, rn); + case 0x3: + if (bits(machInst, 31) != 1) + return new Unknown64(machInst); + return new Rev64(machInst, rd, rn); + case 0x4: + return new Clz64(machInst, rd, rn); + case 0x5: + return new Cls64(machInst, rd, rn); + } + } + } + } + case 0x3: + { + if (bits(machInst, 30, 29) != 0x0 || + (bits(machInst, 23, 21) != 0 && bits(machInst, 31) == 0)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex ra = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 21)) { + case 0x0: + if (bits(machInst, 15) == 0) + return new Madd64(machInst, rd, ra, rn, rm); + else + return new Msub64(machInst, rd, ra, rn, rm); + case 0x1: + if (bits(machInst, 15) == 
0) + return new Smaddl64(machInst, rd, ra, rn, rm); + else + return new Smsubl64(machInst, rd, ra, rn, rm); + case 0x2: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Smulh64(machInst, rd, rn, rm); + case 0x5: + if (bits(machInst, 15) == 0) + return new Umaddl64(machInst, rd, ra, rn, rm); + else + return new Umsubl64(machInst, rd, ra, rn, rm); + case 0x6: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Umulh64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case2", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMD(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonIndexedElem(machInst); + } else if (bits(machInst, 23) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 22, 19)) { + return decodeNeonShiftByImm(machInst); + } else { + return decodeNeonModImm(machInst); + } + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeon3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeon3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeon2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonAcrossLanes(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 24) || + bits(machInst, 21) || + bits(machInst, 15)) { + return new Unknown64(machInst); + } else if (bits(machInst, 10) == 1) { + if (bits(machInst, 23, 22)) + return new Unknown64(machInst); + return decodeNeonCopy(machInst); + } else if (bits(machInst, 29) == 1) { + return decodeNeonExt(machInst); + } else if (bits(machInst, 11) == 1) { + return decodeNeonZipUzpTrn(machInst); + } else if (bits(machInst, 23, 22) == 0x0) { + return decodeNeonTblTbx(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case3", machInst); + } +} +}}; + + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + // bit 30=0, 28:25=1111 + decodeFp(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + IntRegIndex ra = (IntRegIndex)(uint32_t)bits(machInst, 14, 10); + uint8_t switchVal = (bits(machInst, 23, 21) << 1) | + (bits(machInst, 15) << 0); + switch (switchVal) { + case 0x0: // FMADD Sd = Sa + Sn*Sm + return new FMAddS(machInst, rd, rn, rm, ra); + case 0x1: // FMSUB Sd = Sa + (-Sn)*Sm + return new FMSubS(machInst, rd, rn, rm, ra); + case 0x2: // FNMADD Sd = (-Sa) + (-Sn)*Sm + return new FNMAddS(machInst, rd, rn, rm, ra); + case 0x3: // FNMSUB Sd = (-Sa) + Sn*Sm + return new FNMSubS(machInst, rd, rn, rm, ra); + case 0x4: // FMADD Dd = Da + Dn*Dm + return new FMAddD(machInst, rd, rn, rm, ra); + case 0x5: // FMSUB Dd = Da + (-Dn)*Dm + return new FMSubD(machInst, rd, rn, rm, ra); + case 0x6: // FNMADD Dd = (-Da) + (-Dn)*Dm + return new FNMAddD(machInst, rd, rn, rm, ra); + case 0x7: // FNMSUB Dd = (-Da) + Dn*Dm + return new FNMSubD(machInst, rd, rn, rm, ra); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 0) { + bool s = bits(machInst, 29); + if (s) + return new Unknown64(machInst); + uint8_t 
switchVal = bits(machInst, 20, 16); + uint8_t type = bits(machInst, 23, 22); + uint8_t scale = bits(machInst, 15, 10); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + if (bits(machInst, 18, 17) == 3 && scale != 0) + return new Unknown64(machInst); + // 30:24=0011110, 21=0 + switch (switchVal) { + case 0x00: + return new FailUnimplemented("fcvtns", machInst); + case 0x01: + return new FailUnimplemented("fcvtnu", machInst); + case 0x02: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // SCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpSW(machInst, rd, rn, scale); + case 1: // SCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpDW(machInst, rd, rn, scale); + case 4: // SCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpSX(machInst, rd, rn, scale); + case 5: // SCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x03: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // UCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpSW(machInst, rd, rn, scale); + case 1: // UCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpDW(machInst, rd, rn, scale); + case 4: // UCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpSX(machInst, rd, rn, scale); + case 5: // UCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x04: + return new FailUnimplemented("fcvtas", machInst); + case 0x05: + return new FailUnimplemented("fcvtau", machInst); + case 0x08: + return new FailUnimplemented("fcvtps", machInst); + case 0x09: + return new FailUnimplemented("fcvtpu", machInst); + case 0x0e: + return new FailUnimplemented("fmov elem. 
to 64", machInst); + case 0x0f: + return new FailUnimplemented("fmov 64 bit", machInst); + case 0x10: + return new FailUnimplemented("fcvtms", machInst); + case 0x11: + return new FailUnimplemented("fcvtmu", machInst); + case 0x18: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZS Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZS Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZS Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZS Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x19: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZU Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZU Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZU Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZU Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + } + } else { + // 30=0, 28:24=11110, 21=1 + uint8_t type = bits(machInst, 23, 22); + uint8_t imm8 = bits(machInst, 20, 13); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + switch (bits(machInst, 11, 10)) { + case 0x0: + if (bits(machInst, 12) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 9, 5)) { + return new Unknown64(machInst); + } + // 31:29=000, 28:24=11110, 21=1, 12:10=100 + if (type == 0) { + // FMOV S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5) + // :imm8<5:0>:Zeros(19) + uint32_t imm = vfp_modified_imm(imm8, false); + return new FmovImmS(machInst, rd, imm); + } else if (type == 1) { + // FMOV D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8) + // :imm8<5:0>:Zeros(48) + uint64_t imm = vfp_modified_imm(imm8, true); + return new FmovImmD(machInst, rd, imm); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 13) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 15, 14) || + bits(machInst, 23) || + bits(machInst, 2, 0)) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 4, 3) << 0) | + (bits(machInst, 22) << 2); + IntRegIndex rm = (IntRegIndex)(uint32_t) + bits(machInst, 20, 16); + // 28:23=000111100, 21=1, 15:10=001000, 2:0=000 + switch (switchVal) { + case 0x0: + // FCMP flags = compareQuiet(Sn,Sm) + return new FCmpRegS(machInst, rn, rm); + case 0x1: + // FCMP flags = compareQuiet(Sn,0.0) + return new FCmpImmS(machInst, rn, 0); + case 0x2: + // FCMPE flags = compareSignaling(Sn,Sm) + return new FCmpERegS(machInst, rn, rm); + case 0x3: + // FCMPE flags = compareSignaling(Sn,0.0) + return new FCmpEImmS(machInst, rn, 0); + case 0x4: + // FCMP flags = compareQuiet(Dn,Dm) + return new FCmpRegD(machInst, rn, rm); + case 0x5: + // FCMP flags = compareQuiet(Dn,0.0) + return new FCmpImmD(machInst, rn, 0); + case 0x6: + // FCMPE flags = compareSignaling(Dn,Dm) + return new FCmpERegD(machInst, rn, rm); + case 0x7: + // FCMPE flags = compareSignaling(Dn,0.0) + return new FCmpEImmD(machInst, rn, 0); + default: + return new 
Unknown64(machInst); + } + } else if (bits(machInst, 14) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t opcode = bits(machInst, 20, 15); + // Bits 31:24=00011110, 21=1, 14:10=10000 + switch (opcode) { + case 0x0: + if (type == 0) + // FMOV Sd = Sn + return new FmovRegS(machInst, rd, rn); + else if (type == 1) + // FMOV Dd = Dn + return new FmovRegD(machInst, rd, rn); + break; + case 0x1: + if (type == 0) + // FABS Sd = abs(Sn) + return new FAbsS(machInst, rd, rn); + else if (type == 1) + // FABS Dd = abs(Dn) + return new FAbsD(machInst, rd, rn); + break; + case 0x2: + if (type == 0) + // FNEG Sd = -Sn + return new FNegS(machInst, rd, rn); + else if (type == 1) + // FNEG Dd = -Dn + return new FNegD(machInst, rd, rn); + break; + case 0x3: + if (type == 0) + // FSQRT Sd = sqrt(Sn) + return new FSqrtS(machInst, rd, rn); + else if (type == 1) + // FSQRT Dd = sqrt(Dn) + return new FSqrtD(machInst, rd, rn); + break; + case 0x4: + if (type == 1) + // FCVT Sd = convertFormat(Dn) + return new FcvtFpDFpS(machInst, rd, rn); + else if (type == 3) + // FCVT Sd = convertFormat(Hn) + return new FcvtFpHFpS(machInst, rd, rn); + break; + case 0x5: + if (type == 0) + // FCVT Dd = convertFormat(Sn) + return new FCvtFpSFpD(machInst, rd, rn); + else if (type == 3) + // FCVT Dd = convertFormat(Hn) + return new FcvtFpHFpD(machInst, rd, rn); + break; + case 0x7: + if (type == 0) + // FCVT Hd = convertFormat(Sn) + return new FcvtFpSFpH(machInst, rd, rn); + else if (type == 1) + // FCVT Hd = convertFormat(Dn) + return new FcvtFpDFpH(machInst, rd, rn); + break; + case 0x8: + if (type == 0) // FRINTN Sd = roundToIntegralTiesToEven(Sn) + return new FRIntNS(machInst, rd, rn); + else if (type == 1) // FRINTN Dd = roundToIntegralTiesToEven(Dn) + return new FRIntND(machInst, rd, rn); + break; + case 0x9: + if (type == 0) // FRINTP Sd = roundToIntegralTowardPlusInf(Sn) + return new FRIntPS(machInst, rd, rn); + else if (type == 1) // FRINTP Dd = roundToIntegralTowardPlusInf(Dn) + return new FRIntPD(machInst, rd, rn); + break; + case 0xa: + if (type == 0) // FRINTM Sd = roundToIntegralTowardMinusInf(Sn) + return new FRIntMS(machInst, rd, rn); + else if (type == 1) // FRINTM Dd = roundToIntegralTowardMinusInf(Dn) + return new FRIntMD(machInst, rd, rn); + break; + case 0xb: + if (type == 0) // FRINTZ Sd = roundToIntegralTowardZero(Sn) + return new FRIntZS(machInst, rd, rn); + else if (type == 1) // FRINTZ Dd = roundToIntegralTowardZero(Dn) + return new FRIntZD(machInst, rd, rn); + break; + case 0xc: + if (type == 0) // FRINTA Sd = roundToIntegralTiesToAway(Sn) + return new FRIntAS(machInst, rd, rn); + else if (type == 1) // FRINTA Dd = roundToIntegralTiesToAway(Dn) + return new FRIntAD(machInst, rd, rn); + break; + case 0xe: + if (type == 0) // FRINTX Sd = roundToIntegralExact(Sn) + return new FRIntXS(machInst, rd, rn); + else if (type == 1) // FRINTX Dd = roundToIntegralExact(Dn) + return new FRIntXD(machInst, rd, rn); + break; + case 0xf: + if (type == 0) // FRINTI Sd = roundToIntegral(Sn) + return new FRIntIS(machInst, rd, rn); + else if (type == 1) // FRINTI Dd = roundToIntegral(Dn) + return new FRIntID(machInst, rd, rn); + break; + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } else if (bits(machInst, 15) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t rmode = bits(machInst, 20, 19); + uint8_t switchVal1 = bits(machInst, 18, 16); + uint8_t switchVal2 = (type << 
1) | bits(machInst, 31); + // 30:24=0011110, 21=1, 15:10=000000 + switch (switchVal1) { + case 0x0: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNS Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntWSN(machInst, rd, rn); + case 0x1: //FCVTPS Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntWSP(machInst, rd, rn); + case 0x2: //FCVTMS Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntWSM(machInst, rd, rn); + case 0x3: //FCVTZS Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNS Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntXSN(machInst, rd, rn); + case 0x5: //FCVTPS Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntXSP(machInst, rd, rn); + case 0x6: //FCVTMS Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntXSM(machInst, rd, rn); + case 0x7: //FCVTZS Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNS Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntWDN(machInst, rd, rn); + case 0x9: //FCVTPS Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntWDP(machInst, rd, rn); + case 0xA: //FCVTMS Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntWDM(machInst, rd, rn); + case 0xB: //FCVTZS Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNS Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntXDN(machInst, rd, rn); + case 0xD: //FCVTPS Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntXDP(machInst, rd, rn); + case 0xE: //FCVTMS Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntXDM(machInst, rd, rn); + case 0xF: //FCVTZS Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x1: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNU Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntWSN(machInst, rd, rn); + case 0x1: //FCVTPU Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntWSP(machInst, rd, rn); + case 0x2: //FCVTMU Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntWSM(machInst, rd, rn); + case 0x3: //FCVTZU Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNU Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntXSN(machInst, rd, rn); + case 0x5: //FCVTPU Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntXSP(machInst, rd, rn); + case 0x6: //FCVTMU Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntXSM(machInst, rd, rn); + case 0x7: //FCVTZU Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNU Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntWDN(machInst, rd, rn); + case 0x9: //FCVTPU Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntWDP(machInst, rd, rn); + case 0xA: //FCVTMU Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntWDM(machInst, rd, rn); + case 0xB: //FCVTZU Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNU Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntXDN(machInst, rd, rn); + case 0xD: //FCVTPU Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntXDP(machInst, rd, rn); + case 0xE: //FCVTMU Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntXDM(machInst, rd, rn); + case 0xF: //FCVTZU Xd = 
convertToIntExactTowardZero(Dn)
+                    return new FcvtFpUIntXDZ(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+              case 0x2:
+                if (rmode != 0)
+                    return new Unknown64(machInst);
+                switch (switchVal2) {
+                  case 0: // SCVTF Sd = convertFromInt(Wn)
+                    return new FcvtWSIntFpS(machInst, rd, rn);
+                  case 1: // SCVTF Sd = convertFromInt(Xn)
+                    return new FcvtXSIntFpS(machInst, rd, rn);
+                  case 2: // SCVTF Dd = convertFromInt(Wn)
+                    return new FcvtWSIntFpD(machInst, rd, rn);
+                  case 3: // SCVTF Dd = convertFromInt(Xn)
+                    return new FcvtXSIntFpD(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+              case 0x3:
+                switch (switchVal2) {
+                  case 0: // UCVTF Sd = convertFromInt(Wn)
+                    return new FcvtWUIntFpS(machInst, rd, rn);
+                  case 1: // UCVTF Sd = convertFromInt(Xn)
+                    return new FcvtXUIntFpS(machInst, rd, rn);
+                  case 2: // UCVTF Dd = convertFromInt(Wn)
+                    return new FcvtWUIntFpD(machInst, rd, rn);
+                  case 3: // UCVTF Dd = convertFromInt(Xn)
+                    return new FcvtXUIntFpD(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+              case 0x4:
+                if (rmode != 0)
+                    return new Unknown64(machInst);
+                switch (switchVal2) {
+                  case 0: // FCVTAS Wd = convertToIntExactTiesToAway(Sn)
+                    return new FcvtFpSIntWSA(machInst, rd, rn);
+                  case 1: // FCVTAS Xd = convertToIntExactTiesToAway(Sn)
+                    return new FcvtFpSIntXSA(machInst, rd, rn);
+                  case 2: // FCVTAS Wd = convertToIntExactTiesToAway(Dn)
+                    return new FcvtFpSIntWDA(machInst, rd, rn);
+                  case 3: // FCVTAS Xd = convertToIntExactTiesToAway(Dn)
+                    return new FcvtFpSIntXDA(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+              case 0x5:
+                switch (switchVal2) {
+                  case 0: // FCVTAU Wd = convertToIntExactTiesToAway(Sn)
+                    return new FcvtFpUIntWSA(machInst, rd, rn);
+                  case 1: // FCVTAU Xd = convertToIntExactTiesToAway(Sn)
+                    return new FcvtFpUIntXSA(machInst, rd, rn);
+                  case 2: // FCVTAU Wd = convertToIntExactTiesToAway(Dn)
+                    return new FcvtFpUIntWDA(machInst, rd, rn);
+                  case 3: // FCVTAU Xd = convertToIntExactTiesToAway(Dn)
+                    return new FcvtFpUIntXDA(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+              case 0x06:
+                switch (switchVal2) {
+                  case 0: // FMOV Wd = Sn
+                    if (rmode != 0)
+                        return new Unknown64(machInst);
+                    return new FmovRegCoreW(machInst, rd, rn);
+                  case 3: // FMOV Xd = Dn
+                    if (rmode != 0)
+                        return new Unknown64(machInst);
+                    return new FmovRegCoreX(machInst, rd, rn);
+                  case 5: // FMOV Xd = Vn<127:64>
+                    if (rmode != 1)
+                        return new Unknown64(machInst);
+                    return new FmovURegCoreX(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+                break;
+              case 0x07:
+                switch (switchVal2) {
+                  case 0: // FMOV Sd = Wn
+                    if (rmode != 0)
+                        return new Unknown64(machInst);
+                    return new FmovCoreRegW(machInst, rd, rn);
+                  case 3: // FMOV Dd = Xn
+                    if (rmode != 0)
+                        return new Unknown64(machInst);
+                    return new FmovCoreRegX(machInst, rd, rn);
+                  case 5: // FMOV Vd<127:64> = Xn
+                    if (rmode != 1)
+                        return new Unknown64(machInst);
+                    return new FmovUCoreRegX(machInst, rd, rn);
+                  default:
+                    return new Unknown64(machInst);
+                }
+                break;
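+              // A worked example of this decode path, derived from the
+              // cases above (an editorial sketch, not part of the ARM
+              // pseudocode): for "fmov xd, dn", sf (bit 31) is 1 and
+              // type (bits 23:22) is 0b01, so switchVal2 =
+              // (type << 1) | sf = 0x3; switchVal1 (bits 18:16) is 0x6
+              // and rmode (bits 20:19) is 0, which selects FmovRegCoreX
+              // under case 0x06.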
+              default:
+                // Warning: missing cases in the switch statement above
+                // still need to be added.
+                return new Unknown64(machInst);
+            }
+        }
+      case 0x1:
+        {
+            if (bits(machInst, 31) ||
+                bits(machInst, 29) ||
+                bits(machInst, 23)) {
+                return new Unknown64(machInst);
+            }
+            IntRegIndex rm = (IntRegIndex)(uint32_t) bits(machInst, 20, 16);
+            IntRegIndex rn = (IntRegIndex)(uint32_t) bits(machInst, 9, 5);
+            uint8_t imm = (uint8_t) bits(machInst, 3, 0);
+            ConditionCode cond =
+                (ConditionCode)(uint8_t)(bits(machInst, 15, 12));
+            uint8_t switchVal = (bits(machInst, 4) << 0) |
+                                (bits(machInst, 22) << 1);
+            // 31:23=000111100, 21=1, 11:10=01
+            switch (switchVal) {
+              case 0x0:
+                // FCCMP flags = if cond then compareQuiet(Sn,Sm) else #nzcv
+                return new FCCmpRegS(machInst, rn, rm, cond, imm);
+              case 0x1:
+                // FCCMPE flags = if cond then compareSignaling(Sn,Sm)
+                //                else #nzcv
+                return new FCCmpERegS(machInst, rn, rm, cond, imm);
+              case 0x2:
+                // FCCMP flags = if cond then compareQuiet(Dn,Dm) else #nzcv
+                return new FCCmpRegD(machInst, rn, rm, cond, imm);
+              case 0x3:
+                // FCCMPE flags = if cond then compareSignaling(Dn,Dm)
+                //                else #nzcv
+                return new FCCmpERegD(machInst, rn, rm, cond, imm);
+              default:
+                return new Unknown64(machInst);
+            }
+        }
+      case 0x2:
+        {
+            if (bits(machInst, 31) ||
+                bits(machInst, 29) ||
+                bits(machInst, 23)) {
+                return new Unknown64(machInst);
+            }
+            IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5);
+            IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16);
+            uint8_t switchVal = (bits(machInst, 15, 12) << 0) |
+                                (bits(machInst, 22) << 4);
+            switch (switchVal) {
+              case 0x00: // FMUL Sd = Sn * Sm
+                return new FMulS(machInst, rd, rn, rm);
+              case 0x10: // FMUL Dd = Dn * Dm
+                return new FMulD(machInst, rd, rn, rm);
+              case 0x01: // FDIV Sd = Sn / Sm
+                return new FDivS(machInst, rd, rn, rm);
+              case 0x11: // FDIV Dd = Dn / Dm
+                return new FDivD(machInst, rd, rn, rm);
+              case 0x02: // FADD Sd = Sn + Sm
+                return new FAddS(machInst, rd, rn, rm);
+              case 0x12: // FADD Dd = Dn + Dm
+                return new FAddD(machInst, rd, rn, rm);
+              case 0x03: // FSUB Sd = Sn - Sm
+                return new FSubS(machInst, rd, rn, rm);
+              case 0x13: // FSUB Dd = Dn - Dm
+                return new FSubD(machInst, rd, rn, rm);
+              case 0x04: // FMAX Sd = max(Sn, Sm)
+                return new FMaxS(machInst, rd, rn, rm);
+              case 0x14: // FMAX Dd = max(Dn, Dm)
+                return new FMaxD(machInst, rd, rn, rm);
+              case 0x05: // FMIN Sd = min(Sn, Sm)
+                return new FMinS(machInst, rd, rn, rm);
+              case 0x15: // FMIN Dd = min(Dn, Dm)
+                return new FMinD(machInst, rd, rn, rm);
+              case 0x06: // FMAXNM Sd = maxNum(Sn, Sm)
+                return new FMaxNMS(machInst, rd, rn, rm);
+              case 0x16: // FMAXNM Dd = maxNum(Dn, Dm)
+                return new FMaxNMD(machInst, rd, rn, rm);
+              case 0x07: // FMINNM Sd = minNum(Sn, Sm)
+                return new FMinNMS(machInst, rd, rn, rm);
+              case 0x17: // FMINNM Dd = minNum(Dn, Dm)
+                return new FMinNMD(machInst, rd, rn, rm);
+              case 0x08: // FNMUL Sd = -(Sn * Sm)
+                return new FNMulS(machInst, rd, rn, rm);
+              case 0x18: // FNMUL Dd = -(Dn * Dm)
+                return new FNMulD(machInst, rd, rn, rm);
+              default:
+                return new Unknown64(machInst);
+            }
+        }
+      case 0x3:
+        {
+            if (bits(machInst, 31) || bits(machInst, 29))
+                return new Unknown64(machInst);
+            uint8_t type = bits(machInst, 23, 22);
+            IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5);
+            IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16);
+            ConditionCode cond =
+                (ConditionCode)(uint8_t)(bits(machInst, 15,
12)); + if (type == 0) // FCSEL Sd = if cond then Sn else Sm + return new FCSelS(machInst, rd, rn, rm, cond); + else if (type == 1) // FCSEL Dd = if cond then Dn else Dm + return new FCSelD(machInst, rd, rn, rm, cond); + else + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case4", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMDScalar(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonScIndexedElem(machInst); + } else if (bits(machInst, 23) == 0) { + return decodeNeonScShiftByImm(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeonSc3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeonSc3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeonSc2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonScPwise(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 23, 22) == 0 && + bits(machInst, 15) == 0 && + bits(machInst, 10) == 1) { + return decodeNeonScCopy(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case6", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeFpAdvSIMD(ExtMachInst machInst) + { + + if (bits(machInst, 28) == 0) { + if (bits(machInst, 31) == 0) { + return decodeAdvSIMD(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 30) == 0) { + return decodeFp(machInst); + } else if (bits(machInst, 31) == 0) { + return decodeAdvSIMDScalar(machInst); + } else { + return new Unknown64(machInst); + } + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeGem5Ops(ExtMachInst machInst) + { + const uint32_t m5func = bits(machInst, 23, 16); + switch (m5func) { + case 0x00: return new Arm(machInst); + case 0x01: return new Quiesce(machInst); + case 0x02: return new QuiesceNs64(machInst); + case 0x03: return new QuiesceCycles64(machInst); + case 0x04: return new QuiesceTime64(machInst); + case 0x07: return new Rpns64(machInst); + case 0x09: return new WakeCPU64(machInst); + case 0x10: return new Deprecated_ivlb(machInst); + case 0x11: return new Deprecated_ivle(machInst); + case 0x20: return new Deprecated_exit (machInst); + case 0x21: return new M5exit64(machInst); + case 0x31: return new Loadsymbol(machInst); + case 0x30: return new Initparam64(machInst); + case 0x40: return new Resetstats64(machInst); + case 0x41: return new Dumpstats64(machInst); + case 0x42: return new Dumpresetstats64(machInst); + case 0x43: return new M5checkpoint64(machInst); + case 0x4F: return new M5writefile64(machInst); + case 0x50: return new M5readfile64(machInst); + case 0x51: return new M5break(machInst); + case 0x52: return new M5switchcpu(machInst); + case 0x53: return new M5addsymbol64(machInst); + case 0x54: return new M5panic(machInst); + case 0x5a: return new M5workbegin64(machInst); + case 0x5b: return new M5workend64(machInst); + default: return new Unknown64(machInst); + } + } +} +}}; + +def format Aarch64() {{ + decode_block = ''' + { + using namespace Aarch64; + if (bits(machInst, 27) == 0x0) { + if (bits(machInst, 28) == 0x0) + return new Unknown64(machInst); + else if (bits(machInst, 26) == 0) + // bit 28:26=100 + return decodeDataProcImm(machInst); + else + // bit 28:26=101 + return decodeBranchExcSys(machInst); + } else if 
(bits(machInst, 25) == 0) { + // bit 27=1, 25=0 + return decodeLoadsStores(machInst); + } else if (bits(machInst, 26) == 0) { + // bit 27:25=101 + return decodeDataProcReg(machInst); + } else if (bits(machInst, 24) == 1 && + bits(machInst, 31, 28) == 0xF) { + return decodeGem5Ops(machInst); + } else { + // bit 27:25=111 + return decodeFpAdvSIMD(machInst); + } + } + ''' +}}; diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa index f1b17ec90..513506d31 100644 --- a/src/arch/arm/isa/formats/branch.isa +++ b/src/arch/arm/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -101,7 +101,7 @@ def format Thumb16CondBranchAndSvc() {{ return new B(machInst, sext<9>(bits(machInst, 7, 0) << 1), (ConditionCode)(uint32_t)bits(machInst, 11, 8)); } else if (bits(machInst, 8)) { - return new Svc(machInst); + return new Svc(machInst, bits(machInst, 7, 0)); } else { // This space will not be allocated in the future. return new Unknown(machInst); @@ -127,7 +127,7 @@ def format Thumb32BranchesAndMiscCtrl() {{ // Permanently undefined. return new Unknown(machInst); } else { - return new WarnUnimplemented("smc", machInst); + return new Smc(machInst); } } else if ((op & 0x38) != 0x38) { const uint32_t s = bits(machInst, 26); @@ -141,20 +141,26 @@ def format Thumb32BranchesAndMiscCtrl() {{ return new B(machInst, imm, (ConditionCode)(uint32_t)bits(machInst, 25, 22)); } else { + // HIGH: 12-11=10, LOW: 15-14=00, 12=0 switch (op) { case 0x38: - { - const IntRegIndex rn = - (IntRegIndex)(uint32_t)bits(machInst, 19, 16); - const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrCpsrReg(machInst, rn, byteMask); - } case 0x39: { const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 19, 16); const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrSpsrReg(machInst, rn, byteMask); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + byteMask; + return new MsrBankedReg(machInst, rn, sysM, r); + } else { + if (r) { + return new MsrSpsrReg(machInst, rn, byteMask); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } + } } case 0x3a: { @@ -196,11 +202,11 @@ def format Thumb32BranchesAndMiscCtrl() {{ case 0x2: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); default: break; } @@ -208,28 +214,44 @@ def format Thumb32BranchesAndMiscCtrl() {{ } case 0x3c: { - // On systems that don't support bxj, bxj == bx - return new BxReg(machInst, + return new BxjReg(machInst, (IntRegIndex)(uint32_t)bits(machInst, 19, 16), COND_UC); } case 0x3d: { const uint32_t imm32 = bits(machInst, 7, 0); - return new SubsImmPclr(machInst, INTREG_PC, INTREG_LR, - imm32, false); + if (imm32 == 0) { + return new Eret(machInst); + } else { + return new SubsImmPclr(machInst, INTREG_PC, + INTREG_LR, imm32, false); + } } case 0x3e: + case 0x3f: { + const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsCpsr(machInst, rd); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + bits(machInst, 11, 8); + return new MrsBankedReg(machInst, rd, sysM, r); + } else { + if (r) { + 
return new MrsSpsr(machInst, rd); + } else { + return new MrsCpsr(machInst, rd); + } + } } - case 0x3f: + case 0xfe: { - const IntRegIndex rd = - (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsSpsr(machInst, rd); + uint32_t imm16 = (bits(machInst, 19, 16) << 12) | + (bits(machInst, 11, 0) << 0); + return new Hvc(machInst, imm16); } } break; diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 90144c101..44e9c5b5e 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -44,6 +44,12 @@ //Include the basic format ##include "basic.isa" +//Include support for decoding AArch64 instructions +##include "aarch64.isa" + +//Include support for decoding AArch64 NEON instructions +##include "neon64.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 6d779e541..ccd4589a3 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -151,8 +151,7 @@ let {{ if (singleAll) { size = bits(machInst, 7, 6); bool t = bits(machInst, 5); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; if (width == 1) { regs = t ? 2 : 1; inc = 1; @@ -164,7 +163,7 @@ let {{ case 1: case 2: if (bits(machInst, 4)) - align = width * eBytes - 1; + align = size + width - 1; break; case 3: break; @@ -173,20 +172,19 @@ let {{ if (bits(machInst, 4) == 0) return new Unknown(machInst); size = 2; - align = 0xf; + align = 0x4; } else if (size == 2) { if (bits(machInst, 4)) - align = 7; + align = 0x3; } else { if (bits(machInst, 4)) - align = 4 * eBytes - 1; + align = size + 2; } break; } } else { size = bits(machInst, 11, 10); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; regs = width; unsigned indexAlign = bits(machInst, 7, 4); // If width is 1, inc is always 1. That's overridden later. @@ -219,13 +217,13 @@ let {{ break; case 2: if (bits(indexAlign, 1, 0)) - align = 3; + align = 2; break; } break; case 2: if (bits(indexAlign, 0)) - align = (2 * eBytes) - 1; + align = size + 1; break; case 3: break; @@ -234,11 +232,11 @@ let {{ case 0: case 1: if (bits(indexAlign, 0)) - align = (4 * eBytes) - 1; + align = size + 2; break; case 2: if (bits(indexAlign, 0)) - align = (4 << bits(indexAlign, 1, 0)) - 1; + align = bits(indexAlign, 1, 0) + 2; break; } break; @@ -252,9 +250,9 @@ let {{ align = bits(machInst, 5, 4); if (align == 0) { // @align wasn't specified, so alignment can be turned off. 
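+            // Note, as inferred from this hunk (an editorial comment, not
+            // ARM documentation): the @align field's byte-mask encoding is
+            // replaced by a log2(bytes) encoding throughout. When @align
+            // is omitted, (1 << size) - 1 becomes size; when @align == 1
+            // (8-byte alignment), the old code computed (4 << 1) - 1 = 0x7
+            // (a mask) while the new code computes 1 + 2 = 3 = log2(8).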
- align = ((1 << size) - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; } else { - align = ((4 << align) - 1); + align = align + 2; } switch (width) { case 1: @@ -588,6 +586,23 @@ let {{ } } case 0xc: + if (b) { + if (!u) { + if (bits(c, 1) == 0) { + if (q) { + return new NVfmaQFp(machInst, vd, vn, vm); + } else { + return new NVfmaDFp(machInst, vd, vn, vm); + } + } else { + if (q) { + return new NVfmsQFp(machInst, vd, vn, vm); + } else { + return new NVfmsDFp(machInst, vd, vn, vm); + } + } + } + } return new Unknown(machInst); case 0xd: if (b) { @@ -1827,7 +1842,7 @@ let {{ break; case 0x1: { - if (offset == 0 || vd + offset/2 > NumFloatArchRegs) { + if (offset == 0 || vd + offset/2 > NumFloatV7ArchRegs) { break; } switch (bits(opcode, 1, 0)) { @@ -1951,8 +1966,9 @@ let {{ } else if (a == 0x7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -1974,7 +1990,9 @@ let {{ if (specReg == MISCREG_FPSCR) { return new VmsrFpscr(machInst, (IntRegIndex)specReg, rt); } else { - return new Vmsr(machInst, (IntRegIndex)specReg, rt); + uint32_t iss = mcrMrcIssBuild(0, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmsr(machInst, (IntRegIndex)specReg, rt, iss); } } } else if (l == 0 && c == 1) { @@ -2041,8 +2059,9 @@ let {{ } else if (a == 7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -2070,7 +2089,9 @@ let {{ } else if (specReg == MISCREG_FPSCR) { return new VmrsFpscr(machInst, rt, (IntRegIndex)specReg); } else { - return new Vmrs(machInst, rt, (IntRegIndex)specReg); + uint32_t iss = mcrMrcIssBuild(l, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmrs(machInst, rt, (IntRegIndex)specReg, iss); } } } else { @@ -2235,6 +2256,44 @@ let {{ } } break; + case 0x9: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } + break; + case 0xa: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } + break; case 0xb: if ((opc3 & 0x1) == 0) { const uint32_t baseImm = diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index f7830eff3..abac27021 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -282,7 +282,7 @@ def format Thumb32SrsRfe() {{ } } else { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); if (!add && !wb) { return new %(srs)s(machInst, mode, diff --git a/src/arch/arm/isa/formats/misc.isa b/src/arch/arm/isa/formats/misc.isa index 00a37d17b..647f9846d 100644 --- 
a/src/arch/arm/isa/formats/misc.isa +++ b/src/arch/arm/isa/formats/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -36,19 +36,42 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Authors: Gabe Black +// Giacomo Gabrielli + +def format ArmERet() {{ + decode_block = "return new Eret(machInst);" +}}; def format Svc() {{ - decode_block = "return new Svc(machInst);" + decode_block = "return new Svc(machInst, bits(machInst, 23, 0));" +}}; + +def format ArmSmcHyp() {{ + decode_block = ''' + { + if (bits(machInst, 21)) + { + return new Smc(machInst); + } else { + uint32_t imm16 = (bits(machInst, 19, 8) << 4) | + (bits(machInst, 3, 0) << 0); + return new Hvc(machInst, imm16); + } + } + ''' }}; def format ArmMsrMrs() {{ decode_block = ''' { const uint8_t byteMask = bits(machInst, 19, 16); + const uint8_t sysM = byteMask | (bits(machInst, 8) << 4); const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 3, 0); const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); const uint32_t opcode = bits(machInst, 24, 21); const bool useImm = bits(machInst, 25); + const bool r = bits(machInst, 22); + const bool isBanked = bits(machInst, 9); const uint32_t unrotated = bits(machInst, 7, 0); const uint32_t rotation = (bits(machInst, 11, 8) << 1); @@ -56,20 +79,36 @@ def format ArmMsrMrs() {{ switch (opcode) { case 0x8: - return new MrsCpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsCpsr(machInst, rd); + } case 0x9: if (useImm) { return new MsrCpsrImm(machInst, imm, byteMask); } else { - return new MsrCpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } } case 0xa: - return new MrsSpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsSpsr(machInst, rd); + } case 0xb: if (useImm) { return new MsrSpsrImm(machInst, imm, byteMask); } else { - return new MsrSpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrSpsrReg(machInst, rn, byteMask); + } } default: return new Unknown(machInst); @@ -99,16 +138,17 @@ let {{ switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP14_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? 
"read" : "write").c_str(), machInst); default: + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); if (isRead) { - return new Mrc14(machInst, rt, (IntRegIndex)miscReg); + return new Mrc14(machInst, rt, (IntRegIndex)miscReg, iss); } else { - return new Mcr14(machInst, (IntRegIndex)miscReg, rt); + return new Mcr14(machInst, (IntRegIndex)miscReg, rt, iss); } } } @@ -123,8 +163,8 @@ def format McrMrc14() {{ let {{ header_output = ''' - StaticInstPtr - decodeMcrMrc15(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc14(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc15(ExtMachInst machInst); ''' decoder_output = ''' StaticInstPtr @@ -136,107 +176,50 @@ let {{ const uint32_t crm = bits(machInst, 3, 0); const MiscRegIndex miscReg = decodeCP15Reg(crn, opc1, crm, opc2); const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - const bool isRead = bits(machInst, 20); + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP15_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); - case MISCREG_DCCISW: - return new WarnUnimplemented( - isRead ? "mrc dccisw" : "mcr dcisw", machInst); - case MISCREG_DCCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dccimvac" : "mcr dccimvac", machInst); - case MISCREG_DCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dcimvac" : "mcr dcimvac", machInst); case MISCREG_DCCMVAC: return new FlushPipeInst( isRead ? "mrc dccmvac" : "mcr dccmvac", machInst); - case MISCREG_DCCMVAU: - return new WarnUnimplemented( - isRead ? "mrc dccmvau" : "mcr dccmvau", machInst); case MISCREG_CP15ISB: - return new Isb(machInst); + return new Isb(machInst, iss); case MISCREG_CP15DSB: - return new Dsb(machInst); + return new Dsb(machInst, iss); case MISCREG_CP15DMB: - return new Dmb(machInst); - case MISCREG_ICIALLUIS: - return new WarnUnimplemented( - isRead ? "mrc icialluis" : "mcr icialluis", machInst); - case MISCREG_ICIMVAU: - return new WarnUnimplemented( - isRead ? "mrc icimvau" : "mcr icimvau", machInst); - case MISCREG_BPIMVA: - return new WarnUnimplemented( - isRead ? "mrc bpimva" : "mcr bpimva", machInst); - case MISCREG_BPIALLIS: - return new WarnUnimplemented( - isRead ? "mrc bpiallis" : "mcr bpiallis", machInst); - case MISCREG_BPIALL: - return new WarnUnimplemented( - isRead ? "mrc bpiall" : "mcr bpiall", machInst); - case MISCREG_L2LATENCY: - return new WarnUnimplemented( - isRead ? "mrc l2latency" : "mcr l2latency", machInst); - case MISCREG_CRN15: - return new WarnUnimplemented( - isRead ? "mrc crn15" : "mcr crn15", machInst); - - // Write only. - case MISCREG_TLBIALLIS: - case MISCREG_TLBIMVAIS: - case MISCREG_TLBIASIDIS: - case MISCREG_TLBIMVAAIS: - case MISCREG_ITLBIALL: - case MISCREG_ITLBIMVA: - case MISCREG_ITLBIASID: - case MISCREG_DTLBIALL: - case MISCREG_DTLBIMVA: - case MISCREG_DTLBIASID: - case MISCREG_TLBIALL: - case MISCREG_TLBIMVA: - case MISCREG_TLBIASID: - case MISCREG_TLBIMVAA: - if (isRead) { - return new Unknown(machInst); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } + return new Dmb(machInst, iss); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrc" : "mcr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); - // Read only in user mode. 
-          case MISCREG_TPIDRURO:
-            if (isRead) {
-                return new Mrc15User(machInst, rt, (IntRegIndex)miscReg);
-            } else {
-                return new Mcr15(machInst, (IntRegIndex)miscReg, rt);
+                // Remove the warn flag and set the implemented flag. This
+                // prevents the instruction from warning a second time; it
+                // also means the instruction is actually generated. Actually
+                // creating the instruction to access a register that isn't
+                // implemented sounds a bit silly, but it's required to get
+                // the correct behaviour for hyp traps and undef exceptions.
+                miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true;
+                miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false;
             }
-          // Read/write in user mode.
-          case MISCREG_TPIDRURW:
-            if (isRead) {
-                return new Mrc15User(machInst, rt, (IntRegIndex)miscReg);
+            if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) {
+                if (isRead)
+                    return new Mrc15(machInst, rt, (IntRegIndex)miscReg, iss);
+                return new Mcr15(machInst, (IntRegIndex)miscReg, rt, iss);
             } else {
-                return new Mcr15User(machInst, (IntRegIndex)miscReg, rt);
-            }
-
-          // Read/write, priveleged only.
-          default:
-            if (miscReg >= MISCREG_CP15_UNIMP_START)
                 return new FailUnimplemented(csprintf("%s %s",
                     isRead ? "mrc" : "mcr", miscRegName[miscReg]).c_str(),
                     machInst);
-            if (isRead) {
-                return new Mrc15(machInst, rt, (IntRegIndex)miscReg);
-            } else {
-                return new Mcr15(machInst, (IntRegIndex)miscReg, rt);
-            }
         }
     }
@@ -248,3 +231,70 @@ def format McrMrc15() {{
     return decodeMcrMrc15(machInst);
     '''
 }};
+
+let {{
+    header_output = '''
+    StaticInstPtr
+    decodeMcrrMrrc15(ExtMachInst machInst);
+    '''
+    decoder_output = '''
+    StaticInstPtr
+    decodeMcrrMrrc15(ExtMachInst machInst)
+    {
+        const uint32_t crm = bits(machInst, 3, 0);
+        const uint32_t opc1 = bits(machInst, 7, 4);
+        const MiscRegIndex miscReg = decodeCP15Reg64(crm, opc1);
+        const IntRegIndex rt = (IntRegIndex) (uint32_t) bits(machInst, 15, 12);
+        const IntRegIndex rt2 = (IntRegIndex) (uint32_t) bits(machInst, 19, 16);
+
+        const bool isRead = bits(machInst, 20);
+
+        switch (miscReg) {
+          case MISCREG_CP15_UNIMPL:
+            return new FailUnimplemented(
+                csprintf("miscreg crm:%d opc1:%d 64-bit %s unknown",
+                crm, opc1, isRead ? "read" : "write").c_str(),
+                machInst);
+          default:
+            if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) {
+                std::string full_mnem = csprintf("%s %s",
+                    isRead ? "mrrc" : "mcrr", miscRegName[miscReg]);
+                warn("\\tinstruction '%s' unimplemented\\n", full_mnem);
+
+                // Remove the warn flag and set the implemented flag. This
+                // prevents the instruction from warning a second time; it
+                // also means the instruction is actually generated. Actually
+                // creating the instruction to access a register that isn't
+                // implemented sounds a bit silly, but it's required to get
+                // the correct behaviour for hyp traps and undef exceptions.
+                miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true;
+                miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false;
+            }
+
+            if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) {
+                uint32_t iss = mcrrMrrcIssBuild(isRead, crm, rt, rt2, opc1);
+
+                if (isRead)
+                    return new Mrrc15(machInst, (IntRegIndex) miscReg, rt2, rt, iss);
+                return new Mcrr15(machInst, rt2, rt, (IntRegIndex) miscReg, iss);
+            } else {
+                return new FailUnimplemented(csprintf("%s %s",
+                    isRead ?
"mrrc" : "mcrr", miscRegName[miscReg]).c_str(), + machInst); + } + } + } + ''' +}}; + +def format Mcrr15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; + +def format Mrrc15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa new file mode 100644 index 000000000..72bbd0c60 --- /dev/null +++ b/src/arch/arm/isa/formats/neon64.isa @@ -0,0 +1,2626 @@ +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +output header {{ +namespace Aarch64 +{ + // AdvSIMD three same + StaticInstPtr decodeNeon3Same(ExtMachInst machInst); + // AdvSIMD three different + StaticInstPtr decodeNeon3Diff(ExtMachInst machInst); + // AdvSIMD two-reg misc + StaticInstPtr decodeNeon2RegMisc(ExtMachInst machInst); + // AdvSIMD across lanes + StaticInstPtr decodeNeonAcrossLanes(ExtMachInst machInst); + // AdvSIMD copy + StaticInstPtr decodeNeonCopy(ExtMachInst machInst); + // AdvSIMD vector x indexed element + StaticInstPtr decodeNeonIndexedElem(ExtMachInst machInst); + // AdvSIMD modified immediate + StaticInstPtr decodeNeonModImm(ExtMachInst machInst); + // AdvSIMD shift by immediate + StaticInstPtr decodeNeonShiftByImm(ExtMachInst machInst); + // AdvSIMD TBL/TBX + StaticInstPtr decodeNeonTblTbx(ExtMachInst machInst); + // AdvSIMD ZIP/UZP/TRN + StaticInstPtr decodeNeonZipUzpTrn(ExtMachInst machInst); + // AdvSIMD EXT + StaticInstPtr decodeNeonExt(ExtMachInst machInst); + + // AdvSIMD scalar three same + StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst); + // AdvSIMD scalar three different + StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst); + // AdvSIMD scalar two-reg misc + StaticInstPtr decodeNeonSc2RegMisc(ExtMachInst machInst); + // AdvSIMD scalar pairwise + StaticInstPtr decodeNeonScPwise(ExtMachInst machInst); + // AdvSIMD scalar copy + StaticInstPtr decodeNeonScCopy(ExtMachInst machInst); + // AdvSIMD scalar x indexed element + StaticInstPtr decodeNeonScIndexedElem(ExtMachInst machInst); + // AdvSIMD scalar shift by immediate + StaticInstPtr decodeNeonScShiftByImm(ExtMachInst machInst); + + // AdvSIMD load/store + StaticInstPtr decodeNeonMem(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeNeon3Same(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + + switch (opcode) { + case 0x00: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x01: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x03: + switch (size) { + case 0x0: + if (u) { + if (q) + return new EorQX(machInst, vd, vn, vm); + else + return new EorDX(machInst, vd, vn, vm); + } else { + if (q) + return new AndQX(machInst, vd, vn, vm); + else + return new AndDX(machInst, vd, vn, vm); + } + case 0x1: + if (u) { + if (q) + return new BslQX(machInst, vd, vn, vm); + else + return new BslDX(machInst, vd, vn, vm); + } else { + if (q) + return new BicQX(machInst, vd, vn, vm); + else + return new BicDX(machInst, vd, vn, vm); + } + case 0x2: + if (u) { + if (q) + return new BitQX(machInst, vd, vn, vm); + else + return new BitDX(machInst, vd, vn, vm); + 
} else { + if (q) + return new OrrQX(machInst, vd, vn, vm); + else + return new OrrDX(machInst, vd, vn, vm); + } + case 0x3: + if (u) { + if (q) + return new BifQX(machInst, vd, vn, vm); + else + return new BifDX(machInst, vd, vn, vm); + } else { + if (q) + return new OrnQX(machInst, vd, vn, vm); + else + return new OrnDX(machInst, vd, vn, vm); + } + } + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x05: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x06: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x0c: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x0d: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x0e: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x0f: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x10: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x11: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + 
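+          // Illustrative mapping for the neighbouring opcodes, per the ARM
+          // ARM "AdvSIMD three same" table: the u bit selects the second
+          // member of each pairing, so 0x10 above is ADD (u=0) / SUB (u=1),
+          // 0x11 is CMTST / CMEQ, 0x12 is MLA / MLS, and 0x13 below is
+          // MUL / PMUL (PMUL is only allocated for size 00).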
case 0x13: + if (size == 0x3 || (size != 0x0 && bits(machInst, 29))) + return new Unknown64(machInst); + if (u) { + if (q) + return new PmulQX(machInst, vd, vn, vm); + else + return new PmulDX(machInst, vd, vn, vm); + } else { + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + } + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x15: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) { + if (q) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + if (q) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + case 0x17: + if (u || size_q == 0x6) + return new Unknown64(machInst); + else + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x19: + if (size < 0x2) { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1b: + if (size < 0x2 && sz_q != 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + return new Unknown64(machInst); + } + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1e: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, 
vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1f: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon3Diff(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x0: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x1: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x2: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x3: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x4: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x5: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x6: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x7: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x8: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x9: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + } + case 0xa: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return 
decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + } + case 0xc: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + } + case 0xe: + if (u || size != 0) { + return new Unknown64(machInst); + } else { + if (q) + return new Pmull2X(machInst, vd, vn, vm); + else + return new PmullX(machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon2RegMisc(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t op = (uint8_t)((bits(machInst, 12) << 1) | + bits(machInst, 29)); + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x00: + if (op + size >= 3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x01: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) + return new Rev16QX(machInst, vd, vn); + else + return new Rev16DX(machInst, vd, vn); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x03: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x05: + if (size != 0x0) + return new Unknown64(machInst); + if (q) + return new CntQX(machInst, vd, vn); + else + return new CntDX(machInst, vd, vn); + case 0x06: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x0c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x0f: + if (size < 0x2 || sz_q == 0x2) + return 
new Unknown64(machInst);
+            return decodeNeonUTwoMiscFpReg(
+                q, size & 0x1, machInst, vd, vn);
+          case 0x12:
+            if (size == 0x3)
+                return new Unknown64(machInst);
+            return decodeNeonUTwoMiscSReg(
+                q, size, machInst, vd, vn);
+          case 0x14:
+            if (size == 0x3)
+                return new Unknown64(machInst);
+            return decodeNeonSTwoMiscSReg(
+                q, size, machInst, vd, vn);
+          case 0x16:
+            if (size > 0x1)
+                return new Unknown64(machInst);
+            if (q) {
+                if (size)
+                    return new Fcvtn2X(machInst, vd, vn);
+                else
+                    return new Fcvtn2X(machInst, vd, vn);
+            } else {
+                if (size)
+                    return new FcvtnX(machInst, vd, vn);
+                else
+                    return new FcvtnX(machInst, vd, vn);
+            }
+          case 0x17:
+            if (size > 0x1)
+                return new Unknown64(machInst);
+            if (q) {
+                if (size)
+                    return new Fcvtl2X(machInst, vd, vn);
+                else
+                    return new Fcvtl2X(machInst, vd, vn);
+            } else {
+                if (size)
+                    return new FcvtlX(machInst, vd, vn);
+                else
+                    return new FcvtlX(machInst, vd, vn);
+            }
+          case 0x18:
+            if (sz_q == 0x2)
+                return new Unknown64(machInst);
+            if (size < 0x2)
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+            else
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+          case 0x19:
+            if (sz_q == 0x2)
+                return new Unknown64(machInst);
+            if (size < 0x2)
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+            else
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+          case 0x1a:
+            if (sz_q == 0x2)
+                return new Unknown64(machInst);
+            if (size < 0x2)
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+            else
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+          case 0x1b:
+            if (sz_q == 0x2)
+                return new Unknown64(machInst);
+            if (size < 0x2)
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+            else
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+          case 0x1c:
+            if (size < 0x2) {
+                if (sz_q == 0x2)
+                    return new Unknown64(machInst);
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+            } else {
+                if (size & 0x1)
+                    return new Unknown64(machInst);
+                if (q)
+                    return new UrecpeQX(machInst, vd, vn);
+                else
+                    return new UrecpeDX(machInst, vd, vn);
+            }
+          case 0x1d:
+            if (sz_q == 0x2)
+                return new Unknown64(machInst);
+            if (size < 0x2) {
+                if (q) {
+                    if (size & 0x1)
+                        return new ScvtfIntDQX(machInst, vd, vn);
+                    else
+                        return new ScvtfIntSQX(machInst, vd, vn);
+                } else {
+                    if (size & 0x1)
+                        return new Unknown64(machInst);
+                    else
+                        return new ScvtfIntDX(machInst, vd, vn);
+                }
+            } else {
+                return decodeNeonUTwoMiscFpReg(
+                    q, size & 0x1, machInst, vd, vn);
+            }
+          case 0x20:
+            if (op + size >= 3)
+                return new Unknown64(machInst);
+            if (q) {
+                if (size & 0x1)
+                    return new Rev32QX(machInst, vd, vn);
+                else
+                    return new Rev32QX(machInst, vd, vn);
+            } else {
+                if (size & 0x1)
+                    return new Rev32DX(machInst, vd, vn);
+                else
+                    return new Rev32DX(machInst, vd, vn);
+            }
+          case 0x22:
+            if (size == 0x3)
+                return new Unknown64(machInst);
+            return decodeNeonUTwoMiscSReg(
+                q, size, machInst, vd, vn);
+          case 0x23:
+            if (size_q == 0x6)
+                return new Unknown64(machInst);
+            return decodeNeonUTwoMiscXReg(
+                q, size, machInst, vd, vn);
+          case 0x24:
+            if (size == 0x3)
+                return new Unknown64(machInst);
+            return decodeNeonSTwoMiscSReg(
+                q, size, machInst, vd, vn);
+          case 0x25:
+            if (size == 0x0) {
+                if (q)
+                    return new MvnQX(machInst, vd, vn);
+                else
+                    return new MvnDX(machInst, vd, vn);
+            } else if (size == 0x1) {
+                if (q)
+                    return new RbitQX(machInst, vd, vn);
+                else
+                    return new RbitDX(machInst, vd,
vn); + } else { + return new Unknown64(machInst); + } + case 0x26: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x27: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x28: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x29: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x2b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x2c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x2f: + if (size < 0x2 || size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x33: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x34: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x36: + if (size != 0x1) + return new Unknown64(machInst); + if (q) + return new Fcvtxn2X(machInst, vd, vn); + else + return new FcvtxnX(machInst, vd, vn); + case 0x38: + if (size > 0x1 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x39: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) { + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + } else if (size == 0x2) { + if (q) + return new UrsqrteQX(machInst, vd, vn); + else + return new UrsqrteDX(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x3d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonAcrossLanes(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + 
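+        // bits(x, hi, lo) is the gem5 bitfield helper returning the
+        // inclusive bit range [hi:lo] of x, e.g. bits(0xAB, 7, 4) == 0xA;
+        // the (uint8_t) casts narrow the extracted field before it is
+        // reinterpreted as a register index.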
IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x03: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesLongReg( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x1a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x1b: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesLongReg( + q, size, machInst, vd, vn); + case 0x2a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x2c: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxnmvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminnmvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x2f: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg( + q, size, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonCopy(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t imm5 = bits(machInst, 20, 16); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t imm5_pos = findLsbSet(imm5); + uint8_t index1 = 0, index2 = 0; + + if (op) { + if (!q || (imm4 & mask(imm5_pos))) + return new Unknown64(machInst); + + index1 = bits(imm5, 4, imm5_pos + 1); // dst + index2 = bits(imm4, 3, imm5_pos); // src + + switch (imm5_pos) { + case 0: + return new InsElemX(machInst, vd, vn, index1, index2); + case 1: + return new InsElemX(machInst, vd, vn, index1, index2); + case 2: + return new InsElemX(machInst, vd, vn, index1, index2); + case 3: + return new InsElemX(machInst, vd, vn, index1, index2); + default: + return new Unknown64(machInst); + } + } + + switch (imm4) { + case 0x0: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new DupElemDX(machInst, vd, vn, index1); + case 1: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new DupElemDX(machInst, vd, vn, index1); + case 2: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new DupElemDX(machInst, vd, vn, index1); + case 3: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x1: + switch (imm5) { + case 0x1: + if (q) + return new DupGprWQX(machInst, vd, vn); + else + 
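+                // imm5 is effectively one-hot for DUP (general): 0x1, 0x2
+                // and 0x4 select byte, halfword and word copies from a W
+                // register, while 0x8 selects the doubleword copy from an
+                // X register, which is only valid in the Q (128-bit) form.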
return new DupGprWDX(machInst, vd, vn);
+              case 0x2:
+                if (q)
+                    return new DupGprWQX(machInst, vd, vn);
+                else
+                    return new DupGprWDX(machInst, vd, vn);
+              case 0x4:
+                if (q)
+                    return new DupGprWQX(machInst, vd, vn);
+                else
+                    return new DupGprWDX(machInst, vd, vn);
+              case 0x8:
+                if (q)
+                    return new DupGprXQX(machInst, vd, vn);
+                else
+                    return new Unknown64(machInst);
+              default:
+                return new Unknown64(machInst);
+            }
+          case 0x3:
+            index1 = imm5 >> (imm5_pos + 1);
+            switch (imm5_pos) {
+              case 0:
+                return new InsGprWX(machInst, vd, vn, index1);
+              case 1:
+                return new InsGprWX(machInst, vd, vn, index1);
+              case 2:
+                return new InsGprWX(machInst, vd, vn, index1);
+              case 3:
+                return new InsGprXX(machInst, vd, vn, index1);
+              default:
+                return new Unknown64(machInst);
+            }
+          case 0x5:
+            index1 = bits(imm5, 4, imm5_pos + 1);
+            switch (imm5_pos) {
+              case 0:
+                if (q)
+                    return new SmovXX(machInst, vd, vn, index1);
+                else
+                    return new SmovWX(machInst, vd, vn, index1);
+              case 1:
+                if (q)
+                    return new SmovXX(machInst, vd, vn, index1);
+                else
+                    return new SmovWX(machInst, vd, vn, index1);
+              case 2:
+                if (q)
+                    return new SmovXX(machInst, vd, vn, index1);
+                else
+                    return new Unknown64(machInst);
+              default:
+                return new Unknown64(machInst);
+            }
+          case 0x7:
+            index1 = imm5 >> (imm5_pos + 1);
+
+            if ((q && imm5_pos != 3) || (!q && imm5_pos >= 3))
+                return new Unknown64(machInst);
+
+            switch (imm5_pos) {
+              case 0:
+                return new UmovWX(machInst, vd, vn, index1);
+              case 1:
+                return new UmovWX(machInst, vd, vn, index1);
+              case 2:
+                return new UmovWX(machInst, vd, vn, index1);
+              case 3:
+                return new UmovXX(machInst, vd, vn, index1);
+              default:
+                return new Unknown64(machInst);
+            }
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    StaticInstPtr
+    decodeNeonIndexedElem(ExtMachInst machInst)
+    {
+        uint8_t q = bits(machInst, 30);
+        uint8_t u = bits(machInst, 29);
+        uint8_t size = bits(machInst, 23, 22);
+        uint8_t L = bits(machInst, 21);
+        uint8_t M = bits(machInst, 20);
+        uint8_t opcode = bits(machInst, 15, 12);
+        uint8_t H = bits(machInst, 11);
+
+        IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16);
+
+        uint8_t index = 0;
+        uint8_t index_fp = 0;
+        uint8_t vmh = 0;
+        uint8_t sz = size & 0x1;
+        uint8_t sz_q = (sz << 1) | bits(machInst, 30);
+        uint8_t sz_L = (sz << 1) | L;
+
+        // Index and 2nd register operand for integer instructions
+        if (size == 0x1) {
+            index = (H << 2) | (L << 1) | M;
+            // vmh = 0;
+        } else if (size == 0x2) {
+            index = (H << 1) | L;
+            vmh = M;
+        }
+        IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);
+
+        // Index and 2nd register operand for FP instructions
+        vmh = M;
+        if ((size & 0x1) == 0) {
+            index_fp = (H << 1) | L;
+        } else if (L == 0) {
+            index_fp = H;
+        }
+        IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);
+
+        switch (opcode) {
+          case 0x0:
+            if (!u || (size == 0x0 || size == 0x3))
+                return new Unknown64(machInst);
+            else
+                return decodeNeonUThreeImmHAndWReg(
+                    q, size, machInst, vd, vn, vm, index);
+          case 0x1:
+            if (!u && size >= 2 && sz_q != 0x2 && sz_L != 0x3)
+                return decodeNeonUThreeImmFpReg(
+                    q, sz, machInst, vd, vn, vm_fp, index_fp);
+            else
+                return new Unknown64(machInst);
+          case 0x2:
+            if (size == 0x0 || size == 0x3)
+                return new Unknown64(machInst);
+            if (u)
+                return decodeNeonUThreeImmHAndWReg(
+                    q, size, machInst, vd, vn, vm, index);
+            else
+                return decodeNeonSThreeImmHAndWReg(
+                    q, size, machInst, vd, vn, vm, index);
+          case 0x3:
+            if (u || (size == 0x0 ||
size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x4: + if (u && !(size == 0x0 || size == 0x3)) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return new Unknown64(machInst); + case 0x5: + if (!u && size >= 0x2 && sz_L != 0x3 && sz_q != 0x2) + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x6: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x7: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x8: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x9: + if (size >= 2 && sz_q != 0x2 && sz_L != 0x3) { + if (u) + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + } else { + return new Unknown64(machInst); + } + case 0xa: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xc: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonModImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t abcdefgh = (bits(machInst, 18, 16) << 5) | + bits(machInst, 9, 5); + uint8_t cmode = bits(machInst, 15, 12); + uint8_t o2 = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + + if (o2 == 0x1 || (op == 0x1 && cmode == 0xf && !q)) + return new Unknown64(machInst); + + bool immValid = true; + const uint64_t bigImm = simd_modified_imm(op, cmode, abcdefgh, + immValid, + true /* isAarch64 */); + if (!immValid) { + return new Unknown(machInst); + } + + if (op) { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX(machInst, vd, bigImm); + else + return new MvniDX(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX(machInst, vd, bigImm); + else + return new BicImmDX(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + switch (bits(cmode, 1, 0)) { + case 0: + case 1: + if (q) + return new MvniQX(machInst, vd, bigImm); + else + return new MvniDX(machInst, vd, bigImm); + case 2: + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + case 3: + if (q) + return new FmovQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } + } 
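+            // (The remaining op == 1 encodings, cmode 10x0 and 10x1, are
+            // the 16-bit shifted-immediate MVNI and BIC forms handled in
+            // the else branch below.)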
else { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX(machInst, vd, bigImm); + else + return new MvniDX(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX(machInst, vd, + bigImm); + else + return new BicImmDX(machInst, vd, + bigImm); + } + } + } + } else { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX(machInst, vd, bigImm); + else + return new OrrImmDX(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + if (bits(cmode, 1, 0) == 0x3) { + if (q) + return new FmovQX(machInst, vd, bigImm); + else + return new FmovDX(machInst, vd, bigImm); + } else { + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX(machInst, vd, + bigImm); + else + return new OrrImmDX(machInst, vd, bigImm); + } + } + } + } + return new Unknown(machInst); + } + + StaticInstPtr + decodeNeonShiftByImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t immh3_q = (immh3 << 1) | q; + uint8_t op_u = (bits(machInst, 12) << 1) | u; + uint8_t size = findMsbSet(immh); + int shiftAmt = 0; + + switch (opcode) { + case 0x00: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x02: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x04: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x06: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x08: + if (u && !(immh3_q == 0x2)) { + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0a: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x0c: + if (u && !(immh3_q == 0x2 || op_u == 0x0)) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return 
decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + if (immh3_q == 0x2 || op_u == 0x0) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x10: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x11: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x14: + if (immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftFpReg( + q, size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (q) { + if (size & 0x1) + return new ScvtfFixedDQX(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedSQX(machInst, vd, vn, + shiftAmt); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfFixedDX(machInst, vd, vn, + shiftAmt); + } + } + case 0x1f: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftFpReg( + q, size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftFpReg( + q, size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonTblTbx(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t switchVal = bits(machInst, 14, 12); + + switch (switchVal) { + case 0x0: + if (q) + return new Tbl1QX(machInst, vd, vn, vm); + else + return new Tbl1DX(machInst, vd, vn, vm); + case 0x1: + if (q) + return new Tbx1QX(machInst, vd, vn, vm); + else + return new Tbx1DX(machInst, vd, vn, vm); + case 0x2: + if (q) + return new Tbl2QX(machInst, vd, vn, vm); + else + return new Tbl2DX(machInst, vd, vn, vm); + case 0x3: + if (q) + return new Tbx2QX(machInst, vd, vn, vm); + else + return new Tbx2DX(machInst, vd, vn, vm); + case 0x4: + if (q) + return new 
Tbl3QX(machInst, vd, vn, vm); + else + return new Tbl3DX(machInst, vd, vn, vm); + case 0x5: + if (q) + return new Tbx3QX(machInst, vd, vn, vm); + else + return new Tbx3DX(machInst, vd, vn, vm); + case 0x6: + if (q) + return new Tbl4QX(machInst, vd, vn, vm); + else + return new Tbl4DX(machInst, vd, vn, vm); + case 0x7: + if (q) + return new Tbx4QX(machInst, vd, vn, vm); + else + return new Tbx4DX(machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonZipUzpTrn(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 14, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x1: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x2: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x3: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x5: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x6: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x7: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonExt(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op2 = bits(machInst, 23, 22); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (op2 != 0 || (q == 0x0 && bits(imm4, 3) == 0x1)) + return new Unknown64(machInst); + + uint8_t index = q ? 
imm4 : imm4 & 0x7; + + if (q) { + return new ExtQX(machInst, vd, vn, vm, index); + } else { + return new ExtDX(machInst, vd, vn, vm, index); + } + } + + StaticInstPtr + decodeNeonSc3Same(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + uint8_t s = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x01: + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x05: + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x06: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhiDX(machInst, vd, vn, vm); + else + return new CmgtDX(machInst, vd, vn, vm); + case 0x07: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhsDX(machInst, vd, vn, vm); + else + return new CmgeDX(machInst, vd, vn, vm); + case 0x08: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UshlDX(machInst, vd, vn, vm); + else + return new SshlDX(machInst, vd, vn, vm); + case 0x09: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x0a: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UrshlDX(machInst, vd, vn, vm); + else + return new SrshlDX(machInst, vd, vn, vm); + case 0x0b: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x10: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new SubDX(machInst, vd, vn, vm); + else + return new AddDX(machInst, vd, vn, vm); + case 0x11: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmeqDX(machInst, vd, vn, vm); + else + return new CmtstDX(machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + case 0x1a: + if (!u || size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + case 0x1b: + if (u || size > 0x1) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (!u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + case 0x1f: + if (u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, 
vm); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc3Diff(ExtMachInst machInst) + { + if (bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x9: + return decodeNeonSThreeHAndWReg(size, machInst, vd, vn, vm); + case 0xb: + return decodeNeonSThreeHAndWReg(size, machInst, vd, vn, vm); + case 0xd: + return decodeNeonSThreeHAndWReg(size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc2RegMisc(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + switch (switchVal) { + case 0x03: + return decodeNeonUTwoMiscUReg(size, machInst, vd, vn); + case 0x07: + return decodeNeonSTwoMiscUReg(size, machInst, vd, vn); + case 0x08: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgtZeroDX(machInst, vd, vn); + case 0x09: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmeqZeroDX(machInst, vd, vn); + case 0x0a: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmltZeroDX(machInst, vd, vn); + case 0x0b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new AbsDX(machInst, vd, vn); + case 0x0c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x14: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtnScX(machInst, vd, vn); + case 0x1: + return new SqxtnScX(machInst, vd, vn); + case 0x2: + return new SqxtnScX(machInst, vd, vn); + } + } + case 0x1a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x1b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x1d: + if (size < 0x2) { + if (size & 0x1) + return new ScvtfIntScDX(machInst, vd, vn); + else + return new ScvtfIntScSX(machInst, vd, vn); + } else { + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + } + case 0x1f: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x23: + return decodeNeonUTwoMiscUReg(size, machInst, vd, vn); + case 0x27: + return 
decodeNeonSTwoMiscUReg(size, machInst, vd, vn); + case 0x28: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgeZeroDX(machInst, vd, vn); + case 0x29: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmleZeroDX(machInst, vd, vn); + case 0x2b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new NegDX(machInst, vd, vn); + case 0x2c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtunScX(machInst, vd, vn); + case 0x1: + return new SqxtunScX(machInst, vd, vn); + case 0x2: + return new SqxtunScX(machInst, vd, vn); + } + } + case 0x34: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new UqxtnScX(machInst, vd, vn); + case 0x1: + return new UqxtnScX(machInst, vd, vn); + case 0x2: + return new UqxtnScX(machInst, vd, vn); + } + } + case 0x36: + if (size != 0x1) { + return new Unknown64(machInst); + } else { + return new FcvtxnScX(machInst, vd, vn); + } + case 0x3a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x3b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x3d: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScPwise(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + if (!u) { + if (opcode == 0x1b && size == 0x3) + return new AddpScQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + + uint8_t switchVal = (opcode << 0) | (size << 5); + switch (switchVal) { + case 0x0c: + case 0x2c: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0d: + case 0x2d: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0f: + case 0x2f: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x4c: + case 0x6c: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x4f: + case 0x6f: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScCopy(ExtMachInst machInst) + { + if (bits(machInst, 14, 11) != 0 || bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t imm5 = bits(machInst, 20, 16); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = findLsbSet(imm5); + uint8_t index = bits(imm5, 4, size + 1); + + return 
decodeNeonUTwoShiftUReg( + size, machInst, vd, vn, index); + } + + StaticInstPtr + decodeNeonScIndexedElem(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz_L = bits(machInst, 22, 21); + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } else if (size == 0x3) { + index = H; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + if (u && opcode != 9) + return new Unknown64(machInst); + + switch (opcode) { + case 0x1: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x3: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0x5: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x7: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0x9: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0xb: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0xc: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0xd: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScShiftByImm(ExtMachInst machInst) + { + bool u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t size = findMsbSet(immh); + int shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + + if (immh == 0x0) + return new Unknown64(machInst); + + switch (opcode) { + case 0x00: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UshrDX(machInst, vd, vn, shiftAmt); + else + return new SshrDX(machInst, vd, vn, 
shiftAmt); + case 0x02: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UsraDX(machInst, vd, vn, shiftAmt); + else + return new SsraDX(machInst, vd, vn, shiftAmt); + case 0x04: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrshrDX(machInst, vd, vn, shiftAmt); + else + return new SrshrDX(machInst, vd, vn, shiftAmt); + case 0x06: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrsraDX(machInst, vd, vn, shiftAmt); + else + return new SrsraDX(machInst, vd, vn, shiftAmt); + case 0x08: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new SriDX(machInst, vd, vn, shiftAmt); + else + return new Unknown64(machInst); + case 0x0a: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return new SliDX(machInst, vd, vn, shiftAmt); + else + return new ShlDX(machInst, vd, vn, shiftAmt); + case 0x0c: + if (u) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftUReg( + size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftUReg( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUReg( + size, machInst, vd, vn, shiftAmt); + case 0x10: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x11: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftUFpReg( + size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (size & 0x1) + return new ScvtfFixedScDX(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedScSX(machInst, vd, vn, + shiftAmt); + } + case 0x1f: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUFpReg( + size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftUFpReg( + size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonMem(ExtMachInst machInst) + { + uint8_t dataSize = bits(machInst, 30) ? 
128 : 64; + bool multiple = bits(machInst, 24, 23) < 0x2; + bool load = bits(machInst, 22); + + uint8_t numStructElems = 0; + uint8_t numRegs = 0; + + if (multiple) { // AdvSIMD load/store multiple structures + uint8_t opcode = bits(machInst, 15, 12); + uint8_t eSize = bits(machInst, 11, 10); + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + + switch (opcode) { + case 0x0: // LD/ST4 (4 regs) + numStructElems = 4; + numRegs = 4; + break; + case 0x2: // LD/ST1 (4 regs) + numStructElems = 1; + numRegs = 4; + break; + case 0x4: // LD/ST3 (3 regs) + numStructElems = 3; + numRegs = 3; + break; + case 0x6: // LD/ST1 (3 regs) + numStructElems = 1; + numRegs = 3; + break; + case 0x7: // LD/ST1 (1 reg) + numStructElems = 1; + numRegs = 1; + break; + case 0x8: // LD/ST2 (2 regs) + numStructElems = 2; + numRegs = 2; + break; + case 0xa: // LD/ST1 (2 regs) + numStructElems = 1; + numRegs = 2; + break; + default: + return new Unknown64(machInst); + } + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } else { + return new VstMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } + } else { // AdvSIMD load/store single structure + uint8_t scale = bits(machInst, 15, 14); + uint8_t numStructElems = (((uint8_t) bits(machInst, 13) << 1) | + (uint8_t) bits(machInst, 21)) + 1; + uint8_t index = 0; + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + bool replicate = false; + + switch (scale) { + case 0x0: + index = ((uint8_t) bits(machInst, 30) << 3) | + ((uint8_t) bits(machInst, 12) << 2) | + (uint8_t) bits(machInst, 11, 10); + break; + case 0x1: + index = ((uint8_t) bits(machInst, 30) << 2) | + ((uint8_t) bits(machInst, 12) << 1) | + (uint8_t) bits(machInst, 11); + break; + case 0x2: + if (bits(machInst, 10) == 0x0) { + index = ((uint8_t) bits(machInst, 30) << 1) | + bits(machInst, 12); + } else { + index = (uint8_t) bits(machInst, 30); + scale = 0x3; + } + break; + case 0x3: + scale = bits(machInst, 11, 10); + replicate = true; + break; + default: + return new Unknown64(machInst); + } + + uint8_t eSize = scale; + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } else { + return new VstSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } + } + } +} +}}; diff --git a/src/arch/arm/isa/formats/uncond.isa b/src/arch/arm/isa/formats/uncond.isa index 4a18a55bb..c376cd9ce 100644 --- a/src/arch/arm/isa/formats/uncond.isa +++ b/src/arch/arm/isa/formats/uncond.isa @@ -99,11 +99,11 @@ def format ArmUnconditional() {{ case 0x1: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); } } } else if (bits(op2, 0) == 0) { @@ -166,7 +166,7 @@ def format ArmUnconditional() {{ const uint32_t val = ((machInst >> 20) & 0x5); if (val == 0x4) { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if 
(badMode32((OperatingMode)mode)) return new Unknown(machInst); switch (bits(machInst, 24, 21)) { case 0x2: @@ -250,17 +250,10 @@ def format ArmUnconditional() {{ "ldc, ldc2 (immediate)", machInst); } } - if (op1 == 0xC5) { - return new WarnUnimplemented( - "mrrc, mrrc2", machInst); - } } else { if (bits(op1, 4, 3) != 0 || bits(op1, 1) == 1) { return new WarnUnimplemented( "stc, stc2", machInst); - } else if (op1 == 0xC4) { - return new WarnUnimplemented( - "mcrr, mcrrc", machInst); } } } diff --git a/src/arch/arm/isa/formats/unimp.isa b/src/arch/arm/isa/formats/unimp.isa index 1c9a4b402..8e346112c 100644 --- a/src/arch/arm/isa/formats/unimp.isa +++ b/src/arch/arm/isa/formats/unimp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -85,6 +85,9 @@ output header {{ private: /// Have we warned on this instruction yet? mutable bool warned; + /// Full mnemonic for MRC and MCR instructions including the + /// coproc. register name + std::string fullMnemonic; public: /// Constructor @@ -96,6 +99,16 @@ output header {{ flags[IsNonSpeculative] = true; } + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst, + const std::string& _fullMnemonic) + : ArmStaticInst(_mnemonic, _machInst, No_OpClass), warned(false), + fullMnemonic(_fullMnemonic) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + %(BasicExecDeclare)s std::string @@ -147,10 +160,7 @@ output exec {{ FailUnimplemented::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } Fault @@ -158,7 +168,8 @@ output exec {{ Trace::InstRecord *traceData) const { if (!warned) { - warn("\tinstruction '%s' unimplemented\n", mnemonic); + warn("\tinstruction '%s' unimplemented\n", + fullMnemonic.size() ? 
fullMnemonic.c_str() : mnemonic); warned = true; } diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 5dd13d623..a2ce84345 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -50,10 +50,16 @@ output header {{ #include #include "arch/arm/insts/branch.hh" +#include "arch/arm/insts/branch64.hh" +#include "arch/arm/insts/data64.hh" +#include "arch/arm/insts/fplib.hh" #include "arch/arm/insts/macromem.hh" #include "arch/arm/insts/mem.hh" +#include "arch/arm/insts/mem64.hh" #include "arch/arm/insts/misc.hh" +#include "arch/arm/insts/misc64.hh" #include "arch/arm/insts/mult.hh" +#include "arch/arm/insts/neon64_mem.hh" #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/static_inst.hh" #include "arch/arm/insts/vfp.hh" @@ -63,6 +69,7 @@ output header {{ }}; output decoder {{ +#include #include "arch/arm/decoder.hh" #include "arch/arm/faults.hh" #include "arch/arm/intregs.hh" diff --git a/src/arch/arm/isa/insts/aarch64.isa b/src/arch/arm/isa/insts/aarch64.isa new file mode 100644 index 000000000..6fcf9b5d2 --- /dev/null +++ b/src/arch/arm/isa/insts/aarch64.isa @@ -0,0 +1,58 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
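As a minimal standalone sketch (not part of the patch) of what the MOVZ/MOVK/MOVN definitions in this new aarch64.isa file compute: imm1 is the 16-bit immediate and imm2 the shift amount (0, 16, 32 or 48), matching the Dest64/imm1/imm2 operands used there. The helper names below are hypothetical.

    #include <cstdint>

    // Illustration only; these helpers are not in the patch.
    uint64_t movz(uint16_t imm1, unsigned imm2) {
        return (uint64_t)imm1 << imm2;                    // all other bits cleared
    }
    uint64_t movn(uint16_t imm1, unsigned imm2) {
        return ~((uint64_t)imm1 << imm2);                 // bitwise NOT of the movz result
    }
    uint64_t movk(uint64_t dest, uint16_t imm1, unsigned imm2) {
        uint64_t field = 0xffffULL << imm2;               // insertBits(Dest64, imm2 + 15, imm2, imm1)
        return (dest & ~field) | ((uint64_t)imm1 << imm2);
    }

A full 64-bit constant is therefore typically materialised as one movz followed by up to three movk's, one per 16-bit field.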
+// +// Authors: Gabe Black + +let {{ + movzCode = 'Dest64 = ((uint64_t)imm1) << imm2;' + movzIop = InstObjParams("movz", "Movz", "RegImmImmOp", movzCode, []) + header_output += RegImmImmOpDeclare.subst(movzIop) + decoder_output += RegImmImmOpConstructor.subst(movzIop) + exec_output += BasicExecute.subst(movzIop) + + movkCode = 'Dest64 = insertBits(Dest64, imm2 + 15, imm2, imm1);' + movkIop = InstObjParams("movk", "Movk", "RegImmImmOp", movkCode, []) + header_output += RegImmImmOpDeclare.subst(movkIop) + decoder_output += RegImmImmOpConstructor.subst(movkIop) + exec_output += BasicExecute.subst(movkIop) + + movnCode = 'Dest64 = ~(((uint64_t)imm1) << imm2);' + movnIop = InstObjParams("movn", "Movn", "RegImmImmOp", movnCode, []) + header_output += RegImmImmOpDeclare.subst(movnIop) + decoder_output += RegImmImmOpConstructor.subst(movnIop) + exec_output += BasicExecute.subst(movnIop) +}}; diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa index e360f4581..3ee9d88e4 100644 --- a/src/arch/arm/isa/insts/branch.isa +++ b/src/arch/arm/isa/insts/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,7 +48,7 @@ let {{ bCode = ''' NPC = (uint32_t)(PC + imm); ''' - br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);''' + br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() + imm));''' instFlags = ["IsDirectControl"] if (link): bCode += ''' @@ -86,9 +86,9 @@ let {{ Name += "Imm" # Since we're switching ISAs, the target ISA will be the opposite # of the current ISA. Thumb is whether the target is ARM. - newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' + newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' br_tgt_code = ''' - pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : + pcs.instNPC((uint32_t)(branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : (branchPC.instPC() + imm))); ''' base = "BranchImmCond" @@ -150,7 +150,26 @@ let {{ if imm: decoder_output += BranchTarget.subst(blxIop) - #Ignore BXJ for now + bxjcode = ''' + HSTR hstr = Hstr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if (ArmSystem::haveVirtualization(xc->tcBase()) && hstr.tjdbx && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + fault = new HypervisorTrap(machInst, op1, EC_TRAPPED_BXJ); + } + IWNPC = Op1; + ''' + + bxjIop = InstObjParams("bxj", "BxjReg", "BranchRegCond", + {"code": bxjcode, + "predicate_test": predicateTest, + "is_ras_pop": "op1 == INTREG_LR" }, + ["IsIndirectControl"]) + header_output += BranchRegCondDeclare.subst(bxjIop) + decoder_output += BranchRegCondConstructor.subst(bxjIop) + exec_output += PredOpExecute.subst(bxjIop) #CBNZ, CBZ. These are always unconditional as far as predicates for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa new file mode 100644 index 000000000..89cee6c22 --- /dev/null +++ b/src/arch/arm/isa/insts/branch64.isa @@ -0,0 +1,248 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black +// Giacomo Gabrielli + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # B, BL + for (mnem, link) in (("b", False), ("bl", True)): + bCode = ('NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsDirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImm64", bCode, instFlags) + header_output += BranchImm64Declare.subst(bIop) + decoder_output += BranchImm64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # BR, BLR + for (mnem, link) in (("br", False), ("blr", True)): + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchReg64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # B conditional + bCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) + NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), + currEL(xc->tcBase())); + else + NPC = NPC; + ''' + bIop = InstObjParams("b", "BCond64", "BranchImmCond64", bCode, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmCond64Declare.subst(bIop) + decoder_output += BranchImmCond64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # RET + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl', 
'IsReturn'] + + bIop = InstObjParams('ret', 'Ret64', "BranchRet64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # ERET + bCode = '''Addr newPc; + CPSR cpsr = Cpsr; + CPSR spsr = Spsr; + + ExceptionLevel curr_el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + switch (curr_el) { + case EL3: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3); + break; + case EL2: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL2); + break; + case EL1: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL1); + break; + default: + return new UndefinedInstruction(machInst, false, mnemonic); + break; + } + if (spsr.width && (newPc & mask(2))) { + // To avoid PC Alignment fault when returning to AArch32 + if (spsr.t) + newPc = newPc & ~mask(1); + else + newPc = newPc & ~mask(2); + } + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + + OperatingMode mode = (OperatingMode) (uint8_t) spsr.mode; + bool illegal = false; + ExceptionLevel target_el; + if (badMode(mode)) { + illegal = true; + } else { + target_el = opModeToEL(mode); + if (((target_el == EL2) && + !ArmSystem::haveVirtualization(xc->tcBase())) || + (target_el > curr_el) || + (spsr.width == 1)) { + illegal = true; + } else { + bool known = true; + bool from32 = (spsr.width == 1); + bool to32 = false; + if (false) { // TODO: !haveAArch32EL + to32 = false; + } else if (!ArmSystem::highestELIs64(xc->tcBase())) { + to32 = true; + } else { + bool scr_rw, hcr_rw; + if (ArmSystem::haveSecurity(xc->tcBase())) { + SCR scr = xc->tcBase()->readMiscReg(MISCREG_SCR_EL3); + scr_rw = scr.rw; + } else { + scr_rw = true; + } + + if (ArmSystem::haveVirtualization(xc->tcBase())) { + HCR hcr = xc->tcBase()->readMiscReg(MISCREG_HCR_EL2); + hcr_rw = hcr.rw; + } else { + hcr_rw = scr_rw; + } + + switch (target_el) { + case EL3: + to32 = false; + break; + case EL2: + to32 = !scr_rw; + break; + case EL1: + to32 = !scr_rw || !hcr_rw; + break; + case EL0: + if (curr_el == EL0) { + to32 = cpsr.width; + } else if (!scr_rw || !hcr_rw) { + // EL0 using AArch32 if EL1 using AArch32 + to32 = true; + } else { + known = false; + to32 = false; + } + } + } + if (known) + illegal = (from32 != to32); + } + } + + if (illegal) { + uint8_t old_mode = cpsr.mode; + spsr.mode = old_mode; // Preserve old mode when invalid + spsr.il = 1; + } else { + if (cpsr.width != spsr.width) + panic("AArch32/AArch64 interprocessing not supported yet"); + } + Cpsr = spsr; + + CondCodesNZ = spsr.nz; + CondCodesC = spsr.c; + CondCodesV = spsr.v; + NPC = purifyTaggedAddr(newPc, xc->tcBase(), + opModeToEL((OperatingMode) (uint8_t) spsr.mode)); + LLSCLock = 0; // Clear exclusive monitor + SevMailbox = 1; //Set Event Register + ''' + instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter'] + bIop = InstObjParams('eret', 'Eret64', "BranchEret64", bCode, instFlags) + header_output += BasicDeclare.subst(bIop) + decoder_output += BasicConstructor64.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # CBNZ, CBZ + for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): + code = ('NPC = (Op164 %(test)s 0) ? 
' + 'purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmReg64Declare.subst(iop) + decoder_output += BranchImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + # TBNZ, TBZ + for (mnem, test) in (("tbz", "=="), ("tbnz", "!=")): + code = ('NPC = ((Op164 & imm1) %(test)s 0) ? ' + 'purifyTaggedAddr(RawPC + imm2, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmImmReg64Declare.subst(iop) + decoder_output += BranchImmImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa index be56554b0..881676496 100644 --- a/src/arch/arm/isa/insts/data.isa +++ b/src/arch/arm/isa/insts/data.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -257,7 +257,8 @@ let {{ CPSR old_cpsr = Cpsr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa new file mode 100644 index 000000000..77d7541ca --- /dev/null +++ b/src/arch/arm/isa/insts/data64.isa @@ -0,0 +1,465 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
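As a quick illustration of the compare-and-branch and test-and-branch semantics defined just above (a hypothetical distillation; bitMask and off stand in for the decoded imm1/imm2 fields):

    #include <cstdint>

    // TBZ/TBNZ test a single bit, CBZ/CBNZ the whole register.
    uint64_t nextPc(uint64_t pc, uint64_t op1, uint64_t bitMask,
                    int64_t off, bool testBit, bool branchOnNonZero) {
        uint64_t v = testBit ? (op1 & bitMask) : op1;
        bool take = branchOnNonZero ? (v != 0) : (v == 0);
        return take ? (uint64_t)(pc + off) : pc + 4;      // otherwise fall through
    }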
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + def createCcCode64(carry, overflow): + code = "" + code += ''' + uint16_t _iz, _in; + _in = bits(resTemp, intWidth - 1); + _iz = ((resTemp & mask(intWidth)) == 0); + CondCodesNZ = (_in << 1) | _iz; + DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz); + ''' + if overflow and overflow != "none": + code += ''' + uint16_t _iv; + _iv = %s & 1; + CondCodesV = _iv; + DPRINTF(Arm, "(iv) = (%%d)\\n", _iv); + ''' % overflow + if carry and carry != "none": + code += ''' + uint16_t _ic; + _ic = %s & 1; + CondCodesC = _ic; + DPRINTF(Arm, "(ic) = (%%d)\\n", _ic); + ''' % carry + return code + + oldC = 'CondCodesC' + oldV = 'CondCodesV' + # Dicts of ways to set the carry flag. + carryCode64 = { + "none": "none", + "add": 'findCarry(intWidth, resTemp, Op164, secOp)', + "sub": 'findCarry(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + # Dict of ways to set the overflow flag. + overflowCode64 = { + "none": "none", + "add": 'findOverflow(intWidth, resTemp, Op164, secOp)', + "sub": 'findOverflow(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + + immOp2 = "uint64_t secOp M5_VAR_USED = imm;" + sRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "shiftReg64(Op264, shiftAmt, shiftType, intWidth);" + eRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "extendReg64(Op264, extendType, shiftAmt, intWidth);" + + def buildDataWork(mnem, code, flagType, suffix, buildCc, buildNonCc, + base, templateBase): + code = ''' + uint64_t resTemp M5_VAR_USED = 0; + ''' + code + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + Name = mnem.capitalize() + suffix + iop = InstObjParams(mnem, Name, base, code) + iopCc = InstObjParams(mnem + "s", Name + "Cc", base, code + ccCode) + + def subst(iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + if buildNonCc: + subst(iop) + if buildCc: + subst(iopCc) + + def buildXImmDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XImm"): + buildDataWork(mnem, immOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXImmOp", "DataXImm") + + def buildXSRegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XSReg"): + buildDataWork(mnem, sRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXSRegOp", "DataXSReg") + + def buildXERegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XEReg"): + buildDataWork(mnem, eRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXERegOp", "DataXEReg") + + def buildDataInst(mnem, code, flagType = "logic", + buildCc = True, buildNonCc = True): + buildXImmDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXSRegDataInst(mnem, code, flagType, buildCc, buildNonCc) + 
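For reference, the condition-flag fragments assembled by createCcCode64 above reduce to the usual N/Z/C/V rules. A hedged C++ rendering for the add case, assuming both operands are already truncated to intWidth (the findCarry/findOverflow helper names come from the gem5 headers; the bodies below are my own illustration):

    #include <cstdint>

    struct NZCV { bool n, z, c, v; };

    static uint64_t widthMask(int w) { return w == 64 ? ~0ULL : (1ULL << w) - 1; }

    NZCV addFlags(int intWidth, uint64_t op1, uint64_t op2) {
        op1 &= widthMask(intWidth);
        op2 &= widthMask(intWidth);
        uint64_t res = (op1 + op2) & widthMask(intWidth);
        NZCV f;
        f.n = (res >> (intWidth - 1)) & 1;   // _in = bits(resTemp, intWidth - 1)
        f.z = res == 0;                      // _iz = ((resTemp & mask(intWidth)) == 0)
        f.c = res < op1;                     // carry: unsigned wrap-around occurred
        uint64_t sign = 1ULL << (intWidth - 1);
        f.v = ((op1 ^ res) & (op2 ^ res) & sign) != 0;  // operand signs agree, result differs
        return f;
    }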
buildXERegDataInst(mnem, code, flagType, buildCc, buildNonCc) + + buildXImmDataInst("adr", "Dest64 = RawPC + imm", buildCc = False); + buildXImmDataInst("adrp", "Dest64 = (RawPC & ~mask(12)) + imm", + buildCc = False); + buildDataInst("and", "Dest64 = resTemp = Op164 & secOp;") + buildDataInst("eor", "Dest64 = Op164 ^ secOp;", buildCc = False) + buildXSRegDataInst("eon", "Dest64 = Op164 ^ ~secOp;", buildCc = False) + buildDataInst("sub", "Dest64 = resTemp = Op164 - secOp;", "sub") + buildDataInst("add", "Dest64 = resTemp = Op164 + secOp;", "add") + buildXSRegDataInst("adc", + "Dest64 = resTemp = Op164 + secOp + %s;" % oldC, "add") + buildXSRegDataInst("sbc", + "Dest64 = resTemp = Op164 - secOp - !%s;" % oldC, "sub") + buildDataInst("orr", "Dest64 = Op164 | secOp;", buildCc = False) + buildXSRegDataInst("orn", "Dest64 = Op164 | ~secOp;", buildCc = False) + buildXSRegDataInst("bic", "Dest64 = resTemp = Op164 & ~secOp;") + + def buildDataXImmInst(mnem, code, optArgs = []): + global header_output, decoder_output, exec_output + classNamePrefix = mnem[0].upper() + mnem[1:] + templateBase = "DataXImm" + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXRegInst(mnem, regOps, code, optArgs = [], + overrideOpClass=None): + global header_output, decoder_output, exec_output + templateBase = "DataX%dReg" % regOps + classNamePrefix = mnem[0].upper() + mnem[1:] + if overrideOpClass: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", + { 'code': code, 'op_class': overrideOpClass}, + optArgs) + else: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + buildDataXRegInst("madd", 3, "Dest64 = Op164 + Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("msub", 3, "Dest64 = Op164 - Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("smaddl", 3, + "XDest = XOp1 + sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smsubl", 3, + "XDest = XOp1 - sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smulh", 2, ''' + uint64_t op1H = (int32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (int32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += shiftReg64(mid1, 32, ASR, intWidth); + result += shiftReg64(mid2, 32, ASR, intWidth); + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + buildDataXRegInst("umaddl", 3, "XDest = XOp1 + WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umsubl", 3, "XDest = XOp1 - WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umulh", 2, ''' + uint64_t op1H = (uint32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (uint32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += mid1 >> 32; + result += mid2 >> 32; + XDest = result + op1H 
* op2H; + ''', overrideOpClass="IntMultOp") + + buildDataXRegInst("asrv", 2, + "Dest64 = shiftReg64(Op164, Op264, ASR, intWidth)") + buildDataXRegInst("lslv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSL, intWidth)") + buildDataXRegInst("lsrv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSR, intWidth)") + buildDataXRegInst("rorv", 2, + "Dest64 = shiftReg64(Op164, Op264, ROR, intWidth)") + buildDataXRegInst("sdiv", 2, ''' + int64_t op1 = Op164; + int64_t op2 = Op264; + if (intWidth == 32) { + op1 = sext<32>(op1); + op2 = sext<32>(op2); + } + Dest64 = op2 == -1 ? -op1 : op2 ? op1 / op2 : 0; + ''', overrideOpClass="IntDivOp") + buildDataXRegInst("udiv", 2, "Dest64 = Op264 ? Op164 / Op264 : 0", + overrideOpClass="IntDivOp") + + buildDataXRegInst("cls", 1, ''' + uint64_t op1 = Op164; + if (bits(op1, intWidth - 1)) + op1 ^= mask(intWidth); + Dest64 = (op1 == 0) ? intWidth - 1 : (intWidth - 2 - findMsbSet(op1)); + ''') + buildDataXRegInst("clz", 1, ''' + Dest64 = (Op164 == 0) ? intWidth : (intWidth - 1 - findMsbSet(Op164)); + ''') + buildDataXRegInst("rbit", 1, ''' + uint64_t result = Op164; + uint64_t lBit = 1ULL << (intWidth - 1); + uint64_t rBit = 1ULL; + while (lBit > rBit) { + uint64_t maskBits = lBit | rBit; + uint64_t testBits = result & maskBits; + // If these bits are different, swap them by toggling them. + if (testBits && testBits != maskBits) + result ^= maskBits; + lBit >>= 1; rBit <<= 1; + } + Dest64 = result; + ''') + buildDataXRegInst("rev", 1, ''' + if (intWidth == 32) + Dest64 = betole<uint32_t>(Op164); + else + Dest64 = betole<uint64_t>(Op164); + ''') + buildDataXRegInst("rev16", 1, ''' + int count = intWidth / 16; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint16_t hw = Op164 >> (i * 16); + result |= (uint64_t)betole<uint16_t>(hw) << (i * 16); + } + Dest64 = result; + ''') + buildDataXRegInst("rev32", 1, ''' + int count = intWidth / 32; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint32_t hw = Op164 >> (i * 32); + result |= (uint64_t)betole<uint32_t>(hw) << (i * 32); + } + Dest64 = result; + ''') + + msrMrs64EnabledCheckCode = ''' + // Check for read/write access right + if (!can%sAArch64SysReg(flat_idx, Scr64, cpsr, xc->tcBase())) { + if (flat_idx == MISCREG_DAIF || + flat_idx == MISCREG_DC_ZVA_Xt || + flat_idx == MISCREG_DC_CVAC_Xt || + flat_idx == MISCREG_DC_CIVAC_Xt + ) + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + return new UndefinedInstruction(machInst, false, mnemonic); + } + + // Check for traps to supervisor (FP/SIMD regs) + if (el <= EL1 && msrMrs64TrapToSup(flat_idx, el, Cpacr64)) + return new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_SIMD_FP); + + bool is_vfp_neon = false; + + // Check for traps to hypervisor + if ((ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) && + msrMrs64TrapToHyp(flat_idx, %s, CptrEl264, Hcr64, &is_vfp_neon)) { + return new HypervisorTrap(machInst, is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + + // Check for traps to secure monitor + if ((ArmSystem::haveSecurity(xc->tcBase()) && el <= EL3) && + msrMrs64TrapToMon(flat_idx, CptrEl364, el, &is_vfp_neon)) { + return new SecureMonitorTrap(machInst, + is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? 
EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + ''' + + buildDataXImmInst("mrs", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(op1); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + XDest = MiscOp1_ud; + ''' % (msrMrs64EnabledCheckCode % ('Read', 'true'),), + ["IsSerializeBefore"]) + + buildDataXRegInst("mrsNZCV", 1, ''' + CPSR cpsr = 0; + cpsr.nz = CondCodesNZ; + cpsr.c = CondCodesC; + cpsr.v = CondCodesV; + XDest = cpsr; + ''') + + buildDataXImmInst("msr", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + MiscDest_ud = XOp1; + ''' % (msrMrs64EnabledCheckCode % ('Write', 'false'),), + ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXRegInst("msrNZCV", 1, ''' + CPSR cpsr = XOp1; + CondCodesNZ = cpsr.nz; + CondCodesC = cpsr.c; + CondCodesV = cpsr.v; + ''') + + msrdczva_ea_code = ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + ''' + + msrdczva_ea_code += msrMrs64EnabledCheckCode % ('Write', 'false') + msrdczva_ea_code += ''' + Request::Flags memAccessFlags = Request::CACHE_BLOCK_ZERO|ArmISA::TLB::MustBeOne; + EA = XBase; + assert(!(Dczid & 0x10)); + uint64_t op_size = power(2, Dczid + 2); + EA &= ~(op_size - 1); + + ''' + + msrDCZVAIop = InstObjParams("dczva", "Dczva", "SysDC64", + { "ea_code" : msrdczva_ea_code, + "memacc_code" : ";", "use_uops" : 0, + "op_wb" : ";", "fa_code" : ";"}, ['IsStore', 'IsMemRef']); + header_output += DCStore64Declare.subst(msrDCZVAIop); + decoder_output += DCStore64Constructor.subst(msrDCZVAIop); + exec_output += DCStore64Execute.subst(msrDCZVAIop); + exec_output += DCStore64InitiateAcc.subst(msrDCZVAIop); + exec_output += Store64CompleteAcc.subst(msrDCZVAIop); + + + + buildDataXImmInst("msrSP", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + MiscDest_ud = imm; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFSet", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif | imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFClr", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif & ~imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + def buildDataXCompInst(mnem, instType, suffix, code): + global header_output, decoder_output, exec_output + templateBase = "DataXCond%s" % instType + iop = InstObjParams(mnem, mnem.capitalize() + suffix + "64", + templateBase + "Op", code) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXCondImmInst(mnem, code): + buildDataXCompInst(mnem, "CompImm", "Imm", code) + def buildDataXCondRegInst(mnem, code): + buildDataXCompInst(mnem, "CompReg", "Reg", code) + def 
buildDataXCondSelInst(mnem, code): + buildDataXCompInst(mnem, "Sel", "", code) + + def condCompCode(flagType, op, imm): + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + opDecl = "uint64_t secOp M5_VAR_USED = imm;" + if not imm: + opDecl = "uint64_t secOp M5_VAR_USED = Op264;" + return opDecl + ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + uint64_t resTemp = Op164 ''' + op + ''' secOp; + ''' + ccCode + ''' + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + ''' + + buildDataXCondImmInst("ccmn", condCompCode("add", "+", True)) + buildDataXCondImmInst("ccmp", condCompCode("sub", "-", True)) + buildDataXCondRegInst("ccmn", condCompCode("add", "+", False)) + buildDataXCondRegInst("ccmp", condCompCode("sub", "-", False)) + + condSelCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + Dest64 = Op164; + } else { + Dest64 = %(altVal)s; + } + ''' + buildDataXCondSelInst("csel", condSelCode % {"altVal" : "Op264"}) + buildDataXCondSelInst("csinc", condSelCode % {"altVal" : "Op264 + 1"}) + buildDataXCondSelInst("csinv", condSelCode % {"altVal" : "~Op264"}) + buildDataXCondSelInst("csneg", condSelCode % {"altVal" : "-Op264"}) +}}; diff --git a/src/arch/arm/isa/insts/div.isa b/src/arch/arm/isa/insts/div.isa index 1ff6ef9e4..0896ea94f 100644 --- a/src/arch/arm/isa/insts/div.isa +++ b/src/arch/arm/isa/insts/div.isa @@ -40,12 +40,6 @@ let {{ sdivCode = ''' if (Op2_sw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_sw = 0; } else if (Op1_sw == INT_MIN && Op2_sw == -1) { Dest_sw = INT_MIN; @@ -63,12 +57,6 @@ let {{ udivCode = ''' if (Op2_uw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_uw = 0; } else { Dest_uw = Op1_uw / Op2_uw; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index b701995f4..60f030c3d 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -191,14 +191,17 @@ let {{ decoder_output = "" exec_output = "" - vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegOp", - { "code": vmsrEnabledCheckCode + \ - "MiscDest = Op1;", + vmsrCode = vmsrEnabledCheckCode + ''' + MiscDest = Op1; + ''' + + vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegImmOp", + { "code": vmsrCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += FpRegRegOpDeclare.subst(vmsrIop); - decoder_output += FpRegRegOpConstructor.subst(vmsrIop); + header_output += FpRegRegImmOpDeclare.subst(vmsrIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmsrIop); exec_output += PredOpExecute.subst(vmsrIop); vmsrFpscrCode = vmsrEnabledCheckCode + ''' @@ -215,14 +218,36 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); - vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegOp", - { "code": vmrsEnabledCheckCode + \ - "Dest = MiscOp1;", + vmrsCode = vmrsEnabledCheckCode + ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + HCR hcr = 
Hcr; + bool hypTrap = false; + switch(xc->tcBase()->flattenMiscIndex(op1)) { + case MISCREG_FPSID: + hypTrap = hcr.tid0; + break; + case MISCREG_MVFR0: + case MISCREG_MVFR1: + hypTrap = hcr.tid3; + break; + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP10_MRC_VMRS); + } + } + Dest = MiscOp1; + ''' + + vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegImmOp", + { "code": vmrsCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeBefore"]) - header_output += FpRegRegOpDeclare.subst(vmrsIop); - decoder_output += FpRegRegOpConstructor.subst(vmrsIop); + header_output += FpRegRegImmOpDeclare.subst(vmrsIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmrsIop); exec_output += PredOpExecute.subst(vmrsIop); vmrsFpscrIop = InstObjParams("vmrs", "VmrsFpscr", "FpRegRegOp", @@ -323,7 +348,7 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmovRegQIop); exec_output += PredOpExecute.subst(vmovRegQIop); - vmovCoreRegBCode = vfpEnabledCheckCode + ''' + vmovCoreRegBCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 8 + 7, imm * 8, Op1_ub); ''' vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp", @@ -334,7 +359,7 @@ let {{ decoder_output += FpRegRegImmOpConstructor.subst(vmovCoreRegBIop); exec_output += PredOpExecute.subst(vmovCoreRegBIop); - vmovCoreRegHCode = vfpEnabledCheckCode + ''' + vmovCoreRegHCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 16 + 15, imm * 16, Op1_uh); ''' vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp", @@ -453,6 +478,17 @@ let {{ singleCode = singleSimpleCode + ''' FpscrExc = fpscr; ''' + singleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + float cOp1 = FpOp1; + float cOp2 = FpOp2; + float cOp3 = FpDestP0; + FpDestP0 = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)" @@ -463,6 +499,19 @@ let {{ FpDestP1_uw = dblHi(dest); FpscrExc = fpscr; ''' + doubleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); + double cOp2 = dbl(FpOp2P0_uw, FpOp2P1_uw); + double cOp3 = dbl(FpDestP0_uw, FpDestP1_uw); + double cDest = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + FpDestP0_uw = dblLow(cDest); + FpDestP1_uw = dblHi(cDest); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' doubleBinOp = ''' binaryOp(fpscr, dbl(FpOp1P0_uw, FpOp1P1_uw), dbl(FpOp2P0_uw, FpOp2P1_uw), @@ -473,6 +522,37 @@ let {{ fpscr.fz, fpscr.rMode) ''' + def buildTernaryFpOp(Name, base, opClass, singleOp, doubleOp, paramStr): + global header_output, decoder_output, exec_output + + code = singleTernOp % { "op": singleOp, "palam": paramStr } + sIop = InstObjParams(Name.lower() + "s", Name + "S", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + code = doubleTernOp % { "op": doubleOp, "palam": paramStr } + dIop = InstObjParams(Name.lower() + "d", Name + "D", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + + declareTempl = eval(base + "Declare"); + constructorTempl = eval(base + "Constructor"); + + for iop in sIop, 
dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += PredOpExecute.subst(iop) + + buildTernaryFpOp("Vfma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", " cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", "-cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfnma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", "-cOp1, cOp2, -cOp3" ) + buildTernaryFpOp("Vfnms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", " cOp1, cOp2, -cOp3" ) + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): global header_output, decoder_output, exec_output @@ -830,7 +910,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0, false); + FpDest_uw = vfpFpToFixed(FpOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -849,7 +929,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false); + uint64_t result = vfpFpToFixed(cOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -868,7 +948,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0, false); + FpDest_sw = vfpFpToFixed(FpOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -887,7 +967,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false); + int64_t result = vfpFpToFixed(cOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -907,7 +987,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0); + FpDest_uw = vfpFpToFixed(FpOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -927,7 +1007,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0); + uint64_t result = vfpFpToFixed(cOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -947,7 +1027,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0); + FpDest_sw = vfpFpToFixed(FpOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -967,7 +1047,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0); + int64_t result = vfpFpToFixed(cOp1, true, 
32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1333,7 +1413,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, imm); + FpDest_sw = vfpFpToFixed(FpOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1352,7 +1432,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm); + uint64_t mid = vfpFpToFixed(cOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1372,7 +1452,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, imm); + FpDest_uw = vfpFpToFixed(FpOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1391,7 +1471,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm); + uint64_t mid = vfpFpToFixed(cOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1410,7 +1490,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sw) : "m" (FpOp1_sw)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, false, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1428,7 +1508,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1447,7 +1527,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uw) : "m" (FpOp1_uw)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, false, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1465,7 +1545,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1485,7 +1565,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sh = vfpFpSToFixed(FpOp1, true, true, imm); + FpDest_sh = vfpFpToFixed(FpOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_sh)); 
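The pattern running through these hunks is uniform: the old vfpFpSToFixed/vfpFpDToFixed helpers took a bool that selected a half-width result, while the consolidated vfpFpToFixed takes the destination width in bits explicitly (16 and 32 at these call sites), with the remaining arguments (signedness, fraction bits, and an optional round-to-zero flag) unchanged. Purely as an illustration of what such a conversion computes, here is a hypothetical, simplified stand-in; the real helper also raises the FPSCR cumulative exception flags and honours the prepared rounding mode:

    #include <cmath>
    #include <cstdint>

    // Hypothetical sketch, not gem5's implementation.
    int64_t fpToFixed(double op, bool isSigned, int width, int fracBits) {
        double scaled = std::round(op * std::ldexp(1.0, fracBits)); // scale by 2^fracBits
        double lo = isSigned ? -std::ldexp(1.0, width - 1) : 0.0;
        double hi = std::ldexp(1.0, isSigned ? width - 1 : width) - 1.0;
        if (scaled < lo) scaled = lo;                               // saturate out-of-range values
        if (scaled > hi) scaled = hi;
        return (int64_t)scaled;
    }

Making the width an explicit parameter is what lets one helper replace the separate single/double variants and later accommodate the 64-bit AArch64 forms.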
finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1505,7 +1585,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, true, true, imm); + uint64_t result = vfpFpToFixed(cOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1526,7 +1606,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uh = vfpFpSToFixed(FpOp1, false, true, imm); + FpDest_uh = vfpFpToFixed(FpOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_uh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1546,7 +1626,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm); + uint64_t mid = vfpFpToFixed(cOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1566,7 +1646,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sh) : "m" (FpOp1_sh)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, true, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1585,7 +1665,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1605,7 +1685,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uh) : "m" (FpOp1_uh)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, true, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1624,7 +1704,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa new file mode 100644 index 000000000..95dec5062 --- /dev/null +++ b/src/arch/arm/isa/insts/fp64.isa @@ -0,0 +1,811 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Thomas Grocutt +// Edmund Grimley Evans + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + fmovImmSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", + { "code": fmovImmSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmSIop); + decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); + exec_output += BasicExecute.subst(fmovImmSIop); + + fmovImmDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = bits(imm, 63, 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", + { "code": fmovImmDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmDIop); + decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); + exec_output += BasicExecute.subst(fmovImmDIop); + + fmovRegSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", + { "code": fmovRegSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); + exec_output += BasicExecute.subst(fmovRegSIop); + + fmovRegDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = AA64FpOp1P1_uw; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", + { "code": fmovRegDCode, + 
"op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); + exec_output += BasicExecute.subst(fmovRegDIop); + + fmovCoreRegWCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = WOp1_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", + { "code": fmovCoreRegWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); + exec_output += BasicExecute.subst(fmovCoreRegWIop); + + fmovCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = XOp1_ud; + AA64FpDestP1_uw = XOp1_ud >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", + { "code": fmovCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); + exec_output += BasicExecute.subst(fmovCoreRegXIop); + + fmovUCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP2_uw = XOp1_ud; + AA64FpDestP3_uw = XOp1_ud >> 32; + ''' + fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", + { "code": fmovUCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); + exec_output += BasicExecute.subst(fmovUCoreRegXIop); + + fmovRegCoreWCode = vfp64EnabledCheckCode + ''' + WDest = AA64FpOp1P0_uw; + ''' + fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp", + { "code": fmovRegCoreWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop); + exec_output += BasicExecute.subst(fmovRegCoreWIop); + + fmovRegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw; + ''' + fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp", + { "code": fmovRegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop); + exec_output += BasicExecute.subst(fmovRegCoreXIop); + + fmovURegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw; + ''' + fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp", + { "code": fmovURegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop); + exec_output += BasicExecute.subst(fmovURegCoreXIop); +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + singleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + 
AA64FpDestP3_uw = 0;
+        FpscrExc = fpscr;
+    '''
+
+    singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
+                  "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
+    singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
+
+    doubleIntConvCode = vfp64EnabledCheckCode + '''
+        FPSCR fpscr = (FPSCR) FpscrExc;
+        uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
+        uint64_t cDest = %(op)s;
+        AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
+        AA64FpDestP1_uw = cDest >> 32;
+        AA64FpDestP2_uw = 0;
+        AA64FpDestP3_uw = 0;
+        FpscrExc = fpscr;
+    '''
+
+    doubleIntConvCode2 = vfp64EnabledCheckCode + '''
+        FPSCR fpscr = (FPSCR) FpscrExc;
+        uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
+        uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
+        uint64_t cDest = %(op)s;
+        AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
+        AA64FpDestP1_uw = cDest >> 32;
+        AA64FpDestP2_uw = 0;
+        AA64FpDestP3_uw = 0;
+        FpscrExc = fpscr;
+    '''
+
+    doubleBinOp = '''
+        binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
+                 dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
+                 %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
+    '''
+    doubleUnaryOp = '''
+        unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
+                fpscr.fz, fpscr.rMode)
+    '''
+
+    def buildTernaryFpOp(name, opClass, sOp, dOp):
+        global header_output, decoder_output, exec_output
+        for isDouble in True, False:
+            code = vfp64EnabledCheckCode + '''
+                FPSCR fpscr = (FPSCR) FpscrExc;
+            '''
+            if isDouble:
+                code += '''
+                    uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+                    uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
+                    uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
+                    uint64_t cDest;
+                ''' "cDest = " + dOp + ";" + '''
+                    AA64FpDestP0_uw = cDest;
+                    AA64FpDestP1_uw = cDest >> 32;
+                '''
+            else:
+                code += '''
+                    uint32_t cOp1 = AA64FpOp1P0_uw;
+                    uint32_t cOp2 = AA64FpOp2P0_uw;
+                    uint32_t cOp3 = AA64FpOp3P0_uw;
+                    uint32_t cDest;
+                ''' "cDest = " + sOp + ";" + '''
+                    AA64FpDestP0_uw = cDest;
+                    AA64FpDestP1_uw = 0;
+                '''
+            code += '''
+                AA64FpDestP2_uw = 0;
+                AA64FpDestP3_uw = 0;
+                FpscrExc = fpscr;
+            '''
+
+            iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"),
+                                "FpRegRegRegRegOp",
+                                { "code": code, "op_class": opClass }, [])
+
+            header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
+            decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
+            exec_output += BasicExecute.subst(iop)
+
+    buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
+        "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
+        "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
+    buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
+        "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
+        "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
+    buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
+        "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
+        "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
+    buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
+        "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
+        "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
+
+    def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
+        global header_output, decoder_output, exec_output
+
+        code = singleIntConvCode2 % { "op": singleOp }
+        sIop = InstObjParams(name, Name + "S", base,
+                             { "code": code,
+                               "op_class": opClass }, [])
+
+        code = doubleIntConvCode2 % { "op": doubleOp }
+        dIop = InstObjParams(name, Name + "D", base,
+                             { "code": code,
+                               "op_class": opClass }, [])
+
+        declareTempl = eval( base + "Declare");
+        constructorTempl = eval("AA64" + base + "Constructor");
+ "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibAdd(cOp1, cOp2, fpscr)", + "fplibAdd(cOp1, cOp2, fpscr)") + buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibSub(cOp1, cOp2, fpscr)", + "fplibSub(cOp1, cOp2, fpscr)") + buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp", + "fplibDiv(cOp1, cOp2, fpscr)", + "fplibDiv(cOp1, cOp2, fpscr)") + buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibMul(cOp1, cOp2, fpscr)", + "fplibMul(cOp1, cOp2, fpscr)") + buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibNeg(fplibMul(cOp1, cOp2, fpscr))", + "fplibNeg(fplibMul(cOp1, cOp2, fpscr))") + buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMin(cOp1, cOp2, fpscr)", + "fplibMin(cOp1, cOp2, fpscr)") + buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMax(cOp1, cOp2, fpscr)", + "fplibMax(cOp1, cOp2, fpscr)") + buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMinNum(cOp1, cOp2, fpscr)", + "fplibMinNum(cOp1, cOp2, fpscr)") + buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMaxNum(cOp1, cOp2, fpscr)", + "fplibMaxNum(cOp1, cOp2, fpscr)") + + def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + code = singleIntConvCode % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + code = doubleIntConvCode % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp", + "fplibSqrt(cOp1, fpscr)", "fplibSqrt(cOp1, fpscr)") + + def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + doubleOp = None, isIntConv = True): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + if isIntConv: + sCode = singleIntConvCode + dCode = doubleIntConvCode + else: + sCode = singleCode + dCode = doubleCode + + for code, op, suffix in [[sCode, singleOp, "S"], + [dCode, doubleOp, "D"]]: + iop = InstObjParams(name, Name + suffix, base, + { "code": code % { "op": op }, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp", + "fplibNeg(cOp1)", "fplibNeg(cOp1)") + buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp", + "fplibAbs(cOp1)", "fplibAbs(cOp1)") + buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)") + buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPRounding_POSINF, false, 
fpscr)", + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)") + buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)") + buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)") + buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)") + buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)") + buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)") +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Creates the integer to floating point instructions, including variants for + # signed/unsigned, float/double, etc + for regL, regOpL, width in [["W", "w", 32], + ["X", "d", 64]]: + for isDouble in True, False: + for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)], + ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]: + fcvtIntFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s + ''' %(usCode) + + if isDouble: + fcvtIntFpDCode += ''' + uint64_t cDest = fplibFixedToFP(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' % ("true" if us == "U" else "false") + else: + fcvtIntFpDCode += ''' + uint32_t cDest = fplibFixedToFP(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' % ("true" if us == "U" else "false") + fcvtIntFpDCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S") + mnem = "%scvtf" %(us.lower()) + fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtIntFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); + exec_output += BasicExecute.subst(fcvtIntFpDIop); + + # Generates the floating point to integer conversion instructions in various + # variants, eg signed/unsigned + def buildFpCvtIntOp(isDouble, isSigned, isXReg): + global header_output, decoder_output, exec_output + + for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"], + ["P", "FPRounding_POSINF"], + ["M", "FPRounding_NEGINF"], + ["Z", "FPRounding_ZERO"], + ["A", "FPRounding_TIEAWAY"]]: + fcvtFpIntCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc;''' + if isDouble: + fcvtFpIntCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + + fcvtFpIntCode += ''' + %sDest = fplibFPToFixed(cOp1, 0, %s, %s, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true", + roundingMode) + + instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U", + "X" if isXReg else "W", + "D" if isDouble else "S", rmode) + mnem = "fcvt%s%s" %(rmode, "s" if 
isSigned else "u") + fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtFpIntCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop); + decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop); + exec_output += BasicExecute.subst(fcvtFpIntIop); + + # Now actually do the building with the different variants + for isDouble in True, False: + for isSigned in True, False: + for isXReg in True, False: + buildFpCvtIntOp(isDouble, isSigned, isXReg) + + fcvtFpSFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cDest = fplibConvert(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", + { "code": fcvtFpSFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); + exec_output += BasicExecute.subst(fcvtFpSFpDIop); + + fcvtFpDFpSCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + AA64FpDestP0_uw = fplibConvert(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", + {"code": fcvtFpDFpSCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); + exec_output += BasicExecute.subst(fcvtFpDFpSIop); + + # Half precision to single or double precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cDest = fplibConvert(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + ''' % ("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32") + if isDouble: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "FcvtFpHFp%s" %("D" if isDouble else "S") + fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); + exec_output += BasicExecute.subst(fcvtFpHFpIop); + + # single or double precision to Half precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s; + AA64FpDestP0_uw = fplibConvert(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw", + "64" if isDouble else "32") + + instName = "FcvtFp%sFpH" %("D" if isDouble else "S") + fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); + exec_output += BasicExecute.subst(fcvtFpFpHIop); + + # Build the various versions of 
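
All of these conversion builders splice widths and flags into one C++ template string via Python %-substitution, so every placeholder must line up with its entry in the argument tuple. A reduced sketch of how a single fcvt variant expands (illustrative values, run outside gem5):

    # One expansion of the fcvtFpIntCode substitution used above.
    template = ("%sDest = fplibFPToFixed<uint%s_t, uint%s_t>"
                "(cOp1, 0, %s, %s, fpscr);")
    print(template % ("X", "64", "64", "false", "FPRounding_TIEEVEN"))
    # XDest = fplibFPToFixed<uint64_t, uint64_t>(cOp1, 0, false,
    # FPRounding_TIEEVEN, fpscr);
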
+
+    # Build the various versions of the floating point compare instructions
+    def buildFCmpOp(isQuiet, isDouble, isImm):
+        global header_output, decoder_output, exec_output
+
+        fcmpCode = vfp64EnabledCheckCode + '''
+            FPSCR fpscr = (FPSCR) FpscrExc;
+            %s cOp1 = %s;
+        ''' % ("uint64_t" if isDouble else "uint32_t",
+               "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
+               if isDouble else "AA64FpDestP0_uw")
+        if isImm:
+            fcmpCode += '''
+                %s cOp2 = imm;
+            ''' % ("uint64_t" if isDouble else "uint32_t")
+        else:
+            fcmpCode += '''
+                %s cOp2 = %s;
+            ''' % ("uint64_t" if isDouble else "uint32_t",
+                   "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+                   if isDouble else "AA64FpOp1P0_uw")
+        fcmpCode += '''
+            int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
+            CondCodesNZ = cc >> 2 & 3;
+            CondCodesC = cc >> 1 & 1;
+            CondCodesV = cc & 1;
+            FpCondCodes = fpscr & FpCondCodesMask;
+            FpscrExc = fpscr;
+        ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")
+
+        typeName = "Imm" if isImm else "Reg"
+        instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName,
+                                  "D" if isDouble else "S")
+        fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
+                                "FpReg%sOp" %(typeName),
+                                {"code": fcmpCode,
+                                 "op_class": "SimdFloatCmpOp"}, [])
+
+        declareTemp = eval("FpReg%sOpDeclare" %(typeName));
+        constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
+        header_output += declareTemp.subst(fcmpIop);
+        decoder_output += constructorTemp.subst(fcmpIop);
+        exec_output += BasicExecute.subst(fcmpIop);
+
+    for isQuiet in True, False:
+        for isDouble in True, False:
+            for isImm in True, False:
+                buildFCmpOp(isQuiet, isDouble, isImm)
+
+    # Build the various versions of the conditional floating point compare
+    # instructions
+    def buildFCCmpOp(isQuiet, isDouble):
+        global header_output, decoder_output, exec_output
+
+        fccmpCode = vfp64EnabledCheckCode + '''
+            FPSCR fpscr = (FPSCR) FpscrExc;
+            if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+                %s cOp1 = %s;
+                %s cOp2 = %s;
+                int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
+                CondCodesNZ = cc >> 2 & 3;
+                CondCodesC = cc >> 1 & 1;
+                CondCodesV = cc & 1;
+            } else {
+                CondCodesNZ = (defCc >> 2) & 0x3;
+                CondCodesC = (defCc >> 1) & 0x1;
+                CondCodesV = defCc & 0x1;
+            }
+            FpCondCodes = fpscr & FpCondCodesMask;
+            FpscrExc = fpscr;
+        ''' % ("uint64_t" if isDouble else "uint32_t",
+               "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
+               if isDouble else "AA64FpOp1P0_uw",
+               "uint64_t" if isDouble else "uint32_t",
+               "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
+               if isDouble else "AA64FpOp2P0_uw",
+               "64" if isDouble else "32", "false" if isQuiet else "true")
+
+        instName = "FCCmp%sReg%s" %("" if isQuiet else "E",
+                                    "D" if isDouble else "S")
+        fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"),
+                                 instName, "FpCondCompRegOp",
+                                 {"code": fccmpCode,
+                                  "op_class": "SimdFloatCmpOp"}, [])
+        header_output += DataXCondCompRegDeclare.subst(fccmpIop);
+        decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
+        exec_output += BasicExecute.subst(fccmpIop);
+
+    for isQuiet in True, False:
+        for isDouble in True, False:
+            buildFCCmpOp(isQuiet, isDouble)
+
+}};
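
fplibCompare returns the four-bit NZCV result of an IEEE comparison, which the generated code above splits across the condition-code registers (cc >> 2 & 3 for NZ, cc >> 1 & 1 for C, cc & 1 for V). Assuming the standard ARM FPCompare encodings (equal 0b0110, less 0b1000, greater 0b0010, unordered 0b0011; taken from the architecture pseudocode, not from fplib itself), the unpacking behaves like this sketch:

    # Splits an NZCV nibble the same way the generated compare code does.
    for name, cc in [("equal", 0b0110), ("less", 0b1000),
                     ("greater", 0b0010), ("unordered", 0b0011)]:
        nz, c, v = cc >> 2 & 3, cc >> 1 & 1, cc & 1
        print("%-9s N=%d Z=%d C=%d V=%d" % (name, nz >> 1, nz & 1, c, v))
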
+
+let {{
+
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    # Generates the variants of the floating to fixed point instructions
+    def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
+        global header_output, decoder_output, exec_output
+
+        fcvtFpFixedCode = vfp64EnabledCheckCode + '''
+            FPSCR fpscr = (FPSCR) FpscrExc;
+        '''
+        if isDouble:
+            fcvtFpFixedCode += '''
+                uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
+            '''
+        else:
+            fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
+        fcvtFpFixedCode += '''
+            %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
+                         FPRounding_ZERO, fpscr);
+            FpscrExc = fpscr;
+        ''' %("X" if isXReg else "W",
+              "64" if isDouble else "32",
+              "64" if isXReg else "32",
+              "false" if isSigned else "true")
+
+        instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
+                                         "D" if isDouble else "S",
+                                         "X" if isXReg else "W")
+        mnem = "fcvtz%s" %("s" if isSigned else "u")
+        fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
+                                       { "code": fcvtFpFixedCode,
+                                         "op_class": "SimdFloatCvtOp" }, [])
+        header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
+        decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
+        exec_output += BasicExecute.subst(fcvtFpFixedIop);
+
+    # Generates the variants of the fixed to floating point instructions
+    def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
+        global header_output, decoder_output, exec_output
+
+        srcRegType = "X" if isXReg else "W"
+        fcvtFixedFpCode = vfp64EnabledCheckCode + '''
+            FPSCR fpscr = (FPSCR) FpscrExc;
+            %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
+                            %s, FPCRRounding(fpscr), fpscr);
+        ''' %("uint64_t" if isDouble else "uint32_t",
+              "64" if isDouble else "32",
+              "int" if isSigned else "uint", "64" if isXReg else "32",
+              srcRegType,
+              "false" if isSigned else "true")
+        if isDouble:
+            fcvtFixedFpCode += '''
+                AA64FpDestP0_uw = result;
+                AA64FpDestP1_uw = result >> 32;
+            '''
+        else:
+            fcvtFixedFpCode += '''
+                AA64FpDestP0_uw = result;
+                AA64FpDestP1_uw = 0;
+            '''
+        fcvtFixedFpCode += '''
+            AA64FpDestP2_uw = 0;
+            AA64FpDestP3_uw = 0;
+            FpscrExc = fpscr;
+        '''
+
+        instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
+                                         "D" if isDouble else "S",
+                                         srcRegType)
+        mnem = "%scvtf" %("s" if isSigned else "u")
+        fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
+                                       { "code": fcvtFixedFpCode,
+                                         "op_class": "SimdFloatCvtOp" }, [])
+        header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
+        decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
+        exec_output += BasicExecute.subst(fcvtFixedFpIop);
+
+    # Loop over the variants building the instructions for each
+    for isXReg in True, False:
+        for isDouble in True, False:
+            for isSigned in True, False:
+                buildFpCvtFixedOp(isSigned, isDouble, isXReg)
+                buildFixedCvtFpOp(isSigned, isDouble, isXReg)
+}};
+
+let {{
+
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    for isDouble in True, False:
+        code = '''
+            if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
+                AA64FpDestP0_uw = AA64FpOp1P0_uw;
+        '''
+        if isDouble:
+            code += '''
+                AA64FpDestP1_uw = AA64FpOp1P1_uw;
+            } else {
+                AA64FpDestP0_uw = AA64FpOp2P0_uw;
+                AA64FpDestP1_uw = AA64FpOp2P1_uw;
+            }
+            '''
+        else:
+            code += '''
+            } else {
+                AA64FpDestP0_uw = AA64FpOp2P0_uw;
+            }
+            AA64FpDestP1_uw = 0;
+            '''
+        code += '''
+            AA64FpDestP2_uw = 0;
+            AA64FpDestP3_uw = 0;
+        '''
+
+        iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
+                            "FpCondSelOp", code)
+        header_output += DataXCondSelDeclare.subst(iop)
+        decoder_output += DataXCondSelConstructor.subst(iop)
+        exec_output += BasicExecute.subst(iop)
+}};
diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa
index c01e87df8..9d90f7779 100644
--- a/src/arch/arm/isa/insts/insts.isa
+++ b/src/arch/arm/isa/insts/insts.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-

-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2012 ARM Limited
 // All rights reserved
// // The license below extends only to copyright in the software and shall @@ -37,6 +37,9 @@ // // Authors: Gabe Black +//AArch64 instructions +##include "aarch64.isa" + //Basic forms of various templates ##include "basic.isa" @@ -46,8 +49,15 @@ //Loads of a single item ##include "ldr.isa" +//Loads of a single item, AArch64 +##include "ldr64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" + +//Stores of a single item, AArch64 +##include "str64.isa" //Stores of a single item ##include "str.isa" @@ -61,8 +71,12 @@ //Data processing instructions ##include "data.isa" +//AArch64 data processing instructions +##include "data64.isa" + //Branches ##include "branch.isa" +##include "branch64.isa" //Multiply ##include "mult.isa" @@ -72,9 +86,14 @@ //VFP ##include "fp.isa" +##include "fp64.isa" //Neon ##include "neon.isa" +//AArch64 Neon +##include "neon64.isa" +##include "neon64_mem.isa" + //m5 Psuedo-ops ##include "m5ops.isa" diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index f599fa4b9..6bfe40118 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -78,7 +79,8 @@ let {{ newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, self.memFlags, instFlags, base, - wbDecl, pcDecl, self.rasPop) + wbDecl, pcDecl, self.rasPop, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -160,7 +162,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa new file mode 100644 index 000000000..78460f661 --- /dev/null +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -0,0 +1,446 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class LoadInst64(LoadStoreInst): + execBase = 'Load64' + micro = False + + def __init__(self, mnem, Name, size=4, sign=False, user=False, + literal=False, flavor="normal", top=False): + super(LoadInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.sign = sign + self.user = user + self.literal = literal + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = {"postacc_code" : ""} + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor == "dprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsDataPrefetch'] + elif self.flavor == "iprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsInstPrefetch'] + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("acexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. + self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("acquire", "acex", "exclusive", + "acexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.flavor in ("acquire", "acex", "acexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("acex", "exclusive", "exp", "acexp"): + self.memFlags.append("Request::LLSC") + + def buildEACode(self): + # Address computation code + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + if self.literal: + eaCode += "EA = RawPC" + else: + eaCode += SPAlignmentCheckCode + "EA = XBase" + + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 
0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + def emitHelper(self, base='Memory64', wbDecl=None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. + if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "widen", "acquire"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.sign else "false", + "true" if (self.size == 8 or + self.flavor == "widen") else "false", + "true" if self.flavor == "acquire" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + class LoadImmInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadImmInst64, self).__init__(*args, **kargs) + self.offset = " + imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class LoadRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRegInst64, self).__init__(*args, **kargs) + self.offset = " + extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class LoadRawRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class LoadSingle64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor in ("dprefetch", "iprefetch"): + accCode = 'uint64_t temp M5_VAR_USED = Mem%s;' + elif self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + AA64FpDestP0_uw = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = (data >> 32); + ''' + # Only zero out the other half if this isn't part of a + # pair of 8 byte loads implementing a 16 byte load. 
+ if self.size == 8: + accCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP2_uw = (uint32_t)data; + AA64FpDestP3_uw = (data >> 32); + ''' + elif self.flavor == "widen" or self.size == 8: + accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + else: + accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + if self.size == 16: + accCode = accCode % buildMemSuffix(self.sign, 8) + else: + accCode = accCode % buildMemSuffix(self.sign, self.size) + + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadDouble64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (data >> 32); + AA64FpDest2P1_uw = 0; + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' + else: + if self.sign: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = sext<32>((uint32_t)data); + XDest2 = sext<32>(data >> 32); + ''' + elif self.size == 8: + accCode = ''' + XDest = sext<64>(Mem_tud.a); + XDest2 = sext<64>(Mem_tud.b); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = (uint32_t)data; + XDest2 = data >> 32; + ''' + elif self.size == 8: + accCode = ''' + XDest = Mem_tud.a; + XDest2 = Mem_tud.b; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadImm64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class LoadPre64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class LoadPost64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class LoadReg64(LoadRegInst64, LoadSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class LoadRaw64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class LoadEx64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + + class LoadLit64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreLit64' + base = 'ArmISA::MemoryLiteral64' + writeback = False + post = False + + def buildLoads64(mnem, NameBase, size, sign, flavor="normal"): + LoadImm64(mnem, NameBase + "_IMM", size, sign, flavor=flavor).emit() + LoadPre64(mnem, NameBase + "_PRE", size, sign, flavor=flavor).emit() + LoadPost64(mnem, NameBase + "_POST", size, sign, flavor=flavor).emit() + LoadReg64(mnem, NameBase + "_REG", size, sign, flavor=flavor).emit() + + buildLoads64("ldrb", "LDRB64", 1, False) + buildLoads64("ldrsb", "LDRSBW64", 1, True) + buildLoads64("ldrsb", "LDRSBX64", 1, 
True, flavor="widen") + buildLoads64("ldrh", "LDRH64", 2, False) + buildLoads64("ldrsh", "LDRSHW64", 2, True) + buildLoads64("ldrsh", "LDRSHX64", 2, True, flavor="widen") + buildLoads64("ldrsw", "LDRSW64", 4, True, flavor="widen") + buildLoads64("ldr", "LDRW64", 4, False) + buildLoads64("ldr", "LDRX64", 8, False) + buildLoads64("ldr", "LDRBFP64", 1, False, flavor="fp") + buildLoads64("ldr", "LDRHFP64", 2, False, flavor="fp") + buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp") + buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp") + + LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit() + LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit() + LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit() + LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit() + + LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit() + LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit() + LoadImm64("ldursb", "LDURSBX64_IMM", 1, True, flavor="widen").emit() + LoadImm64("ldurh", "LDURH64_IMM", 2, False).emit() + LoadImm64("ldursh", "LDURSHW64_IMM", 2, True).emit() + LoadImm64("ldursh", "LDURSHX64_IMM", 2, True, flavor="widen").emit() + LoadImm64("ldursw", "LDURSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldur", "LDURW64_IMM", 4, False).emit() + LoadImm64("ldur", "LDURX64_IMM", 8, False).emit() + LoadImm64("ldur", "LDURBFP64_IMM", 1, flavor="fp").emit() + LoadImm64("ldur", "LDURHFP64_IMM", 2, flavor="fp").emit() + LoadImm64("ldur", "LDURSFP64_IMM", 4, flavor="fp").emit() + LoadImm64("ldur", "LDURDFP64_IMM", 8, flavor="fp").emit() + + LoadImm64("ldtrb", "LDTRB64_IMM", 1, False, True).emit() + LoadImm64("ldtrsb", "LDTRSBW64_IMM", 1, True, True).emit() + LoadImm64("ldtrsb", "LDTRSBX64_IMM", 1, True, True, flavor="widen").emit() + LoadImm64("ldtrh", "LDTRH64_IMM", 2, False, True).emit() + LoadImm64("ldtrsh", "LDTRSHW64_IMM", 2, True, True).emit() + LoadImm64("ldtrsh", "LDTRSHX64_IMM", 2, True, True, flavor="widen").emit() + LoadImm64("ldtrsw", "LDTRSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldtr", "LDTRW64_IMM", 4, False, True).emit() + LoadImm64("ldtr", "LDTRX64_IMM", 8, False, True).emit() + + LoadLit64("ldrsw", "LDRSWL64_LIT", 4, True, \ + literal=True, flavor="widen").emit() + LoadLit64("ldr", "LDRWL64_LIT", 4, False, literal=True).emit() + LoadLit64("ldr", "LDRXL64_LIT", 8, False, literal=True).emit() + LoadLit64("ldr", "LDRSFP64_LIT", 4, literal=True, flavor="fp").emit() + LoadLit64("ldr", "LDRDFP64_LIT", 8, literal=True, flavor="fp").emit() + + LoadRaw64("ldar", "LDARX64", 8, flavor="acquire").emit() + LoadRaw64("ldar", "LDARW64", 4, flavor="acquire").emit() + LoadRaw64("ldarh", "LDARH64", 2, flavor="acquire").emit() + LoadRaw64("ldarb", "LDARB64", 1, flavor="acquire").emit() + + LoadEx64("ldaxr", "LDAXRX64", 8, flavor="acex").emit() + LoadEx64("ldaxr", "LDAXRW64", 4, flavor="acex").emit() + LoadEx64("ldaxrh", "LDAXRH64", 2, flavor="acex").emit() + LoadEx64("ldaxrb", "LDAXRB64", 1, flavor="acex").emit() + + LoadEx64("ldxr", "LDXRX64", 8, flavor="exclusive").emit() + LoadEx64("ldxr", "LDXRW64", 4, flavor="exclusive").emit() + LoadEx64("ldxrh", "LDXRH64", 2, flavor="exclusive").emit() + LoadEx64("ldxrb", "LDXRB64", 1, flavor="exclusive").emit() + + class LoadImmU64(LoadImm64): + decConstBase = 'LoadStoreImmU64' + micro = True + + class LoadImmDU64(LoadImmInst64, LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = True + post = False + writeback = False + + class LoadImmDouble64(LoadImmInst64, 
LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = False + post = False + writeback = False + + class LoadRegU64(LoadReg64): + decConstBase = 'LoadStoreRegU64' + micro = True + + class LoadLitU64(LoadLit64): + decConstBase = 'LoadStoreLitU64' + micro = True + + LoadImmDouble64("ldaxp", "LDAXPW64", 4, flavor="acexp").emit() + LoadImmDouble64("ldaxp", "LDAXPX64", 8, flavor="acexp").emit() + LoadImmDouble64("ldxp", "LDXPW64", 4, flavor="exp").emit() + LoadImmDouble64("ldxp", "LDXPX64", 8, flavor="exp").emit() + + LoadImmU64("ldrxi_uop", "MicroLdrXImmUop", 8).emit() + LoadRegU64("ldrxr_uop", "MicroLdrXRegUop", 8).emit() + LoadLitU64("ldrxl_uop", "MicroLdrXLitUop", 8, literal=True).emit() + LoadImmU64("ldrfpxi_uop", "MicroLdrFpXImmUop", 8, flavor="fp").emit() + LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit() + LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True, + flavor="fp").emit() + LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop", + 16, flavor="fp", top = False).emit() + LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop", + 16, flavor="fp", top = False).emit() + LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop", + 16, literal=True, flavor="fp", top = False).emit() + LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop", + 16, flavor="fp", top = True).emit() + LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop", + 16, flavor="fp", top = True).emit() + LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop", + 16, literal=True, flavor="fp", top = True).emit() + LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit() + LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit() + LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit() +}}; diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index 06ed34af8..928d1be0d 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -1,5 +1,5 @@ // -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -58,6 +58,7 @@ let {{ armCode = ''' PseudoInst::arm(xc->tcBase()); ''' + armIop = InstObjParams("arm", "Arm", "PredOp", { "code": armCode, "predicate_test": predicateTest }, @@ -69,6 +70,7 @@ let {{ quiesceCode = ''' PseudoInst::quiesce(xc->tcBase()); ''' + quiesceIop = InstObjParams("quiesce", "Quiesce", "PredOp", { "code": quiesceCode, "predicate_test": predicateTest }, @@ -81,6 +83,10 @@ let {{ PseudoInst::quiesceNs(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceNsCode64 = ''' + PseudoInst::quiesceNs(xc->tcBase(), X0); + ''' + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs", "PredOp", { "code": quiesceNsCode, "predicate_test": predicateTest }, @@ -89,10 +95,22 @@ let {{ decoder_output += BasicConstructor.subst(quiesceNsIop) exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs64", "PredOp", + { "code": quiesceNsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce"]) + header_output += BasicDeclare.subst(quiesceNsIop) + decoder_output += BasicConstructor.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceCyclesCode = ''' PseudoInst::quiesceCycles(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceCyclesCode64 = ''' + PseudoInst::quiesceCycles(xc->tcBase(), X0); + ''' + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles", "PredOp", { 
"code": quiesceCyclesCode, "predicate_test": predicateTest }, @@ -101,12 +119,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceCyclesIop) exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles64", "PredOp", + { "code": quiesceCyclesCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceCyclesIop) + decoder_output += BasicConstructor.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceTimeCode = ''' uint64_t qt_val = PseudoInst::quiesceTime(xc->tcBase()); R0 = bits(qt_val, 31, 0); R1 = bits(qt_val, 63, 32); ''' + quiesceTimeCode64 = ''' + X0 = PseudoInst::quiesceTime(xc->tcBase()); + ''' quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime", "PredOp", { "code": quiesceTimeCode, "predicate_test": predicateTest }, @@ -115,12 +144,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceTimeIop) exec_output += PredOpExecute.subst(quiesceTimeIop) + quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime64", "PredOp", + { "code": quiesceTimeCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceTimeIop) + decoder_output += BasicConstructor.subst(quiesceTimeIop) + exec_output += PredOpExecute.subst(quiesceTimeIop) + rpnsCode = ''' uint64_t rpns_val = PseudoInst::rpns(xc->tcBase()); R0 = bits(rpns_val, 31, 0); R1 = bits(rpns_val, 63, 32); ''' + rpnsCode64 = ''' + X0 = PseudoInst::rpns(xc->tcBase()); + ''' rpnsIop = InstObjParams("rpns", "Rpns", "PredOp", { "code": rpnsCode, "predicate_test": predicateTest }, @@ -129,10 +169,22 @@ let {{ decoder_output += BasicConstructor.subst(rpnsIop) exec_output += PredOpExecute.subst(rpnsIop) + rpnsIop = InstObjParams("rpns", "Rpns64", "PredOp", + { "code": rpnsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(rpnsIop) + decoder_output += BasicConstructor.subst(rpnsIop) + exec_output += PredOpExecute.subst(rpnsIop) + wakeCpuCode = ''' PseudoInst::wakeCPU(xc->tcBase(), join32to64(R1,R0)); ''' + wakeCpuCode64 = ''' + PseudoInst::wakeCPU(xc->tcBase(), X0); + ''' + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU", "PredOp", { "code": wakeCpuCode, "predicate_test": predicateTest }, @@ -141,6 +193,14 @@ let {{ decoder_output += BasicConstructor.subst(wakeCPUIop) exec_output += PredOpExecute.subst(wakeCPUIop) + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU64", "PredOp", + { "code": wakeCpuCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(wakeCPUIop) + decoder_output += BasicConstructor.subst(wakeCPUIop) + exec_output += PredOpExecute.subst(wakeCPUIop) + deprecated_ivlbIop = InstObjParams("deprecated_ivlb", "Deprecated_ivlb", "PredOp", { "code": '''warn_once("Obsolete M5 ivlb instruction encountered.\\n");''', "predicate_test": predicateTest }) @@ -171,6 +231,11 @@ let {{ m5exit_code = ''' PseudoInst::m5exit(xc->tcBase(), join32to64(R1, R0)); ''' + + m5exit_code64 = ''' + PseudoInst::m5exit(xc->tcBase(), X0); + ''' + m5exitIop = InstObjParams("m5exit", "M5exit", "PredOp", { "code": m5exit_code, "predicate_test": predicateTest }, @@ -190,6 +255,14 @@ let {{ decoder_output += BasicConstructor.subst(m5failIop) exec_output += PredOpExecute.subst(m5failIop) + m5exitIop = InstObjParams("m5exit", "M5exit64", "PredOp", + { 
"code": m5exit_code64, + "predicate_test": predicateTest }, + ["No_OpClass", "IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5exitIop) + decoder_output += BasicConstructor.subst(m5exitIop) + exec_output += PredOpExecute.subst(m5exitIop) + loadsymbolCode = ''' PseudoInst::loadsymbol(xc->tcBase()); ''' @@ -208,6 +281,10 @@ let {{ R1 = bits(ip_val, 63, 32); ''' + initparamCode64 = ''' + X0 = PseudoInst::initParam(xc->tcBase()); + ''' + initparamIop = InstObjParams("initparam", "Initparam", "PredOp", { "code": initparamCode, "predicate_test": predicateTest }, @@ -216,10 +293,21 @@ let {{ decoder_output += BasicConstructor.subst(initparamIop) exec_output += PredOpExecute.subst(initparamIop) + initparamIop = InstObjParams("initparam", "Initparam64", "PredOp", + { "code": initparamCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(initparamIop) + decoder_output += BasicConstructor.subst(initparamIop) + exec_output += PredOpExecute.subst(initparamIop) + resetstats_code = ''' PseudoInst::resetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + resetstats_code64 = ''' + PseudoInst::resetstats(xc->tcBase(), X0, X1); + ''' resetstatsIop = InstObjParams("resetstats", "Resetstats", "PredOp", { "code": resetstats_code, "predicate_test": predicateTest }, @@ -228,9 +316,22 @@ let {{ decoder_output += BasicConstructor.subst(resetstatsIop) exec_output += PredOpExecute.subst(resetstatsIop) + resetstatsIop = InstObjParams("resetstats", "Resetstats64", "PredOp", + { "code": resetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(resetstatsIop) + decoder_output += BasicConstructor.subst(resetstatsIop) + exec_output += PredOpExecute.subst(resetstatsIop) + dumpstats_code = ''' PseudoInst::dumpstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpstats_code64 = ''' + PseudoInst::dumpstats(xc->tcBase(), X0, X1); + ''' + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats", "PredOp", { "code": dumpstats_code, "predicate_test": predicateTest }, @@ -239,9 +340,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpstatsIop) exec_output += PredOpExecute.subst(dumpstatsIop) + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats64", "PredOp", + { "code": dumpstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpstatsIop) + decoder_output += BasicConstructor.subst(dumpstatsIop) + exec_output += PredOpExecute.subst(dumpstatsIop) + dumpresetstats_code = ''' PseudoInst::dumpresetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpresetstats_code64 = ''' + PseudoInst::dumpresetstats(xc->tcBase(), X0, X1); + ''' + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats", "PredOp", { "code": dumpresetstats_code, "predicate_test": predicateTest }, @@ -250,9 +364,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpresetstatsIop) exec_output += PredOpExecute.subst(dumpresetstatsIop) + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats64", "PredOp", + { "code": dumpresetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpresetstatsIop) + decoder_output += BasicConstructor.subst(dumpresetstatsIop) + exec_output += PredOpExecute.subst(dumpresetstatsIop) + m5checkpoint_code = ''' PseudoInst::m5checkpoint(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + m5checkpoint_code64 = 
''' + PseudoInst::m5checkpoint(xc->tcBase(), X0, X1); + ''' + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint", "PredOp", { "code": m5checkpoint_code, "predicate_test": predicateTest }, @@ -261,11 +388,27 @@ let {{ decoder_output += BasicConstructor.subst(m5checkpointIop) exec_output += PredOpExecute.subst(m5checkpointIop) + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint64", "PredOp", + { "code": m5checkpoint_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5checkpointIop) + decoder_output += BasicConstructor.subst(m5checkpointIop) + exec_output += PredOpExecute.subst(m5checkpointIop) + m5readfileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); R0 = PseudoInst::readfile(xc->tcBase(), R0, join32to64(R3,R2), offset); ''' + + m5readfileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + X0 = PseudoInst::readfile(xc->tcBase(), (uint32_t)X0, X1, offset); + ''' + m5readfileIop = InstObjParams("m5readfile", "M5readfile", "PredOp", { "code": m5readfileCode, "predicate_test": predicateTest }, @@ -274,6 +417,14 @@ let {{ decoder_output += BasicConstructor.subst(m5readfileIop) exec_output += PredOpExecute.subst(m5readfileIop) + m5readfileIop = InstObjParams("m5readfile", "M5readfile64", "PredOp", + { "code": m5readfileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5readfileIop) + decoder_output += BasicConstructor.subst(m5readfileIop) + exec_output += PredOpExecute.subst(m5readfileIop) + m5writefileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); @@ -282,6 +433,16 @@ let {{ R0 = PseudoInst::writefile(xc->tcBase(), R0, join32to64(R3,R2), offset, filenameAddr); ''' + + m5writefileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + Addr filenameAddr = getArgument(xc->tcBase(), n, sizeof(Addr), false); + X0 = PseudoInst::writefile(xc->tcBase(), (uint32_t)X0, X1, offset, + filenameAddr); + ''' + m5writefileIop = InstObjParams("m5writefile", "M5writefile", "PredOp", { "code": m5writefileCode, "predicate_test": predicateTest }, @@ -290,6 +451,14 @@ let {{ decoder_output += BasicConstructor.subst(m5writefileIop) exec_output += PredOpExecute.subst(m5writefileIop) + m5writefileIop = InstObjParams("m5writefile", "M5writefile64", "PredOp", + { "code": m5writefileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5writefileIop) + decoder_output += BasicConstructor.subst(m5writefileIop) + exec_output += PredOpExecute.subst(m5writefileIop) + m5breakIop = InstObjParams("m5break", "M5break", "PredOp", { "code": "PseudoInst::debugbreak(xc->tcBase());", "predicate_test": predicateTest }, @@ -309,6 +478,9 @@ let {{ m5addsymbolCode = ''' PseudoInst::addsymbol(xc->tcBase(), join32to64(R1, R0), R2); ''' + m5addsymbolCode64 = ''' + PseudoInst::addsymbol(xc->tcBase(), X0, (uint32_t)X1); + ''' m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol", "PredOp", { "code": m5addsymbolCode, "predicate_test": predicateTest }, @@ -317,8 +489,17 @@ let {{ decoder_output += BasicConstructor.subst(m5addsymbolIop) exec_output += PredOpExecute.subst(m5addsymbolIop) + m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol64", "PredOp", + { "code": m5addsymbolCode64, + "predicate_test": 
predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5addsymbolIop) + decoder_output += BasicConstructor.subst(m5addsymbolIop) + exec_output += PredOpExecute.subst(m5addsymbolIop) + m5panicCode = '''panic("M5 panic instruction called at pc=%#x.", xc->pcState().pc());''' + m5panicIop = InstObjParams("m5panic", "M5panic", "PredOp", { "code": m5panicCode, "predicate_test": predicateTest }, @@ -332,6 +513,13 @@ let {{ join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workbeginCode64 = '''PseudoInst::workbegin( + xc->tcBase(), + X0, + X1 + );''' + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin", "PredOp", { "code": m5workbeginCode, "predicate_test": predicateTest }, @@ -340,11 +528,26 @@ let {{ decoder_output += BasicConstructor.subst(m5workbeginIop) exec_output += PredOpExecute.subst(m5workbeginIop) + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin64", "PredOp", + { "code": m5workbeginCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workbeginIop) + decoder_output += BasicConstructor.subst(m5workbeginIop) + exec_output += PredOpExecute.subst(m5workbeginIop) + m5workendCode = '''PseudoInst::workend( xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workendCode64 = '''PseudoInst::workend( + xc->tcBase(), + X0, + X1 + );''' + m5workendIop = InstObjParams("m5workend", "M5workend", "PredOp", { "code": m5workendCode, "predicate_test": predicateTest }, @@ -353,4 +556,11 @@ let {{ decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + m5workendIop = InstObjParams("m5workend", "M5workend64", "PredOp", + { "code": m5workendCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workendIop) + decoder_output += BasicConstructor.subst(m5workendIop) + exec_output += PredOpExecute.subst(m5workendIop) }}; diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index db36a3fff..f164595dd 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -91,7 +91,8 @@ let {{ SCTLR sctlr = Sctlr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -158,8 +159,8 @@ let {{ header_output = decoder_output = exec_output = '' - loadIops = (microLdrUopIop, microLdrRetUopIop, - microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop) + loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop, + microLdrDBFpUopIop, microLdrDTFpUopIop) storeIops = (microStrUopIop, microStrFpUopIop, microStrDBFpUopIop, microStrDTFpUopIop) for iop in loadIops + storeIops: @@ -178,7 +179,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = URa + imm;' + eaCode = 'EA = XURa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. 
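
The AArch32 m5op handlers earlier in this file rebuild 64-bit arguments from register pairs with join32to64(R1, R0), while the new AArch64 variants read X0/X1 directly. The pairing is a plain shift-and-or; a minimal sketch of the arithmetic (gem5's own join32to64 is a C++ helper, this just mirrors it):

    # R1:R0 pairing used by the 32-bit m5ops; AArch64 passes the same
    # value directly in X0.
    def join32to64(hi, lo):
        return ((hi & 0xffffffff) << 32) | (lo & 0xffffffff)

    assert join32to64(0x1, 0x2) == 0x0000000100000002
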
@@ -592,6 +593,26 @@ let {{ URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' + microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop', + 'MicroIntImmXOp', + 'XURa = XURb + imm;', + ['IsMicroop']) + + microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop', + 'MicroIntImmXOp', ''' + if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + XURa = XURb + imm; + ''', ['IsMicroop']) + + microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop', + 'MicroIntRegXOp', + 'XURa = XURb + ' + \ + 'extendReg64(XURc, type, shiftAmt, 64);', + ['IsMicroop']) + microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': microAddUopCode, @@ -604,6 +625,11 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop', + 'MicroIntImmXOp', + 'XURa = XURb - imm;', + ['IsMicroop']) + microSubUopCode = ''' URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' @@ -631,8 +657,8 @@ let {{ SCTLR sctlr = Sctlr; pNPC = URa; CPSR new_cpsr = - cpsrWriteByInstr(cpsrOrCondCodes, URb, - 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr, + 0xF, true, sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; NextThumb = new_cpsr.t; NextJazelle = new_cpsr.j; @@ -651,25 +677,37 @@ let {{ ['IsMicroop']) header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ + MicroIntImmDeclare.subst(microSubXiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ + MicroIntXERegDeclare.subst(microAddXERegUopIop) + \ MicroIntMovDeclare.subst(microUopRegMovIop) + \ MicroIntMovDeclare.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ + MicroIntImmXConstructor.subst(microSubXiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ + MicroIntXERegConstructor.subst(microAddXERegUopIop) + \ MicroIntMovConstructor.subst(microUopRegMovIop) + \ MicroIntMovConstructor.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ + BasicExecute.subst(microAddXiUopIop) + \ + BasicExecute.subst(microAddXiSpAlignUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ + BasicExecute.subst(microSubXiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ + BasicExecute.subst(microAddXERegUopIop) + \ PredOpExecute.subst(microUopRegMovIop) + \ PredOpExecute.subst(microUopRegMovRetIop) + \ PredOpExecute.subst(microUopSetPCCPSRIop) @@ -681,6 +719,25 @@ let {{ header_output = MacroMemDeclare.subst(iop) decoder_output = MacroMemConstructor.subst(iop) + iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", []) + header_output += PairMemDeclare.subst(iop) + decoder_output += PairMemConstructor.subst(iop) + + iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "") + iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "") + iopPost = 
InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "") + for iop in (iopImm, iopPre, iopPost): + header_output += BigFpMemImmDeclare.subst(iop) + decoder_output += BigFpMemImmConstructor.subst(iop) + + iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "") + header_output += BigFpMemRegDeclare.subst(iop) + decoder_output += BigFpMemRegConstructor.subst(iop) + + iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "") + header_output += BigFpMemLitDeclare.subst(iop) + decoder_output += BigFpMemLitConstructor.subst(iop) + iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", []) header_output += VMemMultDeclare.subst(iop) decoder_output += VMemMultConstructor.subst(iop) diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index c39f1b14f..aed6bab0d 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,8 +48,8 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None, pcDecl = None, - rasPop = False): + base='Memory', wbDecl=None, pcDecl=None, + rasPop=False, size=4, sign=False, faCode=None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -63,6 +63,22 @@ let {{ codeBlobs["ea_code"] = eaCode + if faCode: + # For AArch64 the fa_code snippet comes already assembled here + codeBlobs["fa_code"] = faCode + elif wbDecl == None: + codeBlobs["fa_code"] = ''' + if (dest != INTREG_PC) { + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + } + ''' %("0" if size == 1 else + "1" if size == 2 else "2", + "true" if sign else "false") + else: + codeBlobs["fa_code"] = '' + macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) @@ -108,6 +124,7 @@ let {{ "use_uops" : use_uops, "use_pc" : use_pc, "use_wb" : use_wb, + "fa_code" : '', "is_ras_pop" : is_ras_pop }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) @@ -176,8 +193,13 @@ let {{ return Name def buildMemSuffix(sign, size): - if size == 4: - memSuffix = '' + if size == 8: + memSuffix = '_ud' + elif size == 4: + if sign: + memSuffix = '_sw' + else: + memSuffix = '_uw' elif size == 2: if sign: memSuffix = '_sh' diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index b8425a240..678a125fb 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,21 +40,102 @@ let {{ svcCode = ''' - if (FullSystem) { - fault = new SupervisorCall; - } else { - fault = new SupervisorCall(machInst); - } + fault = new SupervisorCall(machInst, imm); ''' - svcIop = InstObjParams("svc", "Svc", "PredOp", + svcIop = InstObjParams("svc", "Svc", "ImmOp", { "code": svcCode, "predicate_test": predicateTest }, ["IsSyscall", "IsNonSpeculative", "IsSerializeAfter"]) - header_output = BasicDeclare.subst(svcIop) - decoder_output = BasicConstructor.subst(svcIop) + header_output = ImmOpDeclare.subst(svcIop) + 
decoder_output = ImmOpConstructor.subst(svcIop) exec_output = PredOpExecute.subst(svcIop) + smcCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if ((cpsr.mode != MODE_USER) && FullSystem) { + if (ArmSystem::haveVirtualization(xc->tcBase()) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && hcr.tsc) { + fault = new HypervisorTrap(machInst, 0, EC_SMC_TO_HYP); + } else { + if (scr.scd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + } + } else { + fault = disabledFault(); + } + ''' + + smcIop = InstObjParams("smc", "Smc", "PredOp", + { "code": smcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor.subst(smcIop) + exec_output += PredOpExecute.subst(smcIop) + + hvcCode = ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + + // Filter out the various cases where this instruction isn't defined + if (!FullSystem || !ArmSystem::haveVirtualization(xc->tcBase()) || + (cpsr.mode == MODE_USER) || + (ArmSystem::haveSecurity(xc->tcBase()) && (!scr.ns || !scr.hce))) { + fault = disabledFault(); + } else { + fault = new HypervisorCall(machInst, imm); + } + ''' + + hvcIop = InstObjParams("hvc", "Hvc", "ImmOp", + { "code": hvcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += ImmOpDeclare.subst(hvcIop) + decoder_output += ImmOpConstructor.subst(hvcIop) + exec_output += PredOpExecute.subst(hvcIop) + + eretCode = ''' + SCTLR sctlr = Sctlr; + CPSR old_cpsr = Cpsr; + old_cpsr.nz = CondCodesNZ; + old_cpsr.c = CondCodesC; + old_cpsr.v = CondCodesV; + old_cpsr.ge = CondCodesGE; + + CPSR new_cpsr = cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, + true, sctlr.nmfi, xc->tcBase()); + Cpsr = ~CondCodesMask & new_cpsr; + CondCodesNZ = new_cpsr.nz; + CondCodesC = new_cpsr.c; + CondCodesV = new_cpsr.v; + CondCodesGE = new_cpsr.ge; + + NextThumb = (new_cpsr).t; + NextJazelle = (new_cpsr).j; + NextItState = (((new_cpsr).it2 << 2) & 0xFC) + | ((new_cpsr).it1 & 0x3); + + NPC = (old_cpsr.mode == MODE_HYP) ? 
ElrHyp : LR;
+    '''
+
+    eretIop = InstObjParams("eret", "Eret", "PredOp",
+                            { "code": eretCode,
+                              "predicate_test": predicateTest },
+                            ["IsNonSpeculative", "IsSerializeAfter"])
+    header_output += BasicDeclare.subst(eretIop)
+    decoder_output += BasicConstructor.subst(eretIop)
+    exec_output += PredOpExecute.subst(eretIop)
+
+
 }};

 let {{
@@ -87,6 +168,59 @@ let {{
     decoder_output += MrsConstructor.subst(mrsSpsrIop)
     exec_output += PredOpExecute.subst(mrsSpsrIop)

+    mrsBankedRegCode = '''
+        bool isIntReg;
+        int regIdx;
+
+        if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) {
+            if (isIntReg) {
+                Dest = DecodedBankedIntReg;
+            } else {
+                Dest = xc->readMiscReg(regIdx);
+            }
+        } else {
+            return new UndefinedInstruction(machInst, false, mnemonic);
+        }
+    '''
+    mrsBankedRegIop = InstObjParams("mrs", "MrsBankedReg", "MrsOp",
+                                    { "code": mrsBankedRegCode,
+                                      "predicate_test": predicateTest },
+                                    ["IsSerializeBefore"])
+    header_output += MrsBankedRegDeclare.subst(mrsBankedRegIop)
+    decoder_output += MrsBankedRegConstructor.subst(mrsBankedRegIop)
+    exec_output += PredOpExecute.subst(mrsBankedRegIop)
+
+    msrBankedRegCode = '''
+        bool isIntReg;
+        int regIdx;
+
+        if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) {
+            if (isIntReg) {
+                // This is a bit nasty: you would expect DecodedBankedIntReg
+                // not to be written unless the conditions in the if
+                // statements above are met, but the generated C code shows
+                // that it is written regardless. This is safe, however,
+                // because DecodedBankedIntReg (whose index is resolved in
+                // operands.isa) evaluates to INTREG_DUMMY when the selection
+                // is not a valid integer register, redirecting the write to
+                // a register we don't care about.
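+                // In short, a sketch of the generated effect (illustrative,
+                // not extra logic): gpr[valid ? regIdx : INTREG_DUMMY] = Op1;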
+ DecodedBankedIntReg = Op1; + } else { + xc->setMiscReg(regIdx, Op1); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + msrBankedRegIop = InstObjParams("msr", "MsrBankedReg", "MsrRegOp", + { "code": msrBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeAfter"]) + header_output += MsrBankedRegDeclare.subst(msrBankedRegIop) + decoder_output += MsrBankedRegConstructor.subst(msrBankedRegIop) + exec_output += PredOpExecute.subst(msrBankedRegIop) + msrCpsrRegCode = ''' SCTLR sctlr = Sctlr; CPSR old_cpsr = Cpsr; @@ -96,7 +230,8 @@ let {{ old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Op1, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Op1, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -128,7 +263,8 @@ let {{ old_cpsr.v = CondCodesV; old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, imm, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, imm, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -488,12 +624,10 @@ let {{ decoder_output += BasicConstructor.subst(bkptIop) exec_output += BasicExecute.subst(bkptIop) - nopIop = InstObjParams("nop", "NopInst", "PredOp", \ - { "code" : "", "predicate_test" : predicateTest }, - ['IsNop']) + nopIop = InstObjParams("nop", "NopInst", "ArmStaticInst", "", ['IsNop']) header_output += BasicDeclare.subst(nopIop) - decoder_output += BasicConstructor.subst(nopIop) - exec_output += PredOpExecute.subst(nopIop) + decoder_output += BasicConstructor64.subst(nopIop) + exec_output += BasicExecute.subst(nopIop) yieldIop = InstObjParams("yield", "YieldInst", "PredOp", \ { "code" : "", "predicate_test" : predicateTest }) @@ -502,14 +636,31 @@ let {{ exec_output += PredOpExecute.subst(yieldIop) wfeCode = ''' - // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + + // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending, + ThreadContext *tc = xc->tcBase(); if (SevMailbox == 1) { SevMailbox = 0; - PseudoInst::quiesceSkip(xc->tcBase()); - } else if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkInterrupts(xc->tcBase())) { - PseudoInst::quiesceSkip(xc->tcBase()); + PseudoInst::quiesceSkip(tc); + } else if (tc->getCpuPtr()->getInterruptController()->checkInterrupts(tc)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwe) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && + hcr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } ''' wfePredFixUpCode = ''' @@ -528,12 +679,30 @@ let {{ exec_output += QuiescePredOpExecuteWithFixup.subst(wfeIop) wfiCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + // WFI doesn't sleep if interrupts are pending (masked or not) - if 
(xc->tcBase()->getCpuPtr()->getInterruptController()->checkRaw()) { - PseudoInst::quiesceSkip(xc->tcBase()); + ThreadContext *tc = xc->tcBase(); + if (tc->getCpuPtr()->getInterruptController()->checkWfiWake(hcr, cpsr, + scr)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwi) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && hcr.twi && + (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr)) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twi) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, @@ -564,6 +733,16 @@ let {{ decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) + sevlCode = ''' + SevMailbox = 1; + ''' + sevlIop = InstObjParams("sevl", "SevlInst", "PredOp", \ + { "code" : sevlCode, "predicate_test" : predicateTest }, + ["IsNonSpeculative", "IsSquashAfter", "IsUnverifiable"]) + header_output += BasicDeclare.subst(sevlIop) + decoder_output += BasicConstructor.subst(sevlIop) + exec_output += BasicExecute.subst(sevlIop) + itIop = InstObjParams("it", "ItInst", "PredOp", \ { "code" : ";", "predicate_test" : predicateTest }, []) @@ -571,10 +750,7 @@ let {{ decoder_output += BasicConstructor.subst(itIop) exec_output += PredOpExecute.subst(itIop) unknownCode = ''' - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, true); + return new UndefinedInstruction(machInst, true); ''' unknownIop = InstObjParams("unknown", "Unknown", "UnknownOp", \ { "code": unknownCode, @@ -626,108 +802,152 @@ let {{ exec_output += PredOpExecute.subst(bfiIop) mrc14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(op1); + if (!canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp((const MiscRegIndex) op1, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } Dest = MiscOp1; ''' - mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegOp", + mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegImmOp", { "code": mrc14code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14Iop) - decoder_output += RegRegOpConstructor.subst(mrc14Iop) + header_output += RegRegImmOpDeclare.subst(mrc14Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc14Iop) exec_output += PredOpExecute.subst(mrc14Iop) mcr14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + if (!canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, + Hstr, 
Hcptr, imm)) {
+        return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC);
     }
     MiscDest = Op1;
     '''
-    mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegOp",
+    mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegImmOp",
                             { "code": mcr14code,
                               "predicate_test": predicateTest },
                             ["IsSerializeAfter","IsNonSpeculative"])
-    header_output += RegRegOpDeclare.subst(mcr14Iop)
-    decoder_output += RegRegOpConstructor.subst(mcr14Iop)
+    header_output += RegRegImmOpDeclare.subst(mcr14Iop)
+    decoder_output += RegRegImmOpConstructor.subst(mcr14Iop)
     exec_output += PredOpExecute.subst(mcr14Iop)

-    mrc14UserIop = InstObjParams("mrc", "Mrc14User", "RegRegOp",
-                                 { "code": "Dest = MiscOp1;",
-                                   "predicate_test": predicateTest }, [])
-    header_output += RegRegOpDeclare.subst(mrc14UserIop)
-    decoder_output += RegRegOpConstructor.subst(mrc14UserIop)
-    exec_output += PredOpExecute.subst(mrc14UserIop)
-
-    mcr14UserIop = InstObjParams("mcr", "Mcr14User", "RegRegOp",
-                                 { "code": "MiscDest = Op1",
-                                   "predicate_test": predicateTest },
-                                 ["IsSerializeAfter","IsNonSpeculative"])
-    header_output += RegRegOpDeclare.subst(mcr14UserIop)
-    decoder_output += RegRegOpConstructor.subst(mcr14UserIop)
-    exec_output += PredOpExecute.subst(mcr14UserIop)
-
     mrc15code = '''
-        CPSR cpsr = Cpsr;
-        if (cpsr.mode == MODE_USER) {
-            if (FullSystem)
-                return new UndefinedInstruction;
-            else
-                return new UndefinedInstruction(false, mnemonic);
+        int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase());
+        MiscRegIndex miscReg = (MiscRegIndex)
+            xc->tcBase()->flattenMiscIndex(preFlatOp1);
+        bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr,
+                                         Hcptr, imm);
+        bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+        // If we're in non-secure PL1 mode then we can trap regardless of
+        // whether the register is accessible; in other modes we trap only if
+        // the register IS accessible.
+        if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+            return new UndefinedInstruction(machInst, false, mnemonic);
         }
-        Dest = MiscOp1;
+        if (hypTrap) {
+            return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC);
+        }
+        Dest = MiscNsBankedOp1;
     '''
-    mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegOp",
+    mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegImmOp",
                             { "code": mrc15code,
                               "predicate_test": predicateTest }, [])
-    header_output += RegRegOpDeclare.subst(mrc15Iop)
-    decoder_output += RegRegOpConstructor.subst(mrc15Iop)
+    header_output += RegRegImmOpDeclare.subst(mrc15Iop)
+    decoder_output += RegRegImmOpConstructor.subst(mrc15Iop)
     exec_output += PredOpExecute.subst(mrc15Iop)

     mcr15code = '''
-        CPSR cpsr = Cpsr;
-        if (cpsr.mode == MODE_USER) {
-            if (FullSystem)
-                return new UndefinedInstruction;
-            else
-                return new UndefinedInstruction(false, mnemonic);
+        int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase());
+        MiscRegIndex miscReg = (MiscRegIndex)
+            xc->tcBase()->flattenMiscIndex(preFlatDest);
+        bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr,
+                                         Hcptr, imm);
+        bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+        // If we're in non-secure PL1 mode then we can trap regardless of
+        // whether the register is accessible; in other modes we trap only if
+        // the register IS accessible.
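+        // (Note: the '&' operators below are bitwise but act on bools, so
+        // this reads as the logical predicate
+        // !canWrite && !(hypTrap && !inUserMode && !inSecureState),
+        // minus the short-circuit evaluation.)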
+        if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+            return new UndefinedInstruction(machInst, false, mnemonic);
         }
-        MiscDest = Op1;
+        if (hypTrap) {
+            return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC);
+        }
+        MiscNsBankedDest = Op1;
     '''
-    mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegOp",
+    mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegImmOp",
                             { "code": mcr15code,
                               "predicate_test": predicateTest },
                             ["IsSerializeAfter","IsNonSpeculative"])
-    header_output += RegRegOpDeclare.subst(mcr15Iop)
-    decoder_output += RegRegOpConstructor.subst(mcr15Iop)
+    header_output += RegRegImmOpDeclare.subst(mcr15Iop)
+    decoder_output += RegRegImmOpConstructor.subst(mcr15Iop)
     exec_output += PredOpExecute.subst(mcr15Iop)

-    mrc15UserIop = InstObjParams("mrc", "Mrc15User", "RegRegOp",
-                                 { "code": "Dest = MiscOp1;",
-                                   "predicate_test": predicateTest }, [])
-    header_output += RegRegOpDeclare.subst(mrc15UserIop)
-    decoder_output += RegRegOpConstructor.subst(mrc15UserIop)
-    exec_output += PredOpExecute.subst(mrc15UserIop)
-
-    mcr15UserIop = InstObjParams("mcr", "Mcr15User", "RegRegOp",
-                                 { "code": "MiscDest = Op1",
-                                   "predicate_test": predicateTest },
-                                 ["IsSerializeAfter","IsNonSpeculative"])
-    header_output += RegRegOpDeclare.subst(mcr15UserIop)
-    decoder_output += RegRegOpConstructor.subst(mcr15UserIop)
-    exec_output += PredOpExecute.subst(mcr15UserIop)
+
+    mrrc15code = '''
+        int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase());
+        MiscRegIndex miscReg = (MiscRegIndex)
+            xc->tcBase()->flattenMiscIndex(preFlatOp1);
+        bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm);
+        bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+        // If we're in non-secure PL1 mode then we can trap regardless of
+        // whether the register is accessible; in other modes we trap only if
+        // the register IS accessible.
+        if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) {
+            return new UndefinedInstruction(machInst, false, mnemonic);
+        }
+        if (hypTrap) {
+            return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC);
+        }
+        Dest = bits(MiscNsBankedOp164, 63, 32);
+        Dest2 = bits(MiscNsBankedOp164, 31, 0);
+    '''
+    mrrc15Iop = InstObjParams("mrrc", "Mrrc15", "MrrcOp",
+                              { "code": mrrc15code,
+                                "predicate_test": predicateTest }, [])
+    header_output += MrrcOpDeclare.subst(mrrc15Iop)
+    decoder_output += MrrcOpConstructor.subst(mrrc15Iop)
+    exec_output += PredOpExecute.subst(mrrc15Iop)
+
+
+    mcrr15code = '''
+        int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase());
+        MiscRegIndex miscReg = (MiscRegIndex)
+            xc->tcBase()->flattenMiscIndex(preFlatDest);
+        bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm);
+        bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase());
+
+        // If we're in non-secure PL1 mode then we can trap regardless of
+        // whether the register is accessible; in other modes we trap only if
+        // the register IS accessible.
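+        // (As with the MRRC unpack above, which splits the 64-bit banked
+        // value into Dest/Dest2, the write at the end of this blob packs
+        // the pair back together: dest64 = ((uint64_t) Op1 << 32) | Op2,
+        // with Op1 supplying the high word.)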
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + MiscNsBankedDest64 = ((uint64_t) Op1 << 32) | Op2; + ''' + mcrr15Iop = InstObjParams("mcrr", "Mcrr15", "McrrOp", + { "code": mcrr15code, + "predicate_test": predicateTest }, []) + header_output += McrrOpDeclare.subst(mcrr15Iop) + decoder_output += McrrOpConstructor.subst(mcrr15Iop) + exec_output += PredOpExecute.subst(mcrr15Iop) + enterxCode = ''' NextThumb = true; @@ -775,35 +995,53 @@ let {{ exec_output += PredOpExecute.subst(clrexIop) isbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15ISB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - isbIop = InstObjParams("isb", "Isb", "PredOp", + isbIop = InstObjParams("isb", "Isb", "ImmOp", {"code": isbCode, "predicate_test": predicateTest}, ['IsSerializeAfter']) - header_output += BasicDeclare.subst(isbIop) - decoder_output += BasicConstructor.subst(isbIop) + header_output += ImmOpDeclare.subst(isbIop) + decoder_output += ImmOpConstructor.subst(isbIop) exec_output += PredOpExecute.subst(isbIop) dsbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DSB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - dsbIop = InstObjParams("dsb", "Dsb", "PredOp", + dsbIop = InstObjParams("dsb", "Dsb", "ImmOp", {"code": dsbCode, "predicate_test": predicateTest}, ['IsMemBarrier', 'IsSerializeAfter']) - header_output += BasicDeclare.subst(dsbIop) - decoder_output += BasicConstructor.subst(dsbIop) + header_output += ImmOpDeclare.subst(dsbIop) + decoder_output += ImmOpConstructor.subst(dsbIop) exec_output += PredOpExecute.subst(dsbIop) dmbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DMB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } ''' - dmbIop = InstObjParams("dmb", "Dmb", "PredOp", + dmbIop = InstObjParams("dmb", "Dmb", "ImmOp", {"code": dmbCode, "predicate_test": predicateTest}, ['IsMemBarrier']) - header_output += BasicDeclare.subst(dmbIop) - decoder_output += BasicConstructor.subst(dmbIop) + header_output += ImmOpDeclare.subst(dmbIop) + decoder_output += ImmOpConstructor.subst(dmbIop) exec_output += PredOpExecute.subst(dmbIop) dbgCode = ''' diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa new file mode 100644 index 000000000..6ebbcc2ba --- /dev/null +++ b/src/arch/arm/isa/insts/misc64.isa @@ -0,0 +1,147 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + svcCode = ''' + fault = new SupervisorCall(machInst, bits(machInst, 20, 5)); + ''' + + svcIop = InstObjParams("svc", "Svc64", "ArmStaticInst", + svcCode, ["IsSyscall", "IsNonSpeculative", + "IsSerializeAfter"]) + header_output = BasicDeclare.subst(svcIop) + decoder_output = BasicConstructor64.subst(svcIop) + exec_output = BasicExecute.subst(svcIop) + + # @todo: extend to take into account Virtualization. 
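+    # (Until virtualization is handled, the check below mirrors the AArch32
+    # SMC path in misc.isa: AArch64 keys off scr.smd, the SCR_EL3 Secure
+    # Monitor disable bit, where AArch32 uses scr.scd, and it falls back to
+    # disabledFault() when EL3 is not implemented or the caller is
+    # unprivileged.)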
+ smcCode = ''' + SCR scr = Scr64; + CPSR cpsr = Cpsr; + + if (!ArmSystem::haveSecurity(xc->tcBase()) || inUserMode(cpsr) || scr.smd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + ''' + + smcIop = InstObjParams("smc", "Smc64", "ArmStaticInst", + smcCode, ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor64.subst(smcIop) + exec_output += BasicExecute.subst(smcIop) + + def subst(templateBase, iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + bfmMaskCode = ''' + uint64_t bitMask; + int diff = imm2 - imm1; + if (imm1 <= imm2) { + bitMask = mask(diff + 1); + } else { + bitMask = mask(imm2 + 1); + bitMask = (bitMask >> imm1) | (bitMask << (intWidth - imm1)); + diff += intWidth; + } + uint64_t topBits M5_VAR_USED = ~mask(diff+1); + uint64_t result = (Op164 >> imm1) | (Op164 << (intWidth - imm1)); + result &= bitMask; + ''' + + bfmCode = bfmMaskCode + 'Dest64 = result | (Dest64 & ~bitMask);' + bfmIop = InstObjParams("bfm", "Bfm64", "RegRegImmImmOp64", bfmCode); + subst("RegRegImmImmOp64", bfmIop) + + ubfmCode = bfmMaskCode + 'Dest64 = result;' + ubfmIop = InstObjParams("ubfm", "Ubfm64", "RegRegImmImmOp64", ubfmCode); + subst("RegRegImmImmOp64", ubfmIop) + + sbfmCode = bfmMaskCode + \ + 'Dest64 = result | (bits(Op164, imm2) ? topBits : 0);' + sbfmIop = InstObjParams("sbfm", "Sbfm64", "RegRegImmImmOp64", sbfmCode); + subst("RegRegImmImmOp64", sbfmIop) + + extrCode = ''' + if (imm == 0) { + Dest64 = Op264; + } else { + Dest64 = (Op164 << (intWidth - imm)) | (Op264 >> imm); + } + ''' + extrIop = InstObjParams("extr", "Extr64", "RegRegRegImmOp64", extrCode); + subst("RegRegRegImmOp64", extrIop); + + unknownCode = ''' + return new UndefinedInstruction(machInst, true); + ''' + unknown64Iop = InstObjParams("unknown", "Unknown64", "UnknownOp64", + unknownCode) + header_output += BasicDeclare.subst(unknown64Iop) + decoder_output += BasicConstructor64.subst(unknown64Iop) + exec_output += BasicExecute.subst(unknown64Iop) + + isbIop = InstObjParams("isb", "Isb64", "ArmStaticInst", + "fault = new FlushPipe;", ['IsSerializeAfter']) + header_output += BasicDeclare.subst(isbIop) + decoder_output += BasicConstructor64.subst(isbIop) + exec_output += BasicExecute.subst(isbIop) + + dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst", + "fault = new FlushPipe;", + ['IsMemBarrier', 'IsSerializeAfter']) + header_output += BasicDeclare.subst(dsbIop) + decoder_output += BasicConstructor64.subst(dsbIop) + exec_output += BasicExecute.subst(dsbIop) + + dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "", + ['IsMemBarrier']) + header_output += BasicDeclare.subst(dmbIop) + decoder_output += BasicConstructor64.subst(dmbIop) + exec_output += BasicExecute.subst(dmbIop) + + clrexIop = InstObjParams("clrex", "Clrex64", "ArmStaticInst", + "LLSCLock = 0;") + header_output += BasicDeclare.subst(clrexIop) + decoder_output += BasicConstructor64.subst(clrexIop) + exec_output += BasicExecute.subst(clrexIop) +}}; diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 876bb3bb7..ca5c3038c 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The 
license below extends only to copyright in the software and shall @@ -94,8 +94,8 @@ output header {{ template