From 536f43cb96be91c95f6b4a88dfc8c2ba33dbda4d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 24 Jul 2020 09:30:04 -0700 Subject: [PATCH] freedreno: slurp in afuc Signed-off-by: Rob Clark Part-of: --- src/freedreno/afuc/Makefile | 368 +++++++++++++++ src/freedreno/afuc/README.rst | 317 +++++++++++++ src/freedreno/afuc/afuc.h | 188 ++++++++ src/freedreno/afuc/asm.c | 435 +++++++++++++++++ src/freedreno/afuc/asm.h | 127 +++++ src/freedreno/afuc/disasm.c | 829 +++++++++++++++++++++++++++++++++ src/freedreno/afuc/lexer.l | 92 ++++ src/freedreno/afuc/meson.build | 69 +++ src/freedreno/afuc/parser.y | 269 +++++++++++ src/freedreno/meson.build | 1 + 10 files changed, 2695 insertions(+) create mode 100644 src/freedreno/afuc/Makefile create mode 100644 src/freedreno/afuc/README.rst create mode 100644 src/freedreno/afuc/afuc.h create mode 100644 src/freedreno/afuc/asm.c create mode 100644 src/freedreno/afuc/asm.h create mode 100644 src/freedreno/afuc/disasm.c create mode 100644 src/freedreno/afuc/lexer.l create mode 100644 src/freedreno/afuc/meson.build create mode 100644 src/freedreno/afuc/parser.y diff --git a/src/freedreno/afuc/Makefile b/src/freedreno/afuc/Makefile new file mode 100644 index 00000000000..12e6f3aebf9 --- /dev/null +++ b/src/freedreno/afuc/Makefile @@ -0,0 +1,368 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.17 + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Disable VCS-based implicit rules. +% : %,v + + +# Disable VCS-based implicit rules. +% : RCS/% + + +# Disable VCS-based implicit rules. +% : RCS/%,v + + +# Disable VCS-based implicit rules. 
+% : SCCS/s.% + + +# Disable VCS-based implicit rules. +% : s.% + + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Command-line flag to silence nested $(MAKE). +$(VERBOSE)MAKESILENT = -s + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E rm -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/robclark/src/envytools + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/robclark/src/envytools + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target install/strip +install/strip: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." + /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip + +# Special rule for the target install/strip +install/strip/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." + /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip/fast + +# Special rule for the target install/local +install/local: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." 
+ /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local + +# Special rule for the target install/local +install/local/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." + /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local/fast + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." + /usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache + +.PHONY : edit_cache/fast + +# Special rule for the target test +test: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running tests..." + /usr/bin/ctest --force-new-ctest-process $(ARGS) +.PHONY : test + +# Special rule for the target test +test/fast: test + +.PHONY : test/fast + +# Special rule for the target install +install: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." + /usr/bin/cmake -P cmake_install.cmake +.PHONY : install + +# Special rule for the target install +install/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." + /usr/bin/cmake -P cmake_install.cmake +.PHONY : install/fast + +# Special rule for the target list_install_components +list_install_components: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\"" +.PHONY : list_install_components + +# Special rule for the target list_install_components +list_install_components/fast: list_install_components + +.PHONY : list_install_components/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 
+ /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache + +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles /home/robclark/src/envytools/afuc/CMakeFiles/progress.marks + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/clean +.PHONY : clean + +# The main clean target +clean/fast: clean + +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +# Convenience name for target. +afuc/CMakeFiles/asm.dir/rule: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/asm.dir/rule +.PHONY : afuc/CMakeFiles/asm.dir/rule + +# Convenience name for target. +asm: afuc/CMakeFiles/asm.dir/rule + +.PHONY : asm + +# fast build rule for target. +asm/fast: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/build +.PHONY : asm/fast + +# Convenience name for target. 
+afuc/CMakeFiles/disasm.dir/rule: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/disasm.dir/rule +.PHONY : afuc/CMakeFiles/disasm.dir/rule + +# Convenience name for target. +disasm: afuc/CMakeFiles/disasm.dir/rule + +.PHONY : disasm + +# fast build rule for target. +disasm/fast: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/build +.PHONY : disasm/fast + +asm.o: asm.c.o + +.PHONY : asm.o + +# target to build an object file +asm.c.o: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.o +.PHONY : asm.c.o + +asm.i: asm.c.i + +.PHONY : asm.i + +# target to preprocess a source file +asm.c.i: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.i +.PHONY : asm.c.i + +asm.s: asm.c.s + +.PHONY : asm.s + +# target to generate assembly for a file +asm.c.s: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.s +.PHONY : asm.c.s + +disasm.o: disasm.c.o + +.PHONY : disasm.o + +# target to build an object file +disasm.c.o: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.o +.PHONY : disasm.c.o + +disasm.i: disasm.c.i + +.PHONY : disasm.i + +# target to preprocess a source file +disasm.c.i: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.i +.PHONY : disasm.c.i + +disasm.s: disasm.c.s + +.PHONY : disasm.s + +# target to generate assembly for a file +disasm.c.s: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.s +.PHONY : disasm.c.s + +lexer.o: lexer.c.o + +.PHONY : lexer.o 
+ +# target to build an object file +lexer.c.o: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.o +.PHONY : lexer.c.o + +lexer.i: lexer.c.i + +.PHONY : lexer.i + +# target to preprocess a source file +lexer.c.i: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.i +.PHONY : lexer.c.i + +lexer.s: lexer.c.s + +.PHONY : lexer.s + +# target to generate assembly for a file +lexer.c.s: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.s +.PHONY : lexer.c.s + +parser.o: parser.c.o + +.PHONY : parser.o + +# target to build an object file +parser.c.o: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.o +.PHONY : parser.c.o + +parser.i: parser.c.i + +.PHONY : parser.i + +# target to preprocess a source file +parser.c.i: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.i +.PHONY : parser.c.i + +parser.s: parser.c.s + +.PHONY : parser.s + +# target to generate assembly for a file +parser.c.s: + cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.s +.PHONY : parser.c.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... install" + @echo "... install/local" + @echo "... install/strip" + @echo "... list_install_components" + @echo "... rebuild_cache" + @echo "... test" + @echo "... asm" + @echo "... disasm" + @echo "... asm.o" + @echo "... asm.i" + @echo "... asm.s" + @echo "... disasm.o" + @echo "... disasm.i" + @echo "... 
disasm.s" + @echo "... lexer.o" + @echo "... lexer.i" + @echo "... lexer.s" + @echo "... parser.o" + @echo "... parser.i" + @echo "... parser.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/src/freedreno/afuc/README.rst b/src/freedreno/afuc/README.rst new file mode 100644 index 00000000000..e06c9397d60 --- /dev/null +++ b/src/freedreno/afuc/README.rst @@ -0,0 +1,317 @@ +===================== +Adreno Five Microcode +===================== + +.. contents:: + +.. _afuc-introduction: + +Introduction +============ + +Adreno GPUs prior to 6xx use two micro-controllers to parse the command-stream, +setup the hardware for draws (or compute jobs), and do various GPU +housekeeping. They are relatively simple (basically glorified +register writers) and basically all their state is in a collection +of registers. Ie. there is no stack, and no memory assigned to +them; any global state like which bank of context registers is to +be used in the next draw is stored in a register. + +The setup is similar to radeon, in fact Adreno 2xx thru 4xx used +basically the same instruction set as r600. There is a "PFP" +(Prefetch Parser) and "ME" (Micro Engine, also confusingly referred +to as "PM4"). These make up the "CP" ("Command Parser"). The +PFP runs ahead of the ME, with some PM4 packets handled entirely +in the PFP. Between the PFP and ME is a FIFO ("MEQ"). In the +generations prior to Adreno 5xx, the PFP and ME had different +instruction sets. 
+ +Starting with Adreno 5xx, a new microcontroller with a unified +instruction set was introduced, although the overall architecture +and purpose of the two microcontrollers remain the same. + +For lack of a better name, this new instruction set is called +"Adreno Five MicroCode" or "afuc". (No idea what Qualcomm calls +it internally.) + +With Adreno 6xx, the separate PFP and ME are replaced with a single +SQE microcontroller using the same instruction set as 5xx. + +.. _afuc-overview: + +Instruction Set Overview +======================== + +32bit instruction set with basic arithmetic ops that can take +either two source registers or one src and a 16b immediate. + +32 registers, although some are special purpose: + +- ``$00`` - always reads zero, otherwise seems to be the PC +- ``$01`` - current PM4 packet header +- ``$1c`` - alias ``$rem``, remaining data in packet +- ``$1d`` - alias ``$addr`` +- ``$1f`` - alias ``$data`` + +Branch instructions have a delay slot so the following instruction +is always executed regardless of whether branch is taken or not. + + +.. _afuc-alu: + +ALU Instructions +================ + +The following instructions are available: + +- ``add`` - add +- ``addhi`` - add + carry (for upper 32b of 64b value) +- ``sub`` - subtract +- ``subhi`` - subtract + carry (for upper 32b of 64b value) +- ``and`` - bitwise AND +- ``or`` - bitwise OR +- ``xor`` - bitwise XOR +- ``not`` - bitwise NOT (no src1) +- ``shl`` - shift-left +- ``ushr`` - unsigned shift-right +- ``ishr`` - signed shift-right +- ``rot`` - rotate-left (like shift-left with wrap-around) +- ``mul8`` - multiply low 8b of two src +- ``min`` - minimum +- ``max`` - maximum +- ``cmp`` - compare two values + +The ALU instructions can take either two src registers, or a src +plus 16b immediate as 2nd src, ex:: + + add $dst, $src, 0x1234 ; src2 is immed + add $dst, $src1, $src2 ; src2 is reg + +The ``not`` instruction only takes a single source:: + + not $dst, $src + not $dst, 0x1234 + +.. 
_afuc-alu-cmp: + +The ``cmp`` instruction returns: + +- ``0x00`` if src1 > src2 +- ``0x2b`` if src1 == src2 +- ``0x1e`` if src1 < src2 + +See explanation in :ref:`afuc-branch`. + + +.. _afuc-branch: + +Branch Instructions +=================== + +The following branch/jump instructions are available: + +- ``brne`` - branch if not equal (or bit not set) +- ``breq`` - branch if equal (or bit set) +- ``jump`` - unconditional jump + +Both ``brne`` and ``breq`` have two forms, comparing the src register +against either a small immediate (up to 5 bits) or a specific bit:: + + breq $src, b3, #somelabel ; branch if src & (1 << 3) + breq $src, 0x3, #somelabel ; branch if src == 3 + +The branch instructions are encoded with a 16b relative offset. +Since ``$00`` always reads back zero, it can be used to construct +an unconditional relative jump. + +The :ref:`cmp <afuc-alu-cmp>` instruction can be paired with the +bit-test variants of ``brne``/``breq`` to implement gt/ge/lt/le, +due to the bit pattern it returns, for example:: + + cmp $04, $02, $03 + breq $04, b1, #somelabel + +will branch if ``$02`` is less than or equal to ``$03``. + + +.. _afuc-call: + +Call/Return +=========== + +Simple subroutines can be implemented with ``call``/``ret``. The +``call`` instruction encodes a fixed (absolute) offset. + + TODO not sure how many levels deep function calls can be nested. + There isn't really a stack. Definitely seems to be multiple + levels of fxn call, see in PFP: CP_CONTEXT_SWITCH_YIELD -> f13 -> + f22. + + +.. _afuc-control: + +Config Instructions +=================== + +These seem to read/write config state in other parts of CP. In at +least some cases I expect these map to CP registers (but possibly +not directly??) + +- ``cread $dst, [$off + addr], flags`` +- ``cwrite $src, [$off + addr], flags`` + +In cases where no offset is needed, ``$00`` is frequently used as +the offset. 
+ +For example, the following sequence updates ``CP_IBn_BASE_LO/HI/BUFSIZE``:: + + ; load CP_INDIRECT_BUFFER parameters from cmdstream: + mov $02, $data ; low 32b of IB target address + mov $03, $data ; high 32b of IB target + mov $04, $data ; IB size in dwords + + ; sanity check # of dwords: + breq $04, 0x0, #l23 (#69, 04a2) + + ; this seems something to do with figuring out whether + ; we are going from RB->IB1 or IB1->IB2 (ie. so the + ; below cwrite instructions update either + ; CP_IB1_BASE_LO/HI/BUFSIZE or CP_IB2_BASE_LO/HI/BUFSIZE) + and $05, $18, 0x0003 + shl $05, $05, 0x0002 + + ; update CP_IBn_BASE_LO/HI/BUFSIZE: + cwrite $02, [$05 + 0x0b0], 0x8 + cwrite $03, [$05 + 0x0b1], 0x8 + cwrite $04, [$05 + 0x0b2], 0x8 + + + +.. _afuc-reg-access: + +Register Access +=============== + +The special registers ``$addr`` and ``$data`` can be used to write GPU +registers, for example, to write:: + + mov $addr, CP_SCRATCH_REG[0x2] ; set register to write + mov $data, $03 ; CP_SCRATCH_REG[0x2] + mov $data, $04 ; CP_SCRATCH_REG[0x3] + ... + +Subsequent writes to ``$data`` will increment the address of the register +to write, so a sequence of consecutive registers can be written. + +To read:: + + mov $addr, CP_SCRATCH_REG[0x2] + mov $03, $addr + mov $04, $addr + +Many registers that are updated frequently have two banks, so they can be +updated without stalling for the previous draw to finish. These banks are +arranged so bit 11 is zero for bank 0 and 1 for bank 1. The ME fw (at +least the version I'm looking at) stores this in ``$17``, so to update +these registers from ME:: + + or $addr, $17, VFD_INDEX_OFFSET + mov $data, $03 + ... + +Note that PFP doesn't seem to use this approach, instead it does something +like:: + + mov $0c, CP_SCRATCH_REG[0x7] + mov $02, 0x789a ; value + cwrite $0c, [$00 + 0x010], 0x8 + cwrite $02, [$00 + 0x011], 0x8 + +Like with the ``$addr``/``$data`` approach, the destination register address +increments on each write. + +.. 
_afuc-mem: + +Memory Access +============= + +There are no load/store instructions, as such. The microcontrollers +have only indirect memory access via GPU registers. There are two +possible mechanisms. + +Read/Write via CP_NRT Registers +------------------------------- + +This seems to be only used by ME. If PFP were also using it, they would +race with each other. It seems to be primarily used for small reads. + +- ``CP_ME_NRT_ADDR_LO``/``_HI`` - write to set the address to read or write +- ``CP_ME_NRT_DATA`` - write to trigger write to address in ``CP_ME_NRT_ADDR`` + +The address register increments with successive reads or writes. + +Memory Write example:: + + ; store 64b value in $04+$05 to 64b address in $02+$03 + mov $addr, CP_ME_NRT_ADDR_LO + mov $data, $02 + mov $data, $03 + mov $addr, CP_ME_NRT_DATA + mov $data, $04 + mov $data, $05 + +Memory Read example:: + + ; load 64b value from address in $02+$03 into $04+$05 + mov $addr, CP_ME_NRT_ADDR_LO + mov $data, $02 + mov $data, $03 + mov $04, $addr + mov $05, $addr + + +Read via Control Instructions +----------------------------- + +This is used by PFP whenever it needs to read memory. Also seems to be +used by ME for streaming reads (larger amounts of data). The DMA access +seems to be done by ROQ. + + TODO might also be possible for write access + + TODO some of the control commands might be synchronizing access + between PFP and ME?? + +An example from the ``CP_DRAW_INDIRECT`` packet handler:: + + mov $07, 0x0004 ; # of dwords to read from draw-indirect buffer + ; load address of indirect buffer from cmdstream: + cwrite $data, [$00 + 0x0b8], 0x8 + cwrite $data, [$00 + 0x0b9], 0x8 + ; set # of dwords to read: + cwrite $07, [$00 + 0x0ba], 0x8 + ... 
+ ; read parameters from draw-indirect buffer: + mov $09, $addr + mov $07, $addr + cread $12, [$00 + 0x040], 0x8 + ; the start parameter gets written into MEQ, which ME writes + ; to VFD_INDEX_OFFSET register: + mov $data, $addr + + +A6XX NOTES +========== + +The ``$14`` register holds global flags set by: + + CP_SKIP_IB2_ENABLE_LOCAL - b8 + CP_SKIP_IB2_ENABLE_GLOBAL - b9 + CP_SET_MARKER + MODE=GMEM - sets b15 + MODE=BLIT2D - clears b15, b12, b7 + CP_SET_MODE - b29+b30 + CP_SET_VISIBILITY_OVERRIDE - b11, b21, b30? + CP_SET_DRAW_STATE - checks b29+b30 + + CP_COND_REG_EXEC - checks b10, which should be predicate flag? diff --git a/src/freedreno/afuc/afuc.h b/src/freedreno/afuc/afuc.h new file mode 100644 index 00000000000..4f9e9d21815 --- /dev/null +++ b/src/freedreno/afuc/afuc.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2017 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _AFUC_H_ +#define _AFUC_H_ + +/* +TODO kernel debugfs to inject packet into rb for easier experimentation. It +should trigger reloading pfp/me and resetting gpu.. + +Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs, +should be restricted to CAP_ADMIN and probably compile option too (default=n). +if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from +RB. + */ + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define PACKED __attribute__((__packed__)) + +/* The opcode is encoded variable length. Opcodes less than 0x30 + * are encoded as 5 bits followed by (rep) flag. Opcodes >= 0x30 + * (ie. top two bits are '11' are encoded as 6 bits. See get_opc() + */ +typedef enum { + OPC_NOP = 0x00, + + OPC_ADD = 0x01, /* add immediate */ + OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */ + OPC_SUB = 0x03, /* subtract immediate */ + OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */ + OPC_AND = 0x05, /* AND immediate */ + OPC_OR = 0x06, /* OR immediate */ + OPC_XOR = 0x07, /* XOR immediate */ + OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */ + OPC_SHL = 0x09, /* shift-left immediate */ + OPC_USHR = 0x0a, /* unsigned shift right by immediate */ + OPC_ISHR = 0x0b, /* signed shift right by immediate */ + OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */ + OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */ + OPC_MIN = 0x0e, + OPC_MAX = 0x0f, + OPC_CMP = 0x10, /* compare src to immed */ + OPC_MOVI = 0x11, /* move immediate */ + + /* Return the most-significant bit of src2, or 0 if src2 == 0 (the + * same as if src2 == 1). src1 is ignored. Note that this overlaps + * with STORE6, so it can only be used with the two-source encoding. + */ + OPC_MSB = 0x14, + + + OPC_ALU = 0x13, /* ALU instruction with two src registers */ + + /* These seem something to do with setting some external state.. 
+ * doesn't seem to map *directly* to registers, but I guess that + * is where things end up. For example, this sequence in the + * CP_INDIRECT_BUFFER handler: + * + * mov $02, $data ; low 32b of IB target address + * mov $03, $data ; high 32b of IB target + * mov $04, $data ; IB size in dwords + * breq $04, 0x0, #l23 (#69, 04a2) + * and $05, $18, 0x0003 + * shl $05, $05, 0x0002 + * cwrite $02, [$05 + 0x0b0], 0x8 + * cwrite $03, [$05 + 0x0b1], 0x8 + * cwrite $04, [$05 + 0x0b2], 0x8 + * + * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and + * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value + * for RB->IB1 vs IB1->IB2. + */ + OPC_CWRITE5 = 0x15, + OPC_CREAD5 = 0x16, + + /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes + * that let you read/write directly to memory (and bypass the IOMMU?). + */ + OPC_STORE6 = 0x14, + OPC_CWRITE6 = 0x15, + OPC_LOAD6 = 0x16, + OPC_CREAD6 = 0x17, + + OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */ + OPC_BREQI = 0x31, /* relative branch (if $src == immed) */ + OPC_BRNEB = 0x32, /* relative branch (if bit not set) */ + OPC_BREQB = 0x33, /* relative branch (if bit is set) */ + OPC_RET = 0x34, /* return */ + OPC_CALL = 0x35, /* "function" call */ + OPC_WIN = 0x36, /* wait for input (ie. 
wait for WPTR to advance) */ + OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */ +} afuc_opc; + + +typedef union PACKED { + /* addi, subi, andi, ori, xori, etc: */ + struct PACKED { + uint32_t uimm : 16; + uint32_t dst : 5; + uint32_t src : 5; + uint32_t hdr : 6; + } alui; + struct PACKED { + uint32_t uimm : 16; + uint32_t dst : 5; + uint32_t shift : 5; + uint32_t hdr : 6; + } movi; + struct PACKED { + uint32_t alu : 5; + uint32_t pad : 6; + uint32_t dst : 5; + uint32_t src2 : 5; + uint32_t src1 : 5; + uint32_t hdr : 6; + } alu; + struct PACKED { + uint32_t uimm : 12; + uint32_t flags : 4; + uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */ + uint32_t src2 : 5; /* read or write address is src2+uimm */ + uint32_t hdr : 6; + } control; + struct PACKED { + int32_t ioff : 16; /* relative offset */ + uint32_t bit_or_imm : 5; + uint32_t src : 5; + uint32_t hdr : 6; + } br; + struct PACKED { + uint32_t uoff : 26; /* absolute (unsigned) offset */ + uint32_t hdr : 6; + } call; + struct PACKED { + uint32_t pad : 26; + uint32_t hdr : 6; + } waitin; + struct PACKED { + uint32_t pad : 26; + uint32_t opc_r : 6; + }; + +} afuc_instr; + +static inline void +afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep) +{ + if (ai->opc_r < 0x30) { + *opc = ai->opc_r >> 1; + *rep = ai->opc_r & 0x1; + } else { + *opc = ai->opc_r; + *rep = false; + } +} + +static inline void +afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep) +{ + if (opc < 0x30) { + ai->opc_r = opc << 1; + ai->opc_r |= !!rep; + } else { + ai->opc_r = opc; + } +} + +#endif /* _AFUC_H_ */ diff --git a/src/freedreno/afuc/asm.c b/src/freedreno/afuc/asm.c new file mode 100644 index 00000000000..321d06adfef --- /dev/null +++ b/src/freedreno/afuc/asm.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2017 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "afuc.h" +#include "rnn.h" +#include "rnndec.h" +#include "parser.h" +#include "asm.h" + +int gpuver; + + +static struct rnndeccontext *ctx; +static struct rnndb *db; +static struct rnndomain *control_regs; +struct rnndomain *dom[2]; + + +/* bit lame to hard-code max but fw sizes are small */ +static struct asm_instruction instructions[0x2000]; +static unsigned num_instructions; + +static struct asm_label labels[0x512]; +static unsigned num_labels; + +struct asm_instruction *next_instr(int tok) +{ + struct asm_instruction *ai = &instructions[num_instructions++]; + assert(num_instructions < ARRAY_SIZE(instructions)); + ai->tok = tok; + return ai; +} + +void decl_label(const char *str) +{ + struct asm_label *label = &labels[num_labels++]; + + assert(num_labels < ARRAY_SIZE(labels)); + + label->offset = num_instructions; + label->label = str; +} + +static int resolve_label(const char *str) +{ + int i; + + for (i = 0; i < num_labels; i++) { + struct 
asm_label *label = &labels[i];
+
+		if (!strcmp(str, label->label)) {
+			return label->offset;
+		}
+	}
+
+	fprintf(stderr, "Undeclared label: %s\n", str);
+	exit(2);
+}
+
+/* write() wrapper: a truncated firmware image is worse than none, so any
+ * error or short write is fatal.
+ */
+static void write_or_die(int fd, const void *buf, size_t len)
+{
+	ssize_t ret = write(fd, buf, len);
+
+	if (ret < 0 || (size_t)ret != len) {
+		fprintf(stderr, "failed to write output file\n");
+		exit(1);
+	}
+}
+
+/* Map an ALU mnemonic token onto the matching afuc opcode. */
+static afuc_opc tok2alu(int tok)
+{
+	switch (tok) {
+	case T_OP_ADD:   return OPC_ADD;
+	case T_OP_ADDHI: return OPC_ADDHI;
+	case T_OP_SUB:   return OPC_SUB;
+	case T_OP_SUBHI: return OPC_SUBHI;
+	case T_OP_AND:   return OPC_AND;
+	case T_OP_OR:    return OPC_OR;
+	case T_OP_XOR:   return OPC_XOR;
+	case T_OP_NOT:   return OPC_NOT;
+	case T_OP_SHL:   return OPC_SHL;
+	case T_OP_USHR:  return OPC_USHR;
+	case T_OP_ISHR:  return OPC_ISHR;
+	case T_OP_ROT:   return OPC_ROT;
+	case T_OP_MUL8:  return OPC_MUL8;
+	case T_OP_MIN:   return OPC_MIN;
+	case T_OP_MAX:   return OPC_MAX;
+	case T_OP_CMP:   return OPC_CMP;
+	case T_OP_MSB:   return OPC_MSB;
+	default:
+		assert(0);
+		return -1;
+	}
+}
+
+/* Second pass: encode every parsed instruction (labels are resolved here)
+ * and write it to outfd, one dword per instruction.
+ */
+static void emit_instructions(int outfd)
+{
+	const uint32_t header = 0;
+	int i;
+
+	/* there is an extra 0x00000000 which kernel strips off.. we could
+	 * perhaps use it for versioning.
+	 */
+	write_or_die(outfd, &header, 4);
+
+	for (i = 0; i < num_instructions; i++) {
+		struct asm_instruction *ai = &instructions[i];
+		afuc_instr instr = {0};
+		afuc_opc opc;
+
+		/* special case, 2nd dword is patched up w/ # of instructions
+		 * (ie. offset of jmptbl)
+		 */
+		if (i == 1) {
+			assert(ai->is_literal);
+			ai->literal &= ~0xffff;
+			ai->literal |= num_instructions;
+		}
+
+		if (ai->is_literal) {
+			write_or_die(outfd, &ai->literal, 4);
+			continue;
+		}
+
+		switch (ai->tok) {
+		case T_OP_NOP:
+			opc = OPC_NOP;
+			if (gpuver >= 6)
+				instr.pad = 0x1000000;
+			break;
+		case T_OP_ADD:
+		case T_OP_ADDHI:
+		case T_OP_SUB:
+		case T_OP_SUBHI:
+		case T_OP_AND:
+		case T_OP_OR:
+		case T_OP_XOR:
+		case T_OP_NOT:
+		case T_OP_SHL:
+		case T_OP_USHR:
+		case T_OP_ISHR:
+		case T_OP_ROT:
+		case T_OP_MUL8:
+		case T_OP_MIN:
+		case T_OP_MAX:
+		case T_OP_CMP:
+		case T_OP_MSB:
+			if (ai->has_immed) {
+				/* MSB overlaps with STORE */
+				assert(ai->tok != T_OP_MSB);
+				opc = tok2alu(ai->tok);
+				instr.alui.dst = ai->dst;
+				instr.alui.src = ai->src1;
+				instr.alui.uimm = ai->immed;
+			} else {
+				opc = OPC_ALU;
+				instr.alu.dst = ai->dst;
+				instr.alu.src1 = ai->src1;
+				instr.alu.src2 = ai->src2;
+				instr.alu.alu = tok2alu(ai->tok);
+			}
+			break;
+		case T_OP_MOV:
+			/* move can either be encoded as movi (ie. move w/ immed) or
+			 * an alu instruction
+			 */
+			if (ai->has_immed) {
+				opc = OPC_MOVI;
+				instr.movi.dst = ai->dst;
+				instr.movi.uimm = ai->immed;
+				instr.movi.shift = ai->shift;
+			} else if (ai->label) {
+				/* mov w/ a label is just an alias for an immediate, this
+				 * is useful to load the address of a constant table into
+				 * a register:
+				 */
+				opc = OPC_MOVI;
+				instr.movi.dst = ai->dst;
+				instr.movi.uimm = resolve_label(ai->label);
+				instr.movi.shift = ai->shift;
+			} else {
+				/* encode as: or $dst, $00, $src */
+				opc = OPC_ALU;
+				instr.alu.dst = ai->dst;
+				instr.alu.src1 = 0x00; /* $00 reads-back 0 */
+				instr.alu.src2 = ai->src1;
+				instr.alu.alu = OPC_OR;
+			}
+			break;
+		case T_OP_CWRITE:
+		case T_OP_CREAD:
+		case T_OP_STORE:
+		case T_OP_LOAD:
+			if (gpuver >= 6) {
+				if (ai->tok == T_OP_CWRITE) {
+					opc = OPC_CWRITE6;
+				} else if (ai->tok == T_OP_CREAD) {
+					opc = OPC_CREAD6;
+				} else if (ai->tok == T_OP_STORE) {
+					opc = OPC_STORE6;
+				} else if (ai->tok == T_OP_LOAD) {
+					opc = OPC_LOAD6;
+				}
+			} else {
+				if (ai->tok == T_OP_CWRITE) {
+					opc = OPC_CWRITE5;
+				} else if (ai->tok == T_OP_CREAD) {
+					opc = OPC_CREAD5;
+				} else if (ai->tok == T_OP_STORE ||
+					   ai->tok == T_OP_LOAD) {
+					fprintf(stderr, "load and store do not exist on a5xx\n");
+					exit(1);
+				}
+			}
+			instr.control.src1 = ai->src1;
+			instr.control.src2 = ai->src2;
+			instr.control.flags = ai->bit;
+			instr.control.uimm = ai->immed;
+			break;
+		case T_OP_BRNE:
+		case T_OP_BREQ:
+			if (ai->has_immed) {
+				opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
+				instr.br.bit_or_imm = ai->immed;
+			} else {
+				opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
+				instr.br.bit_or_imm = ai->bit;
+			}
+			instr.br.src = ai->src1;
+			instr.br.ioff = resolve_label(ai->label) - i;
+			break;
+		case T_OP_RET:
+			opc = OPC_RET;
+			break;
+		case T_OP_CALL:
+			opc = OPC_CALL;
+			instr.call.uoff = resolve_label(ai->label);
+			break;
+		case T_OP_PREEMPTLEAVE:
+			opc = OPC_PREEMPTLEAVE6;
+			instr.call.uoff = resolve_label(ai->label);
+			break;
+		case T_OP_JUMP:
+			/* encode jump as: brne $00, b0, #label */
+			opc = OPC_BRNEB;
+			instr.br.bit_or_imm = 0;
+			instr.br.src = 0x00; /* $00 reads-back 0.. compare to 0 */
+			instr.br.ioff = resolve_label(ai->label) - i;
+			break;
+		case T_OP_WAITIN:
+			opc = OPC_WIN;
+			break;
+		default:
+			/* fail in release builds too, rather than encoding an
+			 * uninitialized opcode:
+			 */
+			fprintf(stderr, "unhandled instruction token: %d\n", ai->tok);
+			exit(1);
+		}
+
+		afuc_set_opc(&instr, opc, ai->rep);
+
+		write_or_die(outfd, &instr, 4);
+	}
+
+}
+
+/* Look up a valid enum value by name; returns -1 if there is none. */
+static int find_enum_val(struct rnnenum *en, const char *name)
+{
+	int i;
+
+	for (i = 0; i < en->valsnum; i++)
+		if (en->vals[i]->valvalid && !strcmp(name, en->vals[i]->name))
+			return en->vals[i]->value;
+
+	return -1;
+}
+
+/* Look up a register offset by name in a domain; returns -1 if not found. */
+static int find_reg(struct rnndomain *dom, const char *name)
+{
+	int i;
+
+	for (i = 0; i < dom->subelemsnum; i++)
+		if (!strcmp(name, dom->subelems[i]->name))
+			return dom->subelems[i]->offset;
+
+	return -1;
+}
+
+/* Translate "@NAME" into the control register offset, exiting on unknown
+ * names.
+ */
+unsigned parse_control_reg(const char *name)
+{
+	/* skip leading "@" */
+	int val = find_reg(control_regs, name + 1);
+	if (val < 0) {
+		fprintf(stderr, "invalid control reg: %s\n", name);
+		exit(2);
+	}
+	return (unsigned)val;
+}
+
+/* Build the packet-id -> handler-offset table from the declared labels and
+ * append it to the output.
+ */
+static void emit_jumptable(int outfd)
+{
+	struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+	uint32_t jmptable[0x80] = {0};
+	const int num_entries = sizeof(jmptable) / sizeof(jmptable[0]);
+	int i;
+
+	if (!en) {
+		fprintf(stderr, "could not find enum: adreno_pm4_type3_packets\n");
+		exit(2);
+	}
+
+	for (i = 0; i < num_labels; i++) {
+		struct asm_label *label = &labels[i];
+		int id = find_enum_val(en, label->label);
+
+		/* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
+		if (id < 0) {
+			if (sscanf(label->label, "UNKN%d", &id) != 1) {
+				/* if still not found, must not belong in jump-table: */
+				continue;
+			}
+		}
+
+		/* the id comes from user input (UNKN%d) or the register
+		 * database, so never trust it as an array index:
+		 */
+		if (id < 0 || id >= num_entries) {
+			fprintf(stderr, "label %s: packet id %d does not fit in jump-table, skipping\n",
+				label->label, id);
+			continue;
+		}
+
+		jmptable[id] = label->offset;
+	}
+
+	write_or_die(outfd, jmptable, sizeof(jmptable));
+}
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage:\n"
+			"\tasm [-g GPUVER] filename.asm filename.fw\n"
+			"\t\t-g - specify GPU version (5, etc)\n"
+		);
+	exit(2);
+}
+
+int main(int argc, char **argv)
+{
+	FILE *in;
+	char *file, *outfile, *name, *control_reg_name;
+	int c, ret, outfd;
+
+	/* Argument parsing: */
+	while ((c = getopt (argc, argv, "g:")) != -1) {
+		switch (c) {
+			case 'g':
+				gpuver = atoi(optarg);
+				break;
+			default:
+				usage();
+		}
+	}
+
+	/* both the input and the output file are required: */
+	if (optind + 2 > argc) {
+		fprintf(stderr, "no file specified!\n");
+		usage();
+	}
+
+	file = argv[optind];
+	outfile = argv[optind + 1];
+
+	outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (outfd < 0) {
+		fprintf(stderr, "could not open \"%s\"\n", outfile);
+		usage();
+	}
+
+	in = fopen(file, "r");
+	if (!in) {
+		fprintf(stderr, "could not open \"%s\"\n", file);
+		usage();
+	}
+
+	yyset_in(in);
+
+	/* if gpu version not specified, infer from filename: */
+	if (!gpuver) {
+		if (strstr(file, "a5")) {
+			gpuver = 5;
+		} else if (strstr(file, "a6")) {
+			gpuver = 6;
+		}
+	}
+
+	switch (gpuver) {
+	case 6:
+		name = "A6XX";
+		control_reg_name = "A6XX_CONTROL_REG";
+		break;
+	case 5:
+		name = "A5XX";
+		control_reg_name = "A5XX_CONTROL_REG";
+		break;
+	default:
+		fprintf(stderr, "unknown GPU version!\n");
+		usage();
+	}
+
+	rnn_init();
+	db = rnn_newdb();
+
+	ctx = rnndec_newcontext(db);
+
+	rnn_parsefile(db, "adreno.xml");
+	dom[0] = rnn_finddomain(db, name);
+	dom[1] = rnn_finddomain(db, "AXXX");
+	control_regs = rnn_finddomain(db, control_reg_name);
+
+	ret = yyparse();
+	if (ret) {
+		fprintf(stderr, "parse failed: %d\n", ret);
+		return ret;
+	}
+
+	emit_instructions(outfd);
+	emit_jumptable(outfd);
+
+	/* write errors can be deferred until close(): */
+	if (close(outfd) < 0) {
+		fprintf(stderr, "failed to write output file\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/src/freedreno/afuc/asm.h b/src/freedreno/afuc/asm.h
new file mode 100644
index 00000000000..03fb1508907
--- /dev/null
+++ b/src/freedreno/afuc/asm.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 Rob Clark
+ *
+ * Permission
is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _ASM_H_
+#define _ASM_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "afuc.h"
+
+extern int gpuver;
+
+/**
+ * Intermediate representation for an instruction, before final encoding.
+ * This mostly exists because we need to resolve label offset's in a 2nd
+ * pass, but also so that parser.y doesn't really need to care so much
+ * about the different encodings for 2src regs vs 1src+immed, or mnemonics
+ */
+struct asm_instruction {
+	int tok;
+	int dst;
+	int src1;
+	int src2;
+	int immed;
+	int shift;
+	int bit;
+	uint32_t literal;
+	const char *label;
+
+	bool has_immed : 1;
+	bool has_shift : 1;
+	bool has_bit : 1;
+	bool is_literal : 1;
+	bool rep : 1;
+};
+
+struct asm_label {
+	unsigned offset;
+	const char *label;
+};
+
+struct asm_instruction *next_instr(int tok);
+void decl_label(const char *str);
+
+
+/* parse "$rem", "$addr", "$addr2", "$data" or "$<hex>" into a register
+ * number, exiting on anything else:
+ */
+static inline uint32_t
+parse_reg(const char *str)
+{
+	char *retstr;
+	long int ret;
+
+	if (!strcmp(str, "$rem"))
+		return 0x1c;
+	else if (!strcmp(str, "$addr"))
+		return 0x1d;
+	else if (!strcmp(str, "$addr2"))
+		return 0x1e;
+	else if (!strcmp(str, "$data"))
+		return 0x1f;
+
+	ret = strtol(str + 1, &retstr, 16);
+
+	if (*retstr != '\0') {
+		fprintf(stderr, "invalid register: %s\n", str);
+		exit(2);
+	}
+
+	return ret;
+}
+
+/* parse a "[<hex>]" raw literal dword: */
+static inline uint32_t
+parse_literal(const char *str)
+{
+	char *retstr;
+	long int ret;
+
+	ret = strtol(str + 1, &retstr, 16);
+
+	if (*retstr != ']') {
+		fprintf(stderr, "invalid literal: %s\n", str);
+		exit(2);
+	}
+
+	return ret;
+}
+
+/* parse "b<decimal>" into the bit number: */
+static inline uint32_t
+parse_bit(const char *str)
+{
+	return strtol(str + 1, NULL, 10);
+}
+
+unsigned parse_control_reg(const char *name);
+
+/* strip trailing ':' off label (returns a heap copy): */
+static inline const char *
+parse_label_decl(const char *str)
+{
+	size_t len = strlen(str);
+	char *s = strdup(str);
+
+	if (!s) {
+		fprintf(stderr, "out of memory\n");
+		exit(2);
+	}
+
+	/* guard against an empty string so we never write s[-1]: */
+	if (len > 0)
+		s[len - 1] = '\0';
+	return s;
+}
+
+void yyset_in (FILE *  _in_str );
+
+
+#endif /* _ASM_H_ */
diff --git a/src/freedreno/afuc/disasm.c b/src/freedreno/afuc/disasm.c
new file mode 100644
index 00000000000..ea9f34cd97f
--- /dev/null
+++ b/src/freedreno/afuc/disasm.c
@@ -0,0 +1,829 @@
+/*
+ * Copyright (c) 2017 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this
software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "afuc.h"
+#include "rnn.h"
+#include "rnndec.h"
+
+static int gpuver;
+
+
+static struct rnndeccontext *ctx;
+static struct rnndb *db;
+static struct rnndomain *control_regs;
+struct rnndomain *dom[2];
+const char *variant;
+
+/* non-verbose mode should output something suitable to feed back into
+ * assembler.. verbose mode has additional output useful for debugging
+ * (like unexpected bits that are set)
+ */
+static bool verbose = false;
+
+/* If regbase decodes as a register in either domain, append its name as a
+ * trailing comment.
+ */
+static void print_gpu_reg(uint32_t regbase)
+{
+	struct rnndomain *d = NULL;
+
+	if (regbase < 0x100)
+		return;
+
+	if (rnndec_checkaddr(ctx, dom[0], regbase, 0))
+		d = dom[0];
+	else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
+		d = dom[1];
+
+	if (d) {
+		struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, d, regbase, 0);
+		if (info) {
+			printf("\t; %s", info->name);
+			free(info->name);
+			free(info);
+			return;
+		}
+	}
+}
+
+/* printf wrapped in the color escape 'c' and the color reset sequence: */
+static void printc(const char *c, const char *fmt, ...)
+{
+	va_list args;
+	printf("%s", c);
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	va_end(args);
+	printf("%s", ctx->colors->reset);
+}
+
+#define printerr(fmt, ...) printc(ctx->colors->err, fmt, ##__VA_ARGS__)
+#define printlbl(fmt, ...) printc(ctx->colors->btarg, fmt, ##__VA_ARGS__)
+
+static void print_reg(unsigned reg)
+{
+// XXX seems like *reading* $00 --> literal zero??
+// seems like read from $1c gives packet remaining len??
+// $01 current packet header, writing to $01 triggers
+// parsing header and jumping to appropriate handler.
+	if (reg == 0x1c)
+		printf("$rem"); /* remaining dwords in packet */
+	else if (reg == 0x1d)
+		printf("$addr");
+	else if (reg == 0x1e)
+		printf("$addr2"); // XXX
+	else if (reg == 0x1f)
+		printf("$data");
+	else
+		printf("$%02x", reg);
+}
+
+static void print_src(unsigned reg)
+{
+	print_reg(reg);
+}
+
+static void print_dst(unsigned reg)
+{
+	print_reg(reg);
+}
+
+/* Print the mnemonic for an ALU opcode, or the raw dword for unknown ones: */
+static void print_alu_name(afuc_opc opc, uint32_t instr)
+{
+	if (opc == OPC_ADD) {
+		printf("add ");
+	} else if (opc == OPC_ADDHI) {
+		printf("addhi ");
+	} else if (opc == OPC_SUB) {
+		printf("sub ");
+	} else if (opc == OPC_SUBHI) {
+		printf("subhi ");
+	} else if (opc == OPC_AND) {
+		printf("and ");
+	} else if (opc == OPC_OR) {
+		printf("or ");
+	} else if (opc == OPC_XOR) {
+		printf("xor ");
+	} else if (opc == OPC_NOT) {
+		printf("not ");
+	} else if (opc == OPC_SHL) {
+		printf("shl ");
+	} else if (opc == OPC_USHR) {
+		printf("ushr ");
+	} else if (opc == OPC_ISHR) {
+		printf("ishr ");
+	} else if (opc == OPC_ROT) {
+		printf("rot ");
+	} else if (opc == OPC_MUL8) {
+		printf("mul8 ");
+	} else if (opc == OPC_MIN) {
+		printf("min ");
+	} else if (opc == OPC_MAX) {
+		printf("max ");
+	} else if (opc == OPC_CMP) {
+		printf("cmp ");
+	} else if (opc == OPC_MSB) {
+		printf("msb ");
+	} else {
+		printerr("[%08x]", instr);
+		printf(" ; alu%02x ", opc);
+	}
+}
+
+/* Name of the PM4 type3 packet with the given id for the current variant,
+ * or NULL if unknown.
+ */
+static char *getpm4(uint32_t id)
+{
+	struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+	if (en) {
+		int i;
+		for (i = 0; i < en->valsnum; i++)
+			if (en->vals[i]->valvalid && en->vals[i]->value == id) {
+				const char *v = en->vals[i]->varinfo.variantsstr;
+				if (v && !strstr(v, variant))
+					continue;
+				return en->vals[i]->name;
+			}
+	}
+	return NULL;
+}
+
+static inline unsigned
+_odd_parity_bit(unsigned val)
+{
+	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+	 * note that we want odd parity so 0x6996 is inverted.
+	 */
+	val ^= val >> 16;
+	val ^= val >> 8;
+	val ^= val >> 4;
+	val &= 0xf;
+	return (~0x6996 >> val) & 1;
+}
+
+static struct {
+	uint32_t offset;
+	uint32_t num_jump_labels;
+	uint32_t jump_labels[256];
+} jump_labels[1024];
+int num_jump_labels;
+
+/* Record that packet id 'n' is handled at instruction 'offset'; several
+ * packet ids may share one offset.
+ */
+static void add_jump_table_entry(uint32_t n, uint32_t offset)
+{
+	int i;
+
+	if (n > 128) /* can't possibly be a PM4 PKT3.. */
+		return;
+
+	for (i = 0; i < num_jump_labels; i++)
+		if (jump_labels[i].offset == offset)
+			goto add_label;
+
+	/* check capacity before claiming a new slot: */
+	assert(i < (int)(sizeof(jump_labels) / sizeof(jump_labels[0])));
+	num_jump_labels = i + 1;
+	jump_labels[i].offset = offset;
+	jump_labels[i].num_jump_labels = 0;
+
+add_label:
+	/* bounds check must come before the store, not after it: */
+	assert(jump_labels[i].num_jump_labels < 256);
+	jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
+}
+
+static int get_jump_table_entry(uint32_t offset)
+{
+	int i;
+
+	for (i = 0; i < num_jump_labels; i++)
+		if (jump_labels[i].offset == offset)
+			return i;
+
+	return -1;
+}
+
+static uint32_t label_offsets[0x512];
+static int num_label_offsets;
+
+/* Index of the label at 'offset', optionally creating it; -1 if it does
+ * not exist and !create.
+ */
+static int label_idx(uint32_t offset, bool create)
+{
+	int i;
+	for (i = 0; i < num_label_offsets; i++)
+		if (offset == label_offsets[i])
+			return i;
+	if (!create)
+		return -1;
+	if (i >= (int)(sizeof(label_offsets) / sizeof(label_offsets[0]))) {
+		fprintf(stderr, "too many labels\n");
+		exit(1);
+	}
+	label_offsets[i] = offset;
+	num_label_offsets = i+1;
+	return i;
+}
+
+/* NOTE: may return a pointer to a static buffer that the next call
+ * overwrites.
+ */
+static const char *
+label_name(uint32_t offset, bool allow_jt)
+{
+	static char name[8];
+	int lidx;
+
+	if (allow_jt) {
+		lidx = get_jump_table_entry(offset);
+		if (lidx >= 0) {
+			int j;
+			for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
+				uint32_t jump_label = jump_labels[lidx].jump_labels[j];
+				char *str = getpm4(jump_label);
+				if (str)
+					return str;
+			}
+			// if we don't find anything w/ known name, maybe we should
+			// return UNKN%d to at least make it clear that this is some
+			// sort of jump-table entry?
+		}
+	}
+
+	lidx = label_idx(offset, false);
+	if (lidx < 0)
+		return NULL;
+	snprintf(name, sizeof(name), "l%03d", lidx);
+	return name;
+}
+
+
+static uint32_t fxn_offsets[0x512];
+static int num_fxn_offsets;
+
+/* Same as label_idx(), but for call targets: */
+static int fxn_idx(uint32_t offset, bool create)
+{
+	int i;
+	for (i = 0; i < num_fxn_offsets; i++)
+		if (offset == fxn_offsets[i])
+			return i;
+	if (!create)
+		return -1;
+	if (i >= (int)(sizeof(fxn_offsets) / sizeof(fxn_offsets[0]))) {
+		fprintf(stderr, "too many functions\n");
+		exit(1);
+	}
+	fxn_offsets[i] = offset;
+	num_fxn_offsets = i+1;
+	return i;
+}
+
+/* NOTE: returns a pointer to a static buffer that the next call overwrites. */
+static const char *
+fxn_name(uint32_t offset)
+{
+	static char name[8];
+	int fidx = fxn_idx(offset, false);
+	if (fidx < 0)
+		return NULL;
+	snprintf(name, sizeof(name), "fxn%02d", fidx);
+	return name;
+}
+
+/* Print "@NAME" for a known control register, else the raw id: */
+static void print_control_reg(uint32_t id)
+{
+	struct rnndecaddrinfo *info = NULL;
+
+	if (rnndec_checkaddr(ctx, control_regs, id, 0))
+		info = rnndec_decodeaddr(ctx, control_regs, id, 0);
+
+	if (info) {
+		printf("@%s", info->name);
+		free(info->name);
+		free(info);
+	} else {
+		printf("0x%03x", id);
+	}
+}
+
+/* Disassemble a firmware image: 'buf' points past the leading dword,
+ * 'sizedwords' is the number of dwords available at buf.
+ */
+static void disasm(uint32_t *buf, int sizedwords)
+{
+	uint32_t *instrs = buf;
+	const int jmptbl_max = 0x80;
+	int jmptbl_start, jmptbl_len;
+	uint32_t *jmptbl;
+	afuc_opc opc;
+	bool rep;
+	int i;
+
+	/* the jump-table offset comes from the file, so make sure it (and
+	 * the table itself) stays inside the buffer:
+	 */
+	if (sizedwords < 2) {
+		fprintf(stderr, "firmware too small\n");
+		exit(1);
+	}
+
+	jmptbl_start = instrs[1] & 0xffff;
+	if (jmptbl_start > sizedwords) {
+		fprintf(stderr, "jump-table offset %04x is past end of file\n",
+				jmptbl_start);
+		exit(1);
+	}
+
+	jmptbl = &buf[jmptbl_start];
+	jmptbl_len = sizedwords - jmptbl_start;
+	if (jmptbl_len > jmptbl_max)
+		jmptbl_len = jmptbl_max;
+
+	/* parse jumptable: */
+	for (i = 0; i < jmptbl_len; i++) {
+		unsigned offset = jmptbl[i];
+		unsigned n = i;// + CP_NOP;
+		add_jump_table_entry(n, offset);
+	}
+
+	/* do a pre-pass to find instructions that are potential branch targets,
+	 * and add labels for them:
+	 */
+	for (i = 0; i < jmptbl_start; i++) {
+		afuc_instr *instr = (void *)&instrs[i];
+
+		afuc_get_opc(instr, &opc, &rep);
+
+		switch (opc) {
+		case OPC_BRNEI:
+		case OPC_BREQI:
+		case OPC_BRNEB:
+		case OPC_BREQB:
+			label_idx(i + instr->br.ioff, true);
+			break;
+		case OPC_PREEMPTLEAVE6:
+			if (gpuver >= 6)
+				label_idx(instr->call.uoff, true);
+			break;
+		case OPC_CALL:
+			fxn_idx(instr->call.uoff, true);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* print instructions: */
+	for (i = 0; i < jmptbl_start; i++) {
+		int jump_label_idx;
+		afuc_instr *instr = (void *)&instrs[i];
+		const char *fname, *lname;
+		afuc_opc opc;
+		bool rep;
+
+		afuc_get_opc(instr, &opc, &rep);
+
+		lname = label_name(i, false);
+		fname = fxn_name(i);
+		jump_label_idx = get_jump_table_entry(i);
+
+		if (jump_label_idx >= 0) {
+			int j;
+			printf("\n");
+			for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
+				uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
+				char *name = getpm4(jump_label);
+				if (name) {
+					printlbl("%s", name);
+				} else {
+					printlbl("UNKN%d", jump_label);
+				}
+				printf(":\n");
+			}
+		}
+
+		if (fname) {
+			printlbl("%s", fname);
+			printf(":\n");
+		}
+
+		if (lname) {
+			printlbl(" %s", lname);
+			printf(":");
+		} else {
+			printf(" ");
+		}
+
+
+		if (verbose) {
+			printf("\t%04x: %08x ", i, instrs[i]);
+		} else {
+			printf(" ");
+		}
+
+		switch (opc) {
+		case OPC_NOP: {
+			/* a6xx changed the default immediate, and apparently 0
+			 * is illegal now.
+			 */
+			const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
+			if (instrs[i] != nop) {
+				printerr("[%08x]", instrs[i]);
+				printf(" ; ");
+			}
+			if (rep)
+				printf("(rep)");
+			printf("nop");
+			print_gpu_reg(instrs[i]);
+
+			break;
+		}
+		case OPC_ADD:
+		case OPC_ADDHI:
+		case OPC_SUB:
+		case OPC_SUBHI:
+		case OPC_AND:
+		case OPC_OR:
+		case OPC_XOR:
+		case OPC_NOT:
+		case OPC_SHL:
+		case OPC_USHR:
+		case OPC_ISHR:
+		case OPC_ROT:
+		case OPC_MUL8:
+		case OPC_MIN:
+		case OPC_MAX:
+		case OPC_CMP: {
+			bool src1 = true;
+
+			if (opc == OPC_NOT)
+				src1 = false;
+
+			if (rep)
+				printf("(rep)");
+
+			print_alu_name(opc, instrs[i]);
+			print_dst(instr->alui.dst);
+			printf(", ");
+			if (src1) {
+				print_src(instr->alui.src);
+				printf(", ");
+			}
+			printf("0x%04x", instr->alui.uimm);
+			print_gpu_reg(instr->alui.uimm);
+
+			/* print out unexpected bits: */
+			if (verbose) {
+				if (instr->alui.src && !src1)
+					printerr(" (src=%02x)", instr->alui.src);
+			}
+
+			break;
+		}
+		case OPC_MOVI: {
+			if (rep)
+				printf("(rep)");
+			printf("mov ");
+			print_dst(instr->movi.dst);
+			printf(", 0x%04x", instr->movi.uimm);
+			if (instr->movi.shift)
+				printf(" << %u", instr->movi.shift);
+
+			/* using mov w/ << 16 is popular way to construct a pkt7
+			 * header to send (for ex, from PFP to ME), so check that
+			 * case first
+			 */
+			if ((instr->movi.shift == 16) &&
+					((instr->movi.uimm & 0xff00) == 0x7000)) {
+				unsigned opc, p;
+
+				opc = instr->movi.uimm & 0x7f;
+				p = _odd_parity_bit(opc);
+
+				/* So, you'd think that checking the parity bit would be
+				 * a good way to rule out false positives, but seems like
+				 * ME doesn't really care.. at least it would filter out
+				 * things that look like actual legit packets between
+				 * PFP and ME..
+				 */
+				if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
+					const char *name = getpm4(opc);
+					printf("\t; ");
+					if (name)
+						printlbl("%s", name);
+					else
+						printlbl("UNKN%u", opc);
+					break;
+				}
+			}
+
+			print_gpu_reg(instr->movi.uimm << instr->movi.shift);
+
+			break;
+		}
+		case OPC_ALU: {
+			bool src1 = true;
+
+			if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
+				src1 = false;
+
+			if (instr->alu.pad)
+				printf("[%08x] ; ", instrs[i]);
+
+			if (rep)
+				printf("(rep)");
+
+			/* special case mnemonics:
+			 *   reading $00 seems to always yield zero, and so:
+			 *      or $dst, $00, $src -> mov $dst, $src
+			 *   Maybe add one for negate too, ie.
+			 *      sub $dst, $00, $src ???
+			 */
+			if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
+				printf("mov ");
+				src1 = false;
+			} else {
+				print_alu_name(instr->alu.alu, instrs[i]);
+			}
+
+			print_dst(instr->alu.dst);
+			if (src1) {
+				printf(", ");
+				print_src(instr->alu.src1);
+			}
+			printf(", ");
+			print_src(instr->alu.src2);
+
+			/* print out unexpected bits: */
+			if (verbose) {
+				if (instr->alu.pad)
+					printerr(" (pad=%03x)", instr->alu.pad);
+				if (instr->alu.src1 && !src1)
+					printerr(" (src1=%02x)", instr->alu.src1);
+			}
+			break;
+		}
+		case OPC_CWRITE6:
+		case OPC_CREAD6:
+		case OPC_STORE6:
+		case OPC_LOAD6: {
+			if (rep)
+				printf("(rep)");
+
+			bool is_control_reg = true;
+			if (gpuver >= 6) {
+				switch (opc) {
+				case OPC_CWRITE6:
+					printf("cwrite ");
+					break;
+				case OPC_CREAD6:
+					printf("cread ");
+					break;
+				case OPC_STORE6:
+					is_control_reg = false;
+					printf("store ");
+					break;
+				case OPC_LOAD6:
+					is_control_reg = false;
+					printf("load ");
+					break;
+				default:
+					assert(!"unreachable");
+				}
+			} else {
+				switch (opc) {
+				case OPC_CWRITE5:
+					printf("cwrite ");
+					break;
+				case OPC_CREAD5:
+					printf("cread ");
+					break;
+				default:
+					fprintf(stderr, "A6xx control opcode on A5xx?\n");
+					exit(1);
+				}
+			}
+
+			print_src(instr->control.src1);
+			printf(", [");
+			print_src(instr->control.src2);
+			printf(" + ");
+			if (is_control_reg && instr->control.flags != 0x4)
+				print_control_reg(instr->control.uimm);
+			else
+				printf("0x%03x", instr->control.uimm);
+			printf("], 0x%x", instr->control.flags);
+			break;
+		}
+		case OPC_BRNEI:
+		case OPC_BREQI:
+		case OPC_BRNEB:
+		case OPC_BREQB: {
+			unsigned off = i + instr->br.ioff;
+
+			assert(!rep);
+
+			/* Since $00 reads back zero, it can be used as src for
+			 * unconditional branches.  (This only really makes sense
+			 * for the BREQB.. or possible BRNEI if imm==0.)
+			 *
+			 * If bit=0 then branch is taken if *all* bits are zero.
+			 * Otherwise it is taken if bit (bit-1) is clear.
+			 *
+			 * Note the instruction after a jump/branch is executed
+			 * regardless of whether branch is taken, so use nop or
+			 * take that into account in code.
+			 */
+			if (instr->br.src || (opc != OPC_BRNEB)) {
+				bool immed = false;
+
+				if (opc == OPC_BRNEI) {
+					printf("brne ");
+					immed = true;
+				} else if (opc == OPC_BREQI) {
+					printf("breq ");
+					immed = true;
+				} else if (opc == OPC_BRNEB) {
+					printf("brne ");
+				} else if (opc == OPC_BREQB) {
+					printf("breq ");
+				}
+				print_src(instr->br.src);
+				if (immed) {
+					printf(", 0x%x,", instr->br.bit_or_imm);
+				} else {
+					printf(", b%u,", instr->br.bit_or_imm);
+				}
+			} else {
+				printf("jump");
+				if (verbose && instr->br.bit_or_imm) {
+					printerr(" (src=%03x, bit=%03x) ",
+						instr->br.src, instr->br.bit_or_imm);
+				}
+			}
+
+			printf(" #");
+			printlbl("%s", label_name(off, true));
+			if (verbose)
+				printf(" (#%d, %04x)", instr->br.ioff, off);
+			break;
+		}
+		case OPC_CALL:
+			assert(!rep);
+			printf("call #");
+			printlbl("%s", fxn_name(instr->call.uoff));
+			if (verbose) {
+				printf(" (%04x)", instr->call.uoff);
+				if (instr->br.bit_or_imm || instr->br.src) {
+					printerr(" (src=%03x, bit=%03x) ",
+						instr->br.src, instr->br.bit_or_imm);
+				}
+			}
+			break;
+		case OPC_RET:
+			assert(!rep);
+			if (instr->pad)
+				printf("[%08x] ; ", instrs[i]);
+			printf("ret");
+			break;
+		case OPC_WIN:
+			assert(!rep);
+			if (instr->waitin.pad)
+				printf("[%08x] ; ", instrs[i]);
+			printf("waitin");
+			if (verbose && instr->waitin.pad)
+				printerr(" (pad=%x)", instr->waitin.pad);
+			break;
+		case OPC_PREEMPTLEAVE6: {
+			const char *target;
+
+			if (gpuver < 6) {
+				printf("[%08x] ; op38", instrs[i]);
+			}
+			printf("preemptleave #");
+			/* the pre-pass only creates this label on a6xx+, so the
+			 * lookup can fail; never hand NULL to %s:
+			 */
+			target = label_name(instr->call.uoff, true);
+			if (target)
+				printlbl("%s", target);
+			else
+				printerr("%04x", instr->call.uoff);
+			break;
+		}
+		default:
+			printerr("[%08x]", instrs[i]);
+			printf(" ; op%02x ", opc);
+			print_dst(instr->alui.dst);
+			printf(", ");
+			print_src(instr->alui.src);
+			print_gpu_reg(instrs[i] & 0xffff);
+			break;
+		}
+		printf("\n");
+	}
+
+	/* print jumptable (same entries that were parsed above): */
+	if (verbose) {
+		printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
+		printf("; JUMP TABLE\n");
+		for (i = 0; i < jmptbl_len; i++) {
+			int n = i;// + CP_NOP;
+			uint32_t offset = jmptbl[i];
+			char *name = getpm4(n);
+			printf("%3d %02x: ", n, n);
+			printf("%04x", offset);
+			if (name) {
+				printf(" ; %s", name);
+			} else {
+				printf(" ; UNKN%d", n);
+			}
+			printf("\n");
+		}
+	}
+}
+
+#define CHUNKSIZE 4096
+
+/* Read a whole file into a malloc'd buffer.  Returns NULL (and *sz = 0) on
+ * failure; on success the caller owns the buffer and *sz is its length in
+ * bytes.
+ */
+static char * readfile(const char *path, int *sz)
+{
+	char *buf = NULL;
+	int fd, ret, n = 0;
+
+	*sz = 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	while (1) {
+		char *tmp = realloc(buf, n + CHUNKSIZE);
+		if (!tmp)
+			goto fail;
+		buf = tmp;
+
+		ret = read(fd, buf + n, CHUNKSIZE);
+		if (ret < 0)
+			goto fail;
+
+		/* only a zero-length read means EOF; a short read does not: */
+		if (ret == 0)
+			break;
+
+		n += ret;
+	}
+
+	close(fd);
+	*sz = n;
+	return buf;
+
+fail:
+	free(buf);
+	close(fd);
+	return NULL;
+}
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage:\n"
+			"\tdisasm [-g GPUVER] [-v] [-c] filename.fw\n"
+			"\t\t-g - specify GPU version (5, etc)\n"
+			"\t\t-c - use colors\n"
+			"\t\t-v - verbose output\n"
+		);
+	exit(2);
+}
+
+int main(int argc, char **argv)
+{
+	uint32_t *buf;
+	char *file, *control_reg_name;
+	bool colors = false;
+	int sz, c;
+
+	/* Argument parsing: */
+	while ((c = getopt (argc, argv, "g:vc")) != -1) {
+		switch (c) {
+			case 'g':
+				gpuver = atoi(optarg);
+				break;
+			case 'v':
+				verbose = true;
+				break;
+			case 'c':
+				colors = true;
+				break;
+			default:
+				usage();
+		}
+	}
+
+	if (optind >= argc) {
+		fprintf(stderr, "no file specified!\n");
+		usage();
+	}
+
+	file = argv[optind];
+
+	/* if gpu version not specified, infer from filename: */
+	if (!gpuver) {
+		if (strstr(file, "a5")) {
+			gpuver = 5;
+		} else if (strstr(file, "a6")) {
+			gpuver = 6;
+		}
+	}
+
+	switch (gpuver) {
+	case 6:
+		printf("; a6xx microcode\n");
+		variant = "A6XX";
+		control_reg_name = "A6XX_CONTROL_REG";
+		break;
+	case 5:
+		printf("; a5xx microcode\n");
+		variant = "A5XX";
+		control_reg_name = "A5XX_CONTROL_REG";
+		break;
+	default:
+		fprintf(stderr, "unknown GPU version!\n");
+		usage();
+	}
+
+	rnn_init();
+	db = rnn_newdb();
+
+	ctx = rnndec_newcontext(db);
+	ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
+
+	rnn_parsefile(db, "adreno.xml");
+	dom[0] = rnn_finddomain(db, variant);
+	dom[1] = rnn_finddomain(db, "AXXX");
+	control_regs = rnn_finddomain(db, control_reg_name);
+
+	buf = (uint32_t *)readfile(file, &sz);
+	if (!buf) {
+		fprintf(stderr, "could not read \"%s\"\n", file);
+		return 1;
+	}
+
+	/* need the leading dword plus at least two firmware dwords: */
+	if (sz < 3 * (int)sizeof(uint32_t)) {
+		fprintf(stderr, "\"%s\" is too small to be a firmware image\n", file);
+		free(buf);
+		return 1;
+	}
+
+	printf("; Disassembling microcode: %s\n", file);
+	printf("; Version: %08x\n\n", buf[1]);
+	disasm(&buf[1], sz/4 - 1);
+
+	free(buf);
+
+	return 0;
+}
diff --git a/src/freedreno/afuc/lexer.l b/src/freedreno/afuc/lexer.l
new file mode 100644
index 00000000000..aacc9473877
--- /dev/null
+++ b/src/freedreno/afuc/lexer.l
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+%{
+#include <stdlib.h>
+#include "parser.h"
+#include "asm.h"
+
+#define TOKEN(t) (yylval.tok = t)
+extern YYSTYPE yylval;
+
+%}
+
+%option noyywrap
+
+%%
+"\n"                              yylineno++;
+[ \t]                             ; /* ignore whitespace */
+";"[^\n]*                         ; /* ignore comments; the newline (if any) is counted by the "\n" rule, so a comment on an unterminated last line is fine too */
+0|[1-9][0-9]*                     yylval.num = strtoul(yytext, NULL, 0); return T_INT;
+"0x"[0-9a-fA-F]+                  yylval.num = strtoul(yytext, NULL, 0); return T_HEX;
+
+"$"[0-9a-fA-F][0-9a-fA-F]         yylval.num = parse_reg(yytext); return T_REGISTER;
+"$"[a-zA-Z][a-zA-Z0-9]*           yylval.num = parse_reg(yytext); return T_REGISTER;
+"b"[0-9][0-9]*                    yylval.num = parse_bit(yytext); return T_BIT;
+"@"[a-zA-Z_][a-zA-Z0-9_]*         yylval.num = parse_control_reg(yytext); return T_CONTROL_REG;
+"#"[a-zA-Z_][a-zA-Z0-9_]*         yylval.str = strdup(yytext+1);  return T_LABEL_REF;   /* label reference */
+[a-zA-Z_][a-zA-Z0-9_]*":"         yylval.str = parse_label_decl(yytext); return T_LABEL_DECL;  /* label declaration */
+"["[0-9a-fA-F][0-9a-fA-F]*"]"     yylval.num = parse_literal(yytext); return T_LITERAL;
+
+                                  /* instructions: */
+"nop"                             return TOKEN(T_OP_NOP);
+"add"                             return TOKEN(T_OP_ADD);
+"addhi"                           return TOKEN(T_OP_ADDHI);
+"sub"                             return TOKEN(T_OP_SUB);
+"subhi"                           return TOKEN(T_OP_SUBHI);
+"and"                             return TOKEN(T_OP_AND);
+"or"                              return TOKEN(T_OP_OR);
+"xor"                             return TOKEN(T_OP_XOR);
+"not"                             return TOKEN(T_OP_NOT);
+"shl"                             return TOKEN(T_OP_SHL);
+"ushr"                            return TOKEN(T_OP_USHR);
+"ishr"                            return TOKEN(T_OP_ISHR);
+"rot"                             return TOKEN(T_OP_ROT);
+"mul8"                            return TOKEN(T_OP_MUL8);
+"min"                             return TOKEN(T_OP_MIN);
+"max"                             return TOKEN(T_OP_MAX);
+"cmp"                             return TOKEN(T_OP_CMP);
+"msb"                             return TOKEN(T_OP_MSB);
+"mov"                             return TOKEN(T_OP_MOV);
+"cwrite"                          return TOKEN(T_OP_CWRITE);
+"cread"                           return TOKEN(T_OP_CREAD);
+"store"                           return TOKEN(T_OP_STORE);
+"load"                            return TOKEN(T_OP_LOAD);
+"brne"                            return TOKEN(T_OP_BRNE);
+"breq"                            return TOKEN(T_OP_BREQ);
+"ret"                             return TOKEN(T_OP_RET);
+"call"                            return TOKEN(T_OP_CALL);
+"jump"                            return TOKEN(T_OP_JUMP);
+"waitin"                          return TOKEN(T_OP_WAITIN);
+"preemptleave"                    return TOKEN(T_OP_PREEMPTLEAVE);
+"<<"                              return TOKEN(T_LSHIFT);
+"(rep)"                           return TOKEN(T_REP);
+
+","                               return ',';
+"["                               return '[';
+"]"                               return ']';
+"+"                               return '+';
+
+.                                 fprintf(stderr, "error at line %d: Unknown token: %s\n", yyget_lineno(), yytext); exit(2); /* yyterminate() would look like a clean EOF to the parser and could silently truncate the program */
+
+%%
diff --git a/src/freedreno/afuc/meson.build b/src/freedreno/afuc/meson.build
new file mode 100644
index 00000000000..8a62a33d6b8
--- /dev/null
+++ b/src/freedreno/afuc/meson.build
@@ -0,0 +1,69 @@
+# Copyright © 2020 Google, Inc
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Generate parser.c and parser.h from parser.y with bison.
+afuc_parser = custom_target(
+  'parser.[ch]',
+  input: 'parser.y',
+  output: ['parser.c', 'parser.h'],
+  command: [
+    prog_bison, '@INPUT@', '--defines=@OUTPUT1@', '--output=@OUTPUT0@'
+  ]
+)
+
+# Generate lexer.c from lexer.l with flex.
+afuc_lexer = custom_target(
+  'lexer.c',
+  input: 'lexer.l',
+  output: 'lexer.c',
+  command: [
+    prog_flex, '-o', '@OUTPUT@', '@INPUT@'
+  ]
+)
+
+# Assembler: asm.c plus the generated lexer and parser, linked against
+# libfreedreno_rnn.  Only built by default when the 'freedreno' tools are
+# enabled, and never installed.
+asm = executable(
+  'asm',
+  [
+    'asm.c',
+    afuc_lexer,
+    afuc_parser,
+  ],
+  include_directories: [
+    inc_freedreno_rnn,
+  ],
+  link_with: [
+    libfreedreno_rnn,
+  ],
+  dependencies: [],
+  build_by_default : with_tools.contains('freedreno'),
+  install: false,
+)
+
+# Disassembler: disasm.c linked against libfreedreno_rnn, with the same
+# build_by_default / install settings as the assembler.
+disasm = executable(
+  'disasm',
+  'disasm.c',
+  include_directories: [
+    inc_freedreno_rnn,
+  ],
+  link_with: [
+    libfreedreno_rnn,
+  ],
+  dependencies: [],
+  build_by_default : with_tools.contains('freedreno'),
+  install: false
+)
diff --git a/src/freedreno/afuc/parser.y b/src/freedreno/afuc/parser.y
new file mode 100644
index 00000000000..9f82286692c
--- /dev/null
+++ b/src/freedreno/afuc/parser.y
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2013 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+%{
+#define YYDEBUG 0
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "asm.h"
+
+/* Scanner entry points used by the parser (declared here, defined by the lexer). */
+int yyget_lineno(void);
+
+#ifdef YYDEBUG
+int yydebug;
+#endif
+
+extern int yylex(void);
+typedef void *YY_BUFFER_STATE;
+extern YY_BUFFER_STATE yy_scan_string(const char *);
+extern void yy_delete_buffer(YY_BUFFER_STATE);
+
+int yyparse(void);
+
+/* Error callback invoked by the generated parser: prints the message with the current line number. */
+void yyerror(const char *error);
+void yyerror(const char *error)
+{
+	fprintf(stderr, "error at line %d: %s\n", yyget_lineno(), error);
+}
+
+static struct asm_instruction *instr;   /* current instruction */
+
+static void
+new_instr(int tok)
+{
+	instr = next_instr(tok);
+}
+/* The setters below each fill in one field of the current instruction; those with a has_ or is_ flag also mark the field as present. */
+static void
+dst(int num)
+{
+	instr->dst = num;
+}
+
+static void
+src1(int num)
+{
+	instr->src1 = num;
+}
+
+static void
+src2(int num)
+{
+	instr->src2 = num;
+}
+
+static void
+immed(int num)
+{
+	instr->immed = num;
+	instr->has_immed = true;
+}
+
+static void
+shift(int num)
+{
+	instr->shift = num;
+	instr->has_shift = true;
+}
+
+static void
+bit(int num)
+{
+	instr->bit = num;
+	instr->has_bit = true;
+}
+
+static void
+literal(uint32_t num)
+{
+	instr->literal = num;
+	instr->is_literal = true;
+}
+
+static void
+label(const char *str)
+{
+	instr->label = str;
+}
+
+%}
+
+%union {
+	int tok;
+	uint32_t num;
+	const char *str;
+}
+
+%{
+static void print_token(FILE *file, int type, YYSTYPE value)
+{
+	fprintf(file, "\ntype: %d\n", type);
+}
+
+#define YYPRINT(file, type, value) print_token(file, type, value)
+%}
+
+%token <num> T_INT
+%token <num> T_HEX
+%token <num> T_CONTROL_REG
+%token <str> T_LABEL_DECL
+%token <str> T_LABEL_REF
+%token <num> T_LITERAL
+%token <num> T_BIT
+%token <num> T_REGISTER
+
+%token <tok> T_OP_NOP
+%token <tok> T_OP_ADD
+%token <tok> T_OP_ADDHI
+%token <tok> T_OP_SUB
+%token <tok> T_OP_SUBHI
+%token <tok> T_OP_AND
+%token <tok> T_OP_OR
+%token <tok> T_OP_XOR
+%token <tok> T_OP_NOT
+%token <tok> T_OP_SHL
+%token <tok> T_OP_USHR
+%token <tok> T_OP_ISHR
+%token <tok> T_OP_ROT
+%token <tok> T_OP_MUL8
+%token <tok> T_OP_MIN
+%token <tok> T_OP_MAX
+%token <tok> T_OP_CMP
+%token <tok> T_OP_MSB
+%token <tok> T_OP_MOV
+%token <tok> T_OP_CWRITE
+%token <tok> T_OP_CREAD
+%token <tok> T_OP_STORE
+%token <tok> T_OP_LOAD
+%token <tok> T_OP_BRNE
+%token <tok> T_OP_BREQ
+%token <tok> T_OP_RET
+%token <tok> T_OP_CALL
+%token <tok> T_OP_JUMP
+%token <tok> T_OP_WAITIN
+%token <tok> T_OP_PREEMPTLEAVE
+%token <tok> T_LSHIFT
+%token <tok> T_REP
+
+%type <num> reg
+%type <num> immediate
+
+%error-verbose
+
+%start instrs
+
+%%
+/* Left-recursive list, so the parser stack depth does not grow with the number of instructions. */
+instrs:            instrs instr_or_label
+|                  instr_or_label
+
+instr_or_label:    instr_r
+|                  T_REP instr_r    { instr->rep = true; }
+|                  branch_instr
+|                  other_instr
+|                  T_LABEL_DECL     { decl_label($1); }
+
+/* instructions that can optionally have (rep) flag: */
+instr_r:           alu_instr
+|                  config_instr
+
+/* need to special case:
+ *  - not (single src, possibly an immediate)
+ *  - msb (single src, must be reg)
+ *  - mov (single src, plus possibly a shift)
+ * from the other ALU instructions:
+ */
+
+alu_msb_instr:     T_OP_MSB reg ',' reg        { new_instr($1); dst($2); src2($4); }
+
+alu_not_instr:     T_OP_NOT reg ',' reg        { new_instr($1); dst($2); src2($4); }
+|                  T_OP_NOT reg ',' immediate  { new_instr($1); dst($2); immed($4); }
+
+alu_mov_instr:     T_OP_MOV reg ',' reg        { new_instr($1); dst($2); src1($4); }
+|                  T_OP_MOV reg ',' immediate T_LSHIFT immediate {
+                       new_instr($1); dst($2); immed($4); shift($6);
+}
+|                  T_OP_MOV reg ',' immediate  { new_instr($1); dst($2); immed($4); }
+|                  T_OP_MOV reg ',' T_LABEL_REF T_LSHIFT immediate {
+                       new_instr($1); dst($2); label($4); shift($6);
+}
+|                  T_OP_MOV reg ',' T_LABEL_REF { new_instr($1); dst($2); label($4); }
+
+alu_2src_op:       T_OP_ADD       { new_instr($1); }
+|                  T_OP_ADDHI     { new_instr($1); }
+|                  T_OP_SUB       { new_instr($1); }
+|                  T_OP_SUBHI     { new_instr($1); }
+|                  T_OP_AND       { new_instr($1); }
+|                  T_OP_OR        { new_instr($1); }
+|                  T_OP_XOR       { new_instr($1); }
+|                  T_OP_SHL       { new_instr($1); }
+|                  T_OP_USHR      { new_instr($1); }
+|                  T_OP_ISHR      { new_instr($1); }
+|                  T_OP_ROT       { new_instr($1); }
+|                  T_OP_MUL8      { new_instr($1); }
+|                  T_OP_MIN       { new_instr($1); }
+|                  T_OP_MAX       { new_instr($1); }
+|                  T_OP_CMP       { new_instr($1); }
+
+alu_2src_instr:    alu_2src_op reg ',' reg ',' reg { dst($2); src1($4); src2($6); }
+|                  alu_2src_op reg ',' reg ',' immediate { dst($2); src1($4); immed($6); }
+
+alu_instr:         alu_2src_instr
+|                  alu_msb_instr
+|                  alu_not_instr
+|                  alu_mov_instr
+
+config_op:         T_OP_CWRITE    { new_instr($1); }
+|                  T_OP_CREAD     { new_instr($1); }
+|                  T_OP_LOAD      { new_instr($1); }
+|                  T_OP_STORE     { new_instr($1); }
+
+config_instr:      config_op reg ',' '[' reg '+' immediate ']' ',' immediate {
+                       src1($2); src2($5); immed($7); bit($10);
+}
+
+branch_op:         T_OP_BRNE      { new_instr($1); }
+|                  T_OP_BREQ      { new_instr($1); }
+
+branch_instr:      branch_op reg ',' T_BIT ',' T_LABEL_REF     { src1($2); bit($4); label($6); }
+|                  branch_op reg ',' immediate ',' T_LABEL_REF { src1($2); immed($4); label($6); }
+
+other_instr:       T_OP_CALL T_LABEL_REF         { new_instr($1); label($2); }
+|                  T_OP_PREEMPTLEAVE T_LABEL_REF { new_instr($1); label($2); }
+|                  T_OP_RET                      { new_instr($1); }
+|                  T_OP_JUMP T_LABEL_REF         { new_instr($1); label($2); }
+|                  T_OP_WAITIN                   { new_instr($1); }
+|                  T_OP_NOP                      { new_instr($1); }
+|                  T_LITERAL                     { new_instr($1); literal($1); } /* NOTE(review): the literal value is also passed as tok; confirm next_instr() tolerates that */
+
+reg:               T_REGISTER
+
+immediate:         T_HEX
+|                  T_INT
+|                  T_CONTROL_REG
+|                  T_CONTROL_REG '+' immediate { $$ = $1 + $3; }
+
diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build
index 6405a7d51dc..3df6dfb16c6 100644
--- a/src/freedreno/meson.build
+++ b/src/freedreno/meson.build
@@ -35,6 +35,7 @@ dep_libxml2 = dependency('libxml-2.0', required: false)
 if dep_libxml2.found()
   subdir('rnn')
   subdir('decode')
+  subdir('afuc')
 endif
 
 if with_tools.contains('drm-shim')
-- 
2.30.2