From f718ac62688b555a933c7112f656944288d04edb Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Sun, 1 Dec 2019 20:38:07 +0100 Subject: [PATCH] r600/sfn: Add a basic nir shader backend This commit adds support for vertex and fragment shaders from NIR, and support for most TEX and ALU instructions. Thanks Dave Airlied for adding support for a number of ALU instructions. v2: fix compilation with gcc-6 v3: rebase: use mesa/core glsl_type_size function Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/Makefile.sources | 52 +- src/gallium/drivers/r600/meson.build | 54 +- .../drivers/r600/sfn/sfn_alu_defines.cpp | 325 ++++ .../drivers/r600/sfn/sfn_alu_defines.h | 377 +++++ .../drivers/r600/sfn/sfn_callstack.cpp | 111 ++ src/gallium/drivers/r600/sfn/sfn_callstack.h | 47 + .../r600/sfn/sfn_conditionaljumptracker.cpp | 195 +++ .../r600/sfn/sfn_conditionaljumptracker.h | 69 + src/gallium/drivers/r600/sfn/sfn_debug.cpp | 139 ++ src/gallium/drivers/r600/sfn/sfn_debug.h | 121 ++ src/gallium/drivers/r600/sfn/sfn_defines.h | 318 ++++ .../r600/sfn/sfn_emitaluinstruction.cpp | 1301 +++++++++++++++++ .../drivers/r600/sfn/sfn_emitaluinstruction.h | 116 ++ .../drivers/r600/sfn/sfn_emitinstruction.cpp | 157 ++ .../drivers/r600/sfn/sfn_emitinstruction.h | 96 ++ .../r600/sfn/sfn_emittexinstruction.cpp | 974 ++++++++++++ .../drivers/r600/sfn/sfn_emittexinstruction.h | 104 ++ .../drivers/r600/sfn/sfn_instruction_alu.cpp | 152 ++ .../drivers/r600/sfn/sfn_instruction_alu.h | 138 ++ .../drivers/r600/sfn/sfn_instruction_base.cpp | 60 + .../drivers/r600/sfn/sfn_instruction_base.h | 102 ++ .../drivers/r600/sfn/sfn_instruction_cf.cpp | 162 ++ .../drivers/r600/sfn/sfn_instruction_cf.h | 107 ++ .../r600/sfn/sfn_instruction_export.cpp | 132 ++ .../drivers/r600/sfn/sfn_instruction_export.h | 106 ++ .../r600/sfn/sfn_instruction_fetch.cpp | 391 +++++ .../drivers/r600/sfn/sfn_instruction_fetch.h | 167 +++ .../drivers/r600/sfn/sfn_instruction_tex.cpp | 310 ++++ 
.../drivers/r600/sfn/sfn_instruction_tex.h | 137 ++ .../drivers/r600/sfn/sfn_ir_to_assembly.cpp | 1071 ++++++++++++++ .../drivers/r600/sfn/sfn_ir_to_assembly.h | 45 + src/gallium/drivers/r600/sfn/sfn_nir.cpp | 543 +++++++ src/gallium/drivers/r600/sfn/sfn_nir.h | 112 ++ .../drivers/r600/sfn/sfn_shader_base.cpp | 758 ++++++++++ .../drivers/r600/sfn/sfn_shader_base.h | 183 +++ .../drivers/r600/sfn/sfn_shader_fragment.cpp | 754 ++++++++++ .../drivers/r600/sfn/sfn_shader_fragment.h | 101 ++ .../drivers/r600/sfn/sfn_shader_vertex.cpp | 491 +++++++ .../drivers/r600/sfn/sfn_shader_vertex.h | 86 ++ src/gallium/drivers/r600/sfn/sfn_shaderio.cpp | 371 +++++ src/gallium/drivers/r600/sfn/sfn_shaderio.h | 162 ++ src/gallium/drivers/r600/sfn/sfn_value.cpp | 251 ++++ src/gallium/drivers/r600/sfn/sfn_value.h | 199 +++ .../drivers/r600/sfn/sfn_value_gpr.cpp | 319 ++++ src/gallium/drivers/r600/sfn/sfn_value_gpr.h | 182 +++ .../drivers/r600/sfn/sfn_valuepool.cpp | 558 +++++++ src/gallium/drivers/r600/sfn/sfn_valuepool.h | 255 ++++ 47 files changed, 12955 insertions(+), 6 deletions(-) create mode 100644 src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_alu_defines.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_callstack.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_callstack.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_debug.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_debug.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_defines.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_emitinstruction.h create mode 100644 
src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_alu.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_base.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_cf.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_export.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_instruction_tex.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_nir.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_nir.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_base.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_base.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_fragment.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shader_vertex.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_shaderio.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_shaderio.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_value.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_value.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp create mode 100644 
src/gallium/drivers/r600/sfn/sfn_value_gpr.h create mode 100644 src/gallium/drivers/r600/sfn/sfn_valuepool.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_valuepool.h diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index 763a49a07fa..5010ffaad5e 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -87,7 +87,55 @@ CXX_SOURCES = \ sb/sb_shader.cpp \ sb/sb_shader.h \ sb/sb_ssa_builder.cpp \ - sb/sb_valtable.cpp + sb/sb_valtable.cpp \ + sfn/sfn_alu_defines.cpp \ + sfn/sfn_alu_defines.h \ + sfn/sfn_callstack.cpp \ + sfn/sfn_callstack.h \ + sfn/sfn_conditionaljumptracker.cpp \ + sfn/sfn_conditionaljumptracker.h \ + sfn/sfn_defines.h \ + sfn/sfn_debug.cpp \ + sfn/sfn_debug.h \ + sfn/sfn_emitaluinstruction.cpp \ + sfn/sfn_emitaluinstruction.h \ + sfn/sfn_emitinstruction.cpp \ + sfn/sfn_emitinstruction.h \ + sfn/sfn_emittexinstruction.cpp \ + sfn/sfn_emittexinstruction.h \ + sfn/sfn_emitinstruction.h \ + sfn/sfn_instruction_alu.cpp \ + sfn/sfn_instruction_alu.h \ + sfn/sfn_instruction_base.cpp \ + sfn/sfn_instruction_base.h \ + sfn/sfn_instruction_cf.cpp \ + sfn/sfn_instruction_cf.h \ + sfn/sfn_instruction_export.cpp \ + sfn/sfn_instruction_export.h \ + sfn/sfn_instruction_fetch.cpp \ + sfn/sfn_instruction_fetch.h \ + sfn/sfn_instruction_tex.cpp \ + sfn/sfn_instruction_tex.h \ + sfn/sfn_ir_to_assembly.cpp \ + sfn/sfn_ir_to_assembly.h \ + sfn/sfn_nir.cpp \ + sfn/sfn_nir.h \ + sfn/sfn_shader_base.cpp \ + sfn/sfn_shader_base.h \ + sfn/sfn_shader_fragment.cpp \ + sfn/sfn_shader_fragment.h \ + sfn/sfn_shader_geometry.cpp \ + sfn/sfn_shader_geometry.h \ + sfn/sfn_shader_vertex.cpp \ + sfn/sfn_shader_vertex.h \ + sfn/sfn_shaderio.cpp \ + sfn/sfn_shaderio.h \ + sfn/sfn_value.cpp \ + sfn/sfn_value.h \ + sfn/sfn_value_gpr.cpp \ + sfn/sfn_value_gpr.h \ + sfn/sfn_valuepool.cpp \ + sfn/sfn_valuepool.h R600_GENERATED_FILES = \ - egd_tables.h \ No newline at end of file + 
egd_tables.h diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index 91f62a8a701..36805e1d0cc 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -105,7 +105,53 @@ files_r600 = files( 'sb/sb_shader.h', 'sb/sb_ssa_builder.cpp', 'sb/sb_valtable.cpp', -) + 'sfn/sfn_alu_defines.cpp', + 'sfn/sfn_alu_defines.h', + 'sfn/sfn_callstack.cpp', + 'sfn/sfn_callstack.h', + 'sfn/sfn_conditionaljumptracker.cpp', + 'sfn/sfn_conditionaljumptracker.h', + 'sfn/sfn_defines.h', + 'sfn/sfn_debug.cpp', + 'sfn/sfn_debug.h', + 'sfn/sfn_emitaluinstruction.cpp', + 'sfn/sfn_emitaluinstruction.h', + 'sfn/sfn_emitinstruction.cpp', + 'sfn/sfn_emitinstruction.h', + 'sfn/sfn_emittexinstruction.cpp', + 'sfn/sfn_emittexinstruction.h', + 'sfn/sfn_emitinstruction.h', + 'sfn/sfn_instruction_alu.cpp', + 'sfn/sfn_instruction_alu.h', + 'sfn/sfn_instruction_base.cpp', + 'sfn/sfn_instruction_base.h', + 'sfn/sfn_instruction_cf.cpp', + 'sfn/sfn_instruction_cf.h', + 'sfn/sfn_instruction_export.cpp', + 'sfn/sfn_instruction_export.h', + 'sfn/sfn_instruction_fetch.cpp', + 'sfn/sfn_instruction_fetch.h', + 'sfn/sfn_instruction_tex.cpp', + 'sfn/sfn_instruction_tex.h', + 'sfn/sfn_ir_to_assembly.cpp', + 'sfn/sfn_ir_to_assembly.h', + 'sfn/sfn_nir.cpp', + 'sfn/sfn_nir.h', + 'sfn/sfn_shader_base.cpp', + 'sfn/sfn_shader_base.h', + 'sfn/sfn_shader_fragment.cpp', + 'sfn/sfn_shader_fragment.h', + 'sfn/sfn_shader_vertex.cpp', + 'sfn/sfn_shader_vertex.h', + 'sfn/sfn_shaderio.cpp', + 'sfn/sfn_shaderio.h', + 'sfn/sfn_value.cpp', + 'sfn/sfn_value.h', + 'sfn/sfn_value_gpr.cpp', + 'sfn/sfn_value_gpr.h', + 'sfn/sfn_valuepool.cpp', + 'sfn/sfn_valuepool.h', + ) egd_tables_h = custom_target( 'egd_tables.h', @@ -126,13 +172,13 @@ libr600 = static_library( c_args : [c_vis_args, r600_c_args, '-Wstrict-overflow=0'], cpp_args : [cpp_vis_args], include_directories : [ - inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common, + inc_src, inc_common, 
inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common, inc_gallium_drivers, ], - dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm], + dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers], ) driver_r600 = declare_dependency( compile_args : '-DGALLIUM_R600', - link_with : [libr600, libradeonwinsys], + link_with : [libr600, libmesa_gallium, libradeonwinsys], ) diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp new file mode 100644 index 00000000000..8690fc2690f --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp @@ -0,0 +1,325 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "sfn_alu_defines.h"
+
+namespace r600 {
+
+const std::map<EAluOp, AluOp> alu_ops = { // opcode -> AluOp(nsrc, is_float, unit_mask, name)
+   {op0_nop, AluOp(0, 0, AluOp::a, "NOP")},
+   {op0_group_barrier, AluOp(0, 0, AluOp::a, "GROUP_BARRIER")},
+   {op0_group_seq_begin, AluOp(0, 0, AluOp::a, "GROUP_SEQ_BEGIN")},
+   {op0_group_seq_end, AluOp(0, 0, AluOp::a, "GROUP_SEQ_END")},
+   {op0_pred_set_clr, AluOp(0, 1, AluOp::a, "PRED_SET_CLR")},
+   {op0_store_flags, AluOp(0, 0, AluOp::v, "STORE_FLAGS")},
+   {op0_lds_1a, AluOp(0, 0, AluOp::v, "LDS_1A")},
+   {op0_lds_1a1d, AluOp(0, 0, AluOp::v, "LDS_1A1D")},
+   {op0_lds_2a, AluOp(0, 0, AluOp::v, "LDS_2A")},
+
+   {op1_bcnt_int, AluOp(1, 0, AluOp::v, "BCNT_INT")},
+   {op1_bcnt_accum_prev_int, AluOp(1, 0, AluOp::v, "BCNT_ACCUM_PREV_INT")},
+   {op1_bfrev_int, AluOp(1, 0, AluOp::a, "BFREV_INT")},
+   {op1_ceil, AluOp(1, 1, AluOp::a, "CEIL")},
+   {op1_cos, AluOp(1, 1, AluOp::t, "COS")},
+   {op1_exp_ieee, AluOp(1, 1, AluOp::t, "EXP_IEEE")},
+   {op1_floor, AluOp(1, 1, AluOp::a, "FLOOR")},
+   {op1_flt_to_int, AluOp(1, 0, AluOp::a, "FLT_TO_INT")},
+   {op1_flt_to_uint, AluOp(1, 1, AluOp::t, "FLT_TO_UINT")},
+   {op1_flt_to_int_rpi, AluOp(1, 1, AluOp::v, "FLT_TO_INT_RPI")},
+   {op1_flt_to_int_floor, AluOp(1, 1, AluOp::v, "FLT_TO_INT_FLOOR")},
+   {op1_flt16_to_flt32, AluOp(1, 1, AluOp::v, "FLT16_TO_FLT32")},
+   {op1_flt32_to_flt16, AluOp(1, 1, AluOp::v, "FLT32_TO_FLT16")},
+   {op1_flt32_to_flt64, AluOp(1, 1, AluOp::v, "FLT32_TO_FLT64")},
+   {op1_flt64_to_flt32, AluOp(1, 1, AluOp::a, "FLT64_TO_FLT32")},
+   {op1_fract, AluOp(1, 1, AluOp::a, "FRACT")},
+   {op1_fract_64, AluOp(1, 1, AluOp::v, "FRACT_64")},
+   {op1_frexp_64, AluOp(1, 1, AluOp::v, "FREXP_64")},
+   {op1_int_to_flt, AluOp(1, 0, AluOp::t, "INT_TO_FLT")},
+   {op1_ldexp_64, AluOp(1, 1, AluOp::v, "LDEXP_64")},
+   {op1_interp_load_p0, AluOp(1, 1, AluOp::v, "INTERP_LOAD_P0")},
+   {op1_interp_load_p10, AluOp(1, 1, AluOp::v, "INTERP_LOAD_P10")},
+   {op1_interp_load_p20, AluOp(1, 1, AluOp::v, "INTERP_LOAD_P20")},
+   {op1_load_store_flags, AluOp(1, 0, AluOp::v, "LOAD_STORE_FLAGS")},
+   {op1_log_clamped, AluOp(1, 1, AluOp::t, "LOG_CLAMPED")},
+   {op1_log_ieee, AluOp(1, 1, AluOp::t, "LOG_IEEE")},
+   {op1_max4, AluOp(1, 1, AluOp::v, "MAX4")},
+   {op1_mbcnt_32hi_int, AluOp(1, 0, AluOp::v, "MBCNT_32HI_INT")},
+   {op1_mbcnt_32lo_accum_prev_int, AluOp(1, 0, AluOp::v, "MBCNT_32LO_ACCUM_PREV_INT")},
+   {op1_mov, AluOp(1, 0, AluOp::a, "MOV")},
+   {op1_mova_int, AluOp(1, 0, AluOp::v, "MOVA_INT")},
+   {op1_not_int, AluOp(1, 0, AluOp::a, "NOT_INT")},
+   {op1_offset_to_flt, AluOp(1, 0, AluOp::v, "OFFSET_TO_FLT")},
+   {op1_pred_set_inv, AluOp(1, 1, AluOp::a, "PRED_SET_INV")},
+   {op1_pred_set_restore, AluOp(1, 1, AluOp::a, "PRED_SET_RESTORE")},
+   {op1_set_cf_idx0, AluOp(1, 0, AluOp::a, "SET_CF_IDX0")}, /* Reads from AR register? */
+   {op1_set_cf_idx1, AluOp(1, 0, AluOp::a, "SET_CF_IDX1")}, /* Reads from AR register? */
+   {op1_recip_clamped, AluOp(1, 1, AluOp::t, "RECIP_CLAMPED")},
+   {op1_recip_ff, AluOp(1, 1, AluOp::t, "RECIP_FF")},
+   {op1_recip_ieee, AluOp(1, 1, AluOp::t, "RECIP_IEEE")},
+   {op1_recipsqrt_clamped, AluOp(1, 1, AluOp::t, "RECIPSQRT_CLAMPED")},
+   {op1_recipsqrt_ff, AluOp(1, 1, AluOp::t, "RECIPSQRT_FF")},
+   {op1_recipsqrt_ieee1, AluOp(1, 1, AluOp::t, "RECIPSQRT_IEEE")},
+   {op1_recip_int, AluOp(1, 0, AluOp::t, "RECIP_INT")},
+   {op1_recip_uint, AluOp(1, 0, AluOp::t, "RECIP_UINT")},
+   {op1_recip_64, AluOp(1, 1, AluOp::t, "RECIP_64")},
+   {op1_recip_clamped_64, AluOp(1, 1, AluOp::t, "RECIP_CLAMPED_64")},
+   {op1_recipsqrt_64, AluOp(1, 1, AluOp::t, "RECIPSQRT_64")},
+   {op1_recipsqrt_clamped_64, AluOp(1, 1, AluOp::t, "RECIPSQRT_CLAMPED_64")},
+   {op1_rndne, AluOp(1, 1, AluOp::a, "RNDNE")},
+   {op1_sqrt_ieee, AluOp(1, 1, AluOp::t, "SQRT_IEEE")},
+   {op1_sin, AluOp(1, 1, AluOp::t, "SIN")},
+   {op1_trunc, AluOp(1, 1, AluOp::a, "TRUNC")},
+   {op1_sqrt_64, AluOp(1, 1, AluOp::t, "SQRT_64")},
+   {op1_ubyte0_flt, AluOp(1, 1, AluOp::v, "UBYTE0_FLT")},
+   {op1_ubyte1_flt, AluOp(1, 1, AluOp::v, "UBYTE1_FLT")},
+   {op1_ubyte2_flt, AluOp(1, 1, AluOp::v, "UBYTE2_FLT")},
+   {op1_ubyte3_flt, AluOp(1, 1, AluOp::v, "UBYTE3_FLT")},
+   {op1_uint_to_flt, AluOp(1, 0, AluOp::t, "UINT_TO_FLT")},
+   {op1_ffbh_uint, AluOp(1, 0, AluOp::v, "FFBH_UINT")},
+   {op1_ffbl_int, AluOp(1, 0, AluOp::v, "FFBL_INT")},
+   {op1_ffbh_int, AluOp(1, 0, AluOp::v, "FFBH_INT")},
+   {op1_flt_to_uint4, AluOp(1, 1, AluOp::v, "FLT_TO_UINT4")},
+   {op1v_flt32_to_flt64, AluOp(1, 1, AluOp::a, "FLT32_TO_FLT64")},
+   {op1v_flt64_to_flt32, AluOp(1, 1, AluOp::v, "FLT64_TO_FLT32")},
+
+   {op2_add, AluOp(2, 1, AluOp::a, "ADD")},
+   {op2_bfm_int, AluOp(2, 0, AluOp::v, "BFM_INT")},
+   {op2_mul, AluOp(2, 1, AluOp::a, "MUL")},
+   {op2_mul_ieee, AluOp(2, 1, AluOp::a, "MUL_IEEE")},
+   {op2_max, AluOp(2, 1, AluOp::a, "MAX")},
+   {op2_min, AluOp(2, 1, AluOp::a, "MIN")},
+   {op2_max_dx10, AluOp(2, 1, AluOp::a, "MAX_DX10")},
+   {op2_min_dx10, AluOp(2, 1, AluOp::a, "MIN_DX10")},
+   {op2_sete, AluOp(2, 1, AluOp::a, "SETE")},
+   {op2_setgt, AluOp(2, 1, AluOp::a, "SETGT")},
+   {op2_setge, AluOp(2, 1, AluOp::a, "SETGE")},
+   {op2_setne, AluOp(2, 1, AluOp::a, "SETNE")},
+   {op2_sete_dx10, AluOp(2, 1, AluOp::a, "SETE_DX10")},
+   {op2_setgt_dx10, AluOp(2, 1, AluOp::a, "SETGT_DX10")},
+   {op2_setge_dx10, AluOp(2, 1, AluOp::a, "SETGE_DX10")},
+   {op2_setne_dx10, AluOp(2, 1, AluOp::a, "SETNE_DX10")},
+   {op2_ashr_int, AluOp(2, 0, AluOp::a, "ASHR_INT")},
+   {op2_lshr_int, AluOp(2, 0, AluOp::a, "LSHR_INT")},
+   {op2_lshl_int, AluOp(2, 0, AluOp::a, "LSHL_INT")},
+   {op2_mul_64, AluOp(2, 1, AluOp::a, "MUL_64")},
+   {op2_pred_setgt_uint, AluOp(2, 0, AluOp::a, "PRED_SETGT_UINT")},
+   {op2_pred_setge_uint, AluOp(2, 0, AluOp::a, "PRED_SETGE_UINT")},
+   {op2_pred_sete, AluOp(2, 1, AluOp::a, "PRED_SETE")},
+   {op2_pred_setgt, AluOp(2, 1, AluOp::a, "PRED_SETGT")},
+   {op2_pred_setge, AluOp(2, 1, AluOp::a, "PRED_SETGE")},
+   {op2_pred_setne, AluOp(2, 1, AluOp::a, "PRED_SETNE")},
+   {op2_pred_set_pop, AluOp(2, 1, AluOp::a, "PRED_SET_POP")},
+   {op2_pred_sete_push, AluOp(2, 1, AluOp::a, "PRED_SETE_PUSH")},
+   {op2_pred_setgt_push, AluOp(2, 1, AluOp::a, "PRED_SETGT_PUSH")},
+   {op2_pred_setge_push, AluOp(2, 1, AluOp::a, "PRED_SETGE_PUSH")},
+   {op2_pred_setne_push, AluOp(2, 1, AluOp::a, "PRED_SETNE_PUSH")},
+   {op2_kille, AluOp(2, 1, AluOp::a, "KILLE")},
+   {op2_killgt, AluOp(2, 1, AluOp::a, "KILLGT")},
+   {op2_killge, AluOp(2, 1, AluOp::a, "KILLGE")},
+   {op2_killne, AluOp(2, 1, AluOp::a, "KILLNE")},
+   {op2_and_int, AluOp(2, 0, AluOp::a, "AND_INT")},
+   {op2_or_int, AluOp(2, 0, AluOp::a, "OR_INT")},
+   {op2_xor_int, AluOp(2, 0, AluOp::a, "XOR_INT")},
+   {op2_add_int, AluOp(2, 0, AluOp::a, "ADD_INT")},
+   {op2_sub_int, AluOp(2, 0, AluOp::a, "SUB_INT")},
+   {op2_max_int, AluOp(2, 0, AluOp::a, "MAX_INT")},
+   {op2_min_int, AluOp(2, 0, AluOp::a, "MIN_INT")},
+   {op2_max_uint, AluOp(2, 0, AluOp::a, "MAX_UINT")},
+   {op2_min_uint, AluOp(2, 0, AluOp::a, "MIN_UINT")},
+   {op2_sete_int, AluOp(2, 0, AluOp::a, "SETE_INT")},
+   {op2_setgt_int, AluOp(2, 0, AluOp::a, "SETGT_INT")},
+   {op2_setge_int, AluOp(2, 0, AluOp::a, "SETGE_INT")},
+   {op2_setne_int, AluOp(2, 0, AluOp::a, "SETNE_INT")},
+   {op2_setgt_uint, AluOp(2, 0, AluOp::a, "SETGT_UINT")},
+   {op2_setge_uint, AluOp(2, 0, AluOp::a, "SETGE_UINT")},
+   {op2_killgt_uint, AluOp(2, 0, AluOp::a, "KILLGT_UINT")},
+   {op2_killge_uint, AluOp(2, 0, AluOp::a, "KILLGE_UINT")},
+   {op2_prede_int, AluOp(2, 0, AluOp::a, "PREDE_INT")},
+   {op2_pred_setgt_int, AluOp(2, 0, AluOp::a, "PRED_SETGT_INT")},
+   {op2_pred_setge_int, AluOp(2, 0, AluOp::a, "PRED_SETGE_INT")},
+   {op2_pred_setne_int, AluOp(2, 0, AluOp::a, "PRED_SETNE_INT")},
+   {op2_kille_int, AluOp(2, 0, AluOp::a, "KILLE_INT")},
+   {op2_killgt_int, AluOp(2, 0, AluOp::a, "KILLGT_INT")},
+   {op2_killge_int, AluOp(2, 0, AluOp::a, "KILLGE_INT")},
+   {op2_killne_int, AluOp(2, 0, AluOp::a, "KILLNE_INT")},
+   {op2_pred_sete_push_int, AluOp(2, 0, AluOp::a, "PRED_SETE_PUSH_INT")},
+   {op2_pred_setgt_push_int, AluOp(2, 0, AluOp::a, "PRED_SETGT_PUSH_INT")},
+   {op2_pred_setge_push_int, AluOp(2, 0, AluOp::a, "PRED_SETGE_PUSH_INT")},
+   {op2_pred_setne_push_int, AluOp(2, 0, AluOp::a, "PRED_SETNE_PUSH_INT")},
+   {op2_pred_setlt_push_int, AluOp(2, 0, AluOp::a, "PRED_SETLT_PUSH_INT")},
+   {op2_pred_setle_push_int, AluOp(2, 0, AluOp::a, "PRED_SETLE_PUSH_INT")},
+   {op2_addc_uint, AluOp(2, 0, AluOp::a, "ADDC_UINT")},
+   {op2_subb_uint, AluOp(2, 0, AluOp::a, "SUBB_UINT")},
+   {op2_set_mode, AluOp(2, 0, AluOp::a, "SET_MODE")},
+   {op2_set_lds_size, AluOp(2, 0, AluOp::a, "SET_LDS_SIZE")},
+   {op2_mullo_int, AluOp(2, 0, AluOp::t, "MULLO_INT")},
+   {op2_mulhi_int, AluOp(2, 0, AluOp::t, "MULHI_INT")},
+   {op2_mullo_uint, AluOp(2, 0, AluOp::t, "MULLO_UINT")},
+   {op2_mulhi_uint, AluOp(2, 0, AluOp::t, "MULHI_UINT")},
+   {op2_dot_ieee, AluOp(2, 1, AluOp::v, "DOT_IEEE")},
+   {op2_mulhi_uint24, AluOp(2, 0, AluOp::v, "MULHI_UINT24")},
+   {op2_mul_uint24, AluOp(2, 0, AluOp::v, "MUL_UINT24")},
+   {op2_sete_64, AluOp(2, 1, AluOp::v, "SETE_64")},
+   {op2_setne_64, AluOp(2, 1, AluOp::v, "SETNE_64")},
+   {op2_setgt_64, AluOp(2, 1, AluOp::v, "SETGT_64")},
+   {op2_setge_64, AluOp(2, 1, AluOp::v, "SETGE_64")},
+   {op2_min_64, AluOp(2, 1, AluOp::v, "MIN_64")},
+   {op2_max_64, AluOp(2, 1, AluOp::v, "MAX_64")},
+   {op2_dot4, AluOp(2, 1, AluOp::v, "DOT4")},
+   {op2_dot4_ieee, AluOp(2, 1, AluOp::v, "DOT4_IEEE")},
+   {op2_cube, AluOp(2, 1, AluOp::v, "CUBE")},
+   {op2_pred_setgt_64, AluOp(2, 1, AluOp::v, "PRED_SETGT_64")},
+   {op2_pred_sete_64, AluOp(2, 1, AluOp::v, "PRED_SETE_64")},
+   {op2_pred_setge_64, AluOp(2, 1, AluOp::v, "PRED_SETGE_64")},
+   {OP2V_MUL_64, AluOp(2, 1, AluOp::v, "MUL_64")},
+   {op2_add_64, AluOp(2, 1, AluOp::v, "ADD_64")},
+   {op2_sad_accum_prev_uint, AluOp(2, 0, AluOp::v, "SAD_ACCUM_PREV_UINT")},
+   {op2_dot, AluOp(2, 1, AluOp::v, "DOT")},
+   {op2_mul_prev, AluOp(2, 1, AluOp::v, "MUL_PREV")},
+   {op2_mul_ieee_prev, AluOp(2, 1, AluOp::v, "MUL_IEEE_PREV")},
+   {op2_add_prev, AluOp(2, 1, AluOp::v, "ADD_PREV")},
+   {op2_muladd_prev, AluOp(2, 1, AluOp::v, "MULADD_PREV")},
+   {op2_muladd_ieee_prev, AluOp(2, 1, AluOp::v, "MULADD_IEEE_PREV")},
+   {op2_interp_xy, AluOp(2, 1, AluOp::v, "INTERP_XY")},
+   {op2_interp_zw, AluOp(2, 1, AluOp::v, "INTERP_ZW")},
+   {op2_interp_x, AluOp(2, 1, AluOp::v, "INTERP_X")},
+   {op2_interp_z, AluOp(2, 1, AluOp::v, "INTERP_Z")},
+
+   {op3_bfe_uint, AluOp(3, 0, AluOp::v, "BFE_UINT")},
+   {op3_bfe_int, AluOp(3, 0, AluOp::v, "BFE_INT")},
+   {op3_bfi_int, AluOp(3, 0, AluOp::v, "BFI_INT")},
+   {op3_fma, AluOp(3, 1, AluOp::v, "FMA")},
+   {op3_cndne_64, AluOp(3, 1, AluOp::v, "CNDNE_64")},
+   {op3_fma_64, AluOp(3, 1, AluOp::v, "FMA_64")},
+   {op3_lerp_uint, AluOp(3, 0, AluOp::v, "LERP_UINT")},
+   {op3_bit_align_int, AluOp(3, 0, AluOp::v, "BIT_ALIGN_INT")},
+   {op3_byte_align_int, AluOp(3, 0, AluOp::v, "BYTE_ALIGN_INT")},
+   {op3_sad_accum_uint, AluOp(3, 0, AluOp::v, "SAD_ACCUM_UINT")},
+   {op3_sad_accum_hi_uint, AluOp(3, 0, AluOp::v, "SAD_ACCUM_HI_UINT")},
+   {op3_muladd_uint24, AluOp(3, 0, AluOp::v, "MULADD_UINT24")},
+   {op3_lds_idx_op, AluOp(3, 0, AluOp::x, "LDS_IDX_OP")},
+   {op3_muladd, AluOp(3, 1, AluOp::a, "MULADD")},
+   {op3_muladd_m2, AluOp(3, 1, AluOp::a, "MULADD_M2")},
+   {op3_muladd_m4, AluOp(3, 1, AluOp::a, "MULADD_M4")},
+   {op3_muladd_d2, AluOp(3, 1, AluOp::a, "MULADD_D2")},
+   {op3_muladd_ieee, AluOp(3, 1, AluOp::a, "MULADD_IEEE")},
+   {op3_cnde, AluOp(3, 1, AluOp::a, "CNDE")},
+   {op3_cndgt, AluOp(3, 1, AluOp::a, "CNDGT")},
+   {op3_cndge, AluOp(3, 1, AluOp::a, "CNDGE")},
+   {op3_cnde_int, AluOp(3, 0, AluOp::a, "CNDE_INT")},
+   {op3_cndgt_int, AluOp(3, 0, AluOp::a, "CNDGT_INT")},
+   {op3_cndge_int, AluOp(3, 0, AluOp::a, "CNDGE_INT")},
+   {op3_mul_lit, AluOp(3, 1, AluOp::t, "MUL_LIT")}
+};
+
+const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = { // source selector -> {use_chan, descr}
+   {ALU_SRC_LDS_OQ_A, {false, "LDS_OQ_A"}},
+   {ALU_SRC_LDS_OQ_B, {false, "LDS_OQ_B"}},
+   {ALU_SRC_LDS_OQ_A_POP, {false, "LDS_OQ_A_POP"}},
+   {ALU_SRC_LDS_OQ_B_POP, {false, "LDS_OQ_B_POP"}},
+   {ALU_SRC_LDS_DIRECT_A, {false, "LDS_DIRECT_A"}},
+   {ALU_SRC_LDS_DIRECT_B, {false, "LDS_DIRECT_B"}},
+   {ALU_SRC_TIME_HI, {false, "TIME_HI"}},
+   {ALU_SRC_TIME_LO, {false, "TIME_LO"}},
+   {ALU_SRC_MASK_HI, {false, "MASK_HI"}},
+   {ALU_SRC_MASK_LO, {false, "MASK_LO"}},
+   {ALU_SRC_HW_WAVE_ID, {false, "HW_WAVE_ID"}},
+   {ALU_SRC_SIMD_ID, {false, "SIMD_ID"}},
+   {ALU_SRC_SE_ID, {false, "SE_ID"}},
+   {ALU_SRC_HW_THREADGRP_ID, {false, "HW_THREADGRP_ID"}},
+   {ALU_SRC_WAVE_ID_IN_GRP, {false, "WAVE_ID_IN_GRP"}},
+   {ALU_SRC_NUM_THREADGRP_WAVES, {false, "NUM_THREADGRP_WAVES"}},
+   {ALU_SRC_HW_ALU_ODD, {false, "HW_ALU_ODD"}},
+   {ALU_SRC_LOOP_IDX, {false, "LOOP_IDX"}},
+   {ALU_SRC_PARAM_BASE_ADDR, {false, "PARAM_BASE_ADDR"}},
+   {ALU_SRC_NEW_PRIM_MASK, {false, "NEW_PRIM_MASK"}},
+   {ALU_SRC_PRIM_MASK_HI, {false, "PRIM_MASK_HI"}},
+   {ALU_SRC_PRIM_MASK_LO, {false, "PRIM_MASK_LO"}},
+   {ALU_SRC_1_DBL_L, {false, "1.0L"}},
+   {ALU_SRC_1_DBL_M, {false, "1.0H"}},
+   {ALU_SRC_0_5_DBL_L, {false, "0.5L"}},
+   {ALU_SRC_0_5_DBL_M, {false, "0.5H"}},
+   {ALU_SRC_0, {false, "0"}},
+   {ALU_SRC_1, {false, "1.0"}},
+   {ALU_SRC_1_INT, {false, "1"}},
+   {ALU_SRC_M_1_INT, {false, "-1"}},
+   {ALU_SRC_0_5, {false, "0.5"}},
+   {ALU_SRC_LITERAL, {true, "ALU_SRC_LITERAL"}},
+   {ALU_SRC_PV, {true, "PV"}},
+   {ALU_SRC_PS, {false, "PS"}}
+};
+
+const std::map<ESDOp, LDSOp> lds_ops = { // LDS opcode -> {nsrc, name}
+   {DS_OP_ADD, {2, "DS_ADD"}},
+   {DS_OP_SUB, {2, "DS_SUB"}},
+   {DS_OP_RSUB, {2, "DS_RSUB"}},
+   {DS_OP_INC, {2, "DS_INC"}},
+   {DS_OP_DEC, {2, "DS_DEC"}},
+   {DS_OP_MIN_INT, {2, "DS_MIN_INT"}},
+   {DS_OP_MAX_INT, {2, "DS_MAX_INT"}},
+   {DS_OP_MIN_UINT, {2, "DS_MIN_UINT"}},
+   {DS_OP_MAX_UINT, {2, "DS_MAX_UINT"}},
+   {DS_OP_AND, {2, "DS_AND"}},
+   {DS_OP_OR, {2, "DS_OR"}},
+   {DS_OP_XOR, {2, "DS_XOR"}},
+   {DS_OP_MSKOR, {3, "DS_MSKOR"}},
+   {DS_OP_WRITE, {2, "DS_WRITE"}},
+   {DS_OP_WRITE_REL, {3, "DS_WRITE_REL"}},
+   {DS_OP_WRITE2, {3, "DS_WRITE2"}},
+   {DS_OP_CMP_STORE, {3, "DS_CMP_STORE"}},
+   {DS_OP_CMP_STORE_SPF, {3, "DS_CMP_STORE_SPF"}},
+   {DS_OP_BYTE_WRITE, {2, "DS_BYTE_WRITE"}},
+   {DS_OP_SHORT_WRITE, {2, "DS_SHORT_WRITE"}},
+   {DS_OP_ADD_RET, {2, "DS_ADD_RET"}},
+   {DS_OP_SUB_RET, {2, "DS_SUB_RET"}},
+   {DS_OP_RSUB_RET, {2, "DS_RSUB_RET"}},
+   {DS_OP_INC_RET, {2, "DS_INC_RET"}},
+   {DS_OP_DEC_RET, {2, "DS_DEC_RET"}},
+   {DS_OP_MIN_INT_RET, {2, "DS_MIN_INT_RET"}},
+   {DS_OP_MAX_INT_RET, {2, "DS_MAX_INT_RET"}},
+   {DS_OP_MIN_UINT_RET, {2, "DS_MIN_UINT_RET"}},
+   {DS_OP_MAX_UINT_RET, {2, "DS_MAX_UINT_RET"}},
+   {DS_OP_AND_RET, {2, "DS_AND_RET"}},
+   {DS_OP_OR_RET, {2, "DS_OR_RET"}},
+   {DS_OP_XOR_RET, {2, "DS_XOR_RET"}},
+   {DS_OP_MSKOR_RET, {3, "DS_MSKOR_RET"}},
+   {DS_OP_XCHG_RET, {2, "DS_XCHG_RET"}},
+   {DS_OP_XCHG_REL_RET, {3, "DS_XCHG_REL_RET"}},
+   {DS_OP_XCHG2_RET, {3, "DS_XCHG2_RET"}},
+   {DS_OP_CMP_XCHG_RET, {3, "DS_CMP_XCHG_RET"}},
+   {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
+   {DS_OP_READ_RET, {1, "DS_READ_RET"}},
+   {DS_OP_READ_REL_RET, {1, "DS_READ_REL_RET"}},
+   {DS_OP_READ2_RET, {2, "DS_READ2_RET"}},
+   {DS_OP_READWRITE_RET, {3, "DS_READWRITE_RET"}},
+   {DS_OP_BYTE_READ_RET, {1, "DS_BYTE_READ_RET"}},
+   {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
+   {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
+   {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
+   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
+};
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
new file mode 100644
index 00000000000..3d270bd2deb
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
@@ -0,0 +1,377 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef r600_sfn_alu_defines_h
+#define r600_sfn_alu_defines_h
+
+#include <map>
+#include <bitset>
+
+namespace r600 {
+
+/* ALU op2 instructions 17:7 top three bits always zero. */
+enum EAluOp { // enumerators are the keys of alu_ops, so every value must be unique
+   op2_add = 0,
+   op2_mul = 1,
+   op2_mul_ieee = 2,
+   op2_max = 3,
+   op2_min = 4,
+   op2_max_dx10 = 5,
+   op2_min_dx10 = 6,
+   op2_sete = 8,
+   op2_setgt = 9,
+   op2_setge = 10,
+   op2_setne = 11,
+   op2_sete_dx10 = 12,
+   op2_setgt_dx10 = 13,
+   op2_setge_dx10 = 14,
+   op2_setne_dx10 = 15,
+   op1_fract = 16,
+   op1_trunc = 17,
+   op1_ceil = 18,
+   op1_rndne = 19,
+   op1_floor = 20,
+   op2_ashr_int = 21,
+   op2_lshr_int = 22,
+   op2_lshl_int = 23,
+   op1_mov = 25,
+   op0_nop = 26,
+   op2_mul_64 = 27,
+   op1_flt64_to_flt32 = 28,
+   op1v_flt32_to_flt64 = 29,
+   op2_pred_setgt_uint = 30,
+   op2_pred_setge_uint = 31,
+   op2_pred_sete = 32,
+   op2_pred_setgt = 33,
+   op2_pred_setge = 34,
+   op2_pred_setne = 35,
+   op1_pred_set_inv = 36,
+   op2_pred_set_pop = 37,
+   op0_pred_set_clr = 38,
+   op1_pred_set_restore = 39,
+   op2_pred_sete_push = 40,
+   op2_pred_setgt_push = 41,
+   op2_pred_setge_push = 42,
+   op2_pred_setne_push = 43,
+   op2_kille = 44,
+   op2_killgt = 45,
+   op2_killge = 46,
+   op2_killne = 47,
+   op2_and_int = 48,
+   op2_or_int = 49,
+   op2_xor_int = 50,
+   op1_not_int = 51,
+   op2_add_int = 52,
+   op2_sub_int = 53,
+   op2_max_int = 54,
+   op2_min_int = 55,
+   op2_max_uint = 56,
+   op2_min_uint = 57,
+   op2_sete_int = 58,
+   op2_setgt_int = 59,
+   op2_setge_int = 60,
+   op2_setne_int = 61,
+   op2_setgt_uint = 62,
+   op2_setge_uint = 63,
+   op2_killgt_uint = 64,
+   op2_killge_uint = 65,
+   op2_prede_int = 66,
+   op2_pred_setgt_int = 67,
+   op2_pred_setge_int = 68,
+   op2_pred_setne_int = 69,
+   op2_kille_int = 70,
+   op2_killgt_int = 71,
+   op2_killge_int = 72,
+   op2_killne_int = 73,
+   op2_pred_sete_push_int = 74,
+   op2_pred_setgt_push_int = 75,
+   op2_pred_setge_push_int = 76,
+   op2_pred_setne_push_int = 77,
+   op2_pred_setlt_push_int = 78,
+   op2_pred_setle_push_int = 79,
+   op1_flt_to_int = 80,
+   op1_bfrev_int = 81,
+   op2_addc_uint = 82,
+   op2_subb_uint = 83,
+   op0_group_barrier = 84,
+   op0_group_seq_begin = 85,
+   op0_group_seq_end = 86,
+   op2_set_mode = 87,
+   op1_set_cf_idx0 = 88,
+   op1_set_cf_idx1 = 89,
+   op2_set_lds_size = 90,
+   op1_exp_ieee = 129,
+   op1_log_clamped = 130,
+   op1_log_ieee = 131,
+   op1_recip_clamped = 132,
+   op1_recip_ff = 133,
+   op1_recip_ieee = 134,
+   op1_recipsqrt_clamped = 135,
+   op1_recipsqrt_ff = 136,
+   op1_recipsqrt_ieee1 = 137,
+   op1_sqrt_ieee = 138,
+   op1_sin = 141,
+   op1_cos = 142,
+   op2_mullo_int = 143,
+   op2_mulhi_int = 144,
+   op2_mullo_uint = 145,
+   op2_mulhi_uint = 146,
+   op1_recip_int = 147,
+   op1_recip_uint = 148,
+   op1_recip_64 = 149,
+   op1_recip_clamped_64 = 150,
+   op1_recipsqrt_64 = 151,
+   op1_recipsqrt_clamped_64 = 152,
+   op1_sqrt_64 = 153,
+   op1_flt_to_uint = 154,
+   op1_int_to_flt = 155,
+   op1_uint_to_flt = 156,
+   op2_bfm_int = 160,
+   op1_flt32_to_flt16 = 162,
+   op1_flt16_to_flt32 = 163,
+   op1_ubyte0_flt = 164,
+   op1_ubyte1_flt = 165,
+   op1_ubyte2_flt = 166,
+   op1_ubyte3_flt = 167,
+   op1_bcnt_int = 170,
+   op1_ffbh_uint = 171,
+   op1_ffbl_int = 172,
+   op1_ffbh_int = 173,
+   op1_flt_to_uint4 = 174,
+   op2_dot_ieee = 175,
+   op1_flt_to_int_rpi = 176,
+   op1_flt_to_int_floor = 177,
+   op2_mulhi_uint24 = 178,
+   op1_mbcnt_32hi_int = 179,
+   op1_offset_to_flt = 180,
+   op2_mul_uint24 = 181,
+   op1_bcnt_accum_prev_int = 182,
+   op1_mbcnt_32lo_accum_prev_int = 183,
+   op2_sete_64 = 184,
+   op2_setne_64 = 185,
+   op2_setgt_64 = 186,
+   op2_setge_64 = 187,
+   op2_min_64 = 188,
+   op2_max_64 = 189,
+   op2_dot4 = 190,
+   op2_dot4_ieee = 191,
+   op2_cube = 192,
+   op1_max4 = 193,
+   op1_frexp_64 = 196,
+   op1_ldexp_64 = 197,
+   op1_fract_64 = 198,
+   op2_pred_setgt_64 = 199,
+   op2_pred_sete_64 = 200, // was 198, which aliased op1_fract_64 and hid this opcode's alu_ops entry
+   op2_pred_setge_64 = 201,
+   OP2V_MUL_64 = 202,
+   op2_add_64 = 203,
+   op1_mova_int = 204,
+   op1v_flt64_to_flt32 = 205,
+   op1_flt32_to_flt64 = 206,
+   op2_sad_accum_prev_uint = 207,
+   op2_dot = 208,
+   op2_mul_prev = 209,
+   op2_mul_ieee_prev = 210,
+   op2_add_prev = 211,
+   op2_muladd_prev = 212,
+   op2_muladd_ieee_prev = 213,
+   op2_interp_xy = 214,
+   op2_interp_zw = 215,
+   op2_interp_x = 216,
+   op2_interp_z = 217,
+   op0_store_flags = 218,
+   op1_load_store_flags = 219,
+   op0_lds_1a = 220,
+   op0_lds_1a1d = 221,
+   op0_lds_2a = 223,
+   op1_interp_load_p0 = 224,
+   op1_interp_load_p10 = 225, // was 125; continues the sequence started by op1_interp_load_p0
+   op1_interp_load_p20 = 226, // was 126
+   // op 3 all left shift 6
+   op3_bfe_uint = 4<< 6,
+   op3_bfe_int = 5<< 6,
+   op3_bfi_int = 6<< 6,
+   op3_fma = 7<< 6,
+   op3_cndne_64 = 9<< 6,
+   op3_fma_64 = 10<< 6,
+   op3_lerp_uint = 11<< 6,
+   op3_bit_align_int = 12<< 6,
+   op3_byte_align_int = 13<< 6,
+   op3_sad_accum_uint = 14<< 6,
+   op3_sad_accum_hi_uint = 15<< 6,
+   op3_muladd_uint24 = 16<< 6,
+   op3_lds_idx_op = 17<< 6,
+   op3_muladd = 20<< 6,
+   op3_muladd_m2 = 21<< 6,
+   op3_muladd_m4 = 22<< 6,
+   op3_muladd_d2 = 23<< 6,
+   op3_muladd_ieee = 24<< 6,
+   op3_cnde = 25<< 6,
+   op3_cndgt = 26<< 6,
+   op3_cndge = 27<< 6,
+   op3_cnde_int = 28<< 6,
+   op3_cndgt_int = 29<< 6,
+   op3_cndge_int = 30<< 6,
+   op3_mul_lit = 31<< 6
+};
+
+
+
+using AluOpFlags=std::bitset<32>;
+
+struct AluOp { // per-opcode description: source count, float flag, unit mask and mnemonic
+   static constexpr int x = 1;
+   static constexpr int y = 2;
+   static constexpr int z = 4;
+   static constexpr int w = 8;
+   static constexpr int v = 15; // x | y | z | w
+   static constexpr int t = 16;
+   static constexpr int a = 31; // v | t
+
+   AluOp(int ns, int f, int um, const char *n):
+      nsrc(ns), is_float(f), unit_mask(um), name(n)
+   {
+   }
+
+   bool can_channel(int flags) const { // true when flags shares at least one bit with unit_mask
+      return flags & unit_mask;
+   }
+
+   int nsrc: 4;
+   int is_float:1;
+   int unit_mask: 5;
+   const char *name;
+};
+
+extern const std::map<EAluOp, AluOp> alu_ops;
+
+enum AluInlineConstants {
+   ALU_SRC_LDS_OQ_A = 219,
+   ALU_SRC_LDS_OQ_B = 220,
+   ALU_SRC_LDS_OQ_A_POP = 221,
+   ALU_SRC_LDS_OQ_B_POP = 222,
+   ALU_SRC_LDS_DIRECT_A = 223,
+   ALU_SRC_LDS_DIRECT_B = 224,
+   ALU_SRC_TIME_HI = 227,
+   ALU_SRC_TIME_LO = 228,
+   ALU_SRC_MASK_HI = 229,
+   ALU_SRC_MASK_LO = 230,
+   ALU_SRC_HW_WAVE_ID = 231,
+   ALU_SRC_SIMD_ID = 232,
+   ALU_SRC_SE_ID = 233,
+   ALU_SRC_HW_THREADGRP_ID = 234,
+   ALU_SRC_WAVE_ID_IN_GRP = 235,
+   ALU_SRC_NUM_THREADGRP_WAVES = 236,
+   ALU_SRC_HW_ALU_ODD = 237,
+   ALU_SRC_LOOP_IDX = 238,
+   ALU_SRC_PARAM_BASE_ADDR = 240,
+   ALU_SRC_NEW_PRIM_MASK = 241,
+   ALU_SRC_PRIM_MASK_HI = 242,
+   ALU_SRC_PRIM_MASK_LO = 243,
+   ALU_SRC_1_DBL_L = 244,
+   ALU_SRC_1_DBL_M = 245,
+   ALU_SRC_0_5_DBL_L = 246,
+   ALU_SRC_0_5_DBL_M = 247,
+   ALU_SRC_0 = 248,
+   ALU_SRC_1 = 249,
+   ALU_SRC_1_INT = 250,
+   ALU_SRC_M_1_INT = 251,
+   ALU_SRC_0_5 = 252,
+   ALU_SRC_LITERAL = 253,
+   ALU_SRC_PV = 254,
+   ALU_SRC_PS = 255,
+   ALU_SRC_PARAM_BASE = 0x1C0,
+   ALU_SRC_UNKNOWN
+};
+
+struct AluInlineConstantDescr { // value type of alu_src_const
+   bool use_chan;
+   const char *descr;
+};
+
+extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
+
+enum ESDOp {
+   DS_OP_ADD = 0,
+   DS_OP_SUB = 1,
+   DS_OP_RSUB = 2,
+   DS_OP_INC = 3,
+   DS_OP_DEC = 4,
+   DS_OP_MIN_INT = 5,
+   DS_OP_MAX_INT = 6,
+   DS_OP_MIN_UINT = 7,
+   DS_OP_MAX_UINT = 8,
+   DS_OP_AND = 9,
+   DS_OP_OR = 10,
+   DS_OP_XOR = 11,
+   DS_OP_MSKOR = 12,
+   DS_OP_WRITE = 13,
+   DS_OP_WRITE_REL = 14,
+   DS_OP_WRITE2 = 15,
+   DS_OP_CMP_STORE = 16,
+   DS_OP_CMP_STORE_SPF = 17,
+   DS_OP_BYTE_WRITE = 18,
+   DS_OP_SHORT_WRITE = 19,
+   DS_OP_ADD_RET = 32,
+   DS_OP_SUB_RET = 33,
+   DS_OP_RSUB_RET = 34,
+   DS_OP_INC_RET = 35,
+   DS_OP_DEC_RET = 36,
+   DS_OP_MIN_INT_RET = 37,
+   DS_OP_MAX_INT_RET = 38,
+   DS_OP_MIN_UINT_RET = 39,
+   DS_OP_MAX_UINT_RET = 40,
+   DS_OP_AND_RET = 41,
+   DS_OP_OR_RET = 42,
+   DS_OP_XOR_RET = 43,
+   DS_OP_MSKOR_RET = 44,
+   DS_OP_XCHG_RET = 45,
+   DS_OP_XCHG_REL_RET = 46,
+   DS_OP_XCHG2_RET = 47,
+   DS_OP_CMP_XCHG_RET = 48,
+   DS_OP_CMP_XCHG_SPF_RET = 49,
+   DS_OP_READ_RET = 50,
+   DS_OP_READ_REL_RET = 51,
+   DS_OP_READ2_RET = 52,
+   DS_OP_READWRITE_RET = 53,
+   DS_OP_BYTE_READ_RET = 54,
+   DS_OP_UBYTE_READ_RET = 55,
+   DS_OP_SHORT_READ_RET = 56,
+   DS_OP_USHORT_READ_RET = 57,
+   DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
+   DS_OP_INVALID = 64
+};
+
+struct LDSOp { // value type of lds_ops: source count and mnemonic
+   int nsrc;
+   const char *name;
+};
+
+extern const std::map<ESDOp, LDSOp> lds_ops;
+
+}
+
+#endif // r600_sfn_alu_defines_h
diff --git a/src/gallium/drivers/r600/sfn/sfn_callstack.cpp b/src/gallium/drivers/r600/sfn/sfn_callstack.cpp
new file mode 100644
index 00000000000..da423a657fa
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_callstack.cpp
@@ -0,0 +1,111 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#include "sfn_callstack.h" + +namespace r600 { + +CallStack::CallStack(r600_bytecode& bc): + m_bc(bc) +{ + +} + +CallStack::~CallStack() +{ +} + +int CallStack::push(unsigned type) +{ + switch (type) { + case FC_PUSH_VPM: + ++m_bc.stack.push; + break; + case FC_PUSH_WQM: + ++m_bc.stack.push_wqm; + break; + case FC_LOOP: + ++m_bc.stack.loop; + break; + default: + assert(0); + } + + return update_max_depth(type); +} + +void CallStack::pop(unsigned type) +{ + switch(type) { + case FC_PUSH_VPM: + --m_bc.stack.push; + assert(m_bc.stack.push >= 0); + break; + case FC_PUSH_WQM: + --m_bc.stack.push_wqm; + assert(m_bc.stack.push_wqm >= 0); + break; + case FC_LOOP: + --m_bc.stack.loop; + assert(m_bc.stack.loop >= 0); + break; + default: + assert(0); + break; + } +} + +int CallStack::update_max_depth(unsigned type) +{ + + r600_stack_info& stack = m_bc.stack; + int elements; + int entries; + + int entry_size = stack.entry_size; + + elements = (stack.loop + stack.push_wqm ) * entry_size; + elements += stack.push; + + /* These next three lines are EVERGREEN specific and should + * be moved to a virtual function when other chipsets are to + * be supported */ + assert(m_bc.chip_class == EVERGREEN); + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 1; + } + + entry_size = 4; + + entries = (elements + (entry_size - 1)) / entry_size; + + if (entries > stack.max_entries) + stack.max_entries = entries; + + return elements; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_callstack.h b/src/gallium/drivers/r600/sfn/sfn_callstack.h new file mode 100644 index 00000000000..e1babb7c16e --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_callstack.h @@ -0,0 +1,47 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_CALLSTACK_HH +#define SFN_CALLSTACK_HH + +#include "gallium/drivers/r600/r600_asm.h" + +namespace r600 { + +class CallStack { +public: + CallStack(r600_bytecode& bc); + ~CallStack(); + int push(unsigned type); + void pop(unsigned type); + int update_max_depth(unsigned type); +private: + r600_bytecode& m_bc; +}; + +} + +#endif // SFN_CALLSTACK_HH diff --git a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp new file mode 100644 index 00000000000..e2346cca70c --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp @@ -0,0 +1,195 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, 
and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_conditionaljumptracker.h" +#include "sfn_debug.h" + +#include <stack> +#include <vector> +#include <memory> +#include <iostream> + +namespace r600 { + +using std::stack; +using std::vector; +using std::shared_ptr; + +struct StackFrame { + + StackFrame(r600_bytecode_cf *s, JumpType t): + type(t), + start(s) + {} + + virtual ~StackFrame(); + + JumpType type; + r600_bytecode_cf *start; + vector<r600_bytecode_cf *> mid; /* CF instructions registered through add_mid() */ + + virtual void fixup_mid(r600_bytecode_cf *cf) = 0; + virtual void fixup_pop(r600_bytecode_cf *final) = 0; +}; + +using PStackFrame = shared_ptr<StackFrame>; + +struct IfFrame : public StackFrame { + IfFrame(r600_bytecode_cf *s); + void fixup_mid(r600_bytecode_cf *cf) override; + void fixup_pop(r600_bytecode_cf *final) override; +}; + +struct LoopFrame : public StackFrame { + LoopFrame(r600_bytecode_cf *s); + void fixup_mid(r600_bytecode_cf *cf) override; + void fixup_pop(r600_bytecode_cf *final) override; +}; + +struct ConditionalJumpTrackerImpl { + ConditionalJumpTrackerImpl(); + stack<PStackFrame> m_jump_stack; /* every open if/loop frame */ + stack<PStackFrame> m_loop_stack; /* loop frames only; also present in m_jump_stack */ + int m_current_loop_stack_pos; +}; + +ConditionalJumpTrackerImpl::ConditionalJumpTrackerImpl(): + m_current_loop_stack_pos(0) +{ + +} + 
+ConditionalJumpTracker::~ConditionalJumpTracker() +{ + delete impl; +} + +ConditionalJumpTracker::ConditionalJumpTracker() +{ + impl = new ConditionalJumpTrackerImpl(); +} + +void ConditionalJumpTracker::push(r600_bytecode_cf *start, JumpType type) +{ + PStackFrame f; + switch (type) { + case jt_if: + f.reset(new IfFrame(start)); + break; + case jt_loop: + f.reset(new LoopFrame(start)); + impl->m_loop_stack.push(f); + break; + } + impl->m_jump_stack.push(f); +} + +bool ConditionalJumpTracker::pop(r600_bytecode_cf *final, JumpType type) +{ + if (impl->m_jump_stack.empty()) + return false; + + auto& frame = *impl->m_jump_stack.top(); + if (frame.type != type) + return false; + + frame.fixup_pop(final); + if (frame.type == jt_loop) + impl->m_loop_stack.pop(); + impl->m_jump_stack.pop(); + return true; +} + +bool ConditionalJumpTracker::add_mid(r600_bytecode_cf *source, JumpType type) +{ + if (impl->m_jump_stack.empty()) { + sfn_log << "Jump stack empty\n"; + return false; + } + + PStackFrame pframe; + if (type == jt_loop) { + if (impl->m_loop_stack.empty()) { + sfn_log << "Loop jump stack empty\n"; + return false; + } + pframe = impl->m_loop_stack.top(); + } else { + pframe = impl->m_jump_stack.top(); + } + + pframe->mid.push_back(source); + pframe->fixup_mid(source); + return true; +} + +IfFrame::IfFrame(r600_bytecode_cf *s): + StackFrame (s, jt_if) +{ +} + +StackFrame::~StackFrame() +{ +} + +void IfFrame::fixup_mid(r600_bytecode_cf *source) +{ + /* JUMP target is ELSE */ + start->cf_addr = source->id; +} + +void IfFrame::fixup_pop(r600_bytecode_cf *final) +{ + /* JUMP or ELSE target is one past last CF instruction */ + unsigned offset = final->eg_alu_extended ? 4 : 2; + auto src = mid.empty() ? 
start : mid[0]; + src->cf_addr = final->id + offset; + src->pop_count = 1; +} + +LoopFrame::LoopFrame(r600_bytecode_cf *s): + StackFrame(s, jt_loop) +{ +} + +void LoopFrame::fixup_mid(UNUSED r600_bytecode_cf *mid) +{ +} + +void LoopFrame::fixup_pop(r600_bytecode_cf *final) +{ + /* LOOP END address is past LOOP START */ + final->cf_addr = start->id + 2; + + /* LOOP START address is past LOOP END */ + start->cf_addr = final->id + 2; + + /* BREAK and CONTINUE point at LOOP END */ + for (auto m : mid) + m->cf_addr = final->id; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h new file mode 100644 index 00000000000..76cc02a2705 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h @@ -0,0 +1,69 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_CONDITIONALJUMPTRACKER_H +#define SFN_CONDITIONALJUMPTRACKER_H + +#include "gallium/drivers/r600/r600_asm.h" + +namespace r600 { + +enum JumpType { + jt_loop, + jt_if +}; + +/** + Class to link the jump locations + +*/ + + +class ConditionalJumpTracker +{ +public: + ConditionalJumpTracker(); + ~ConditionalJumpTracker(); + + /* Mark the start of a loop or a if/else */ + + void push(r600_bytecode_cf *start, JumpType type); + + /* Mark the end of a loop or a if/else and fixup the jump sites */ + bool pop(r600_bytecode_cf *final, JumpType type); + + /* Add middle sites to the call frame i.e. continue, + * break inside loops, and else in if-then-else constructs. + */ + bool add_mid(r600_bytecode_cf *source, JumpType type); + +private: + struct ConditionalJumpTrackerImpl * impl; +}; + +} + +#endif // SFN_CONDITIONALJUMPTRACKER_H diff --git a/src/gallium/drivers/r600/sfn/sfn_debug.cpp b/src/gallium/drivers/r600/sfn/sfn_debug.cpp new file mode 100644 index 00000000000..a766ff63f2b --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_debug.cpp @@ -0,0 +1,139 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the 
next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_debug.h" +#include "sfn_debug.h" + +namespace r600 { + +class stderr_streambuf : public std::streambuf +{ +public: + stderr_streambuf(); +protected: + int sync(); + int overflow(int c); + std::streamsize xsputn ( const char *s, std::streamsize n ); +}; + +stderr_streambuf::stderr_streambuf() +{ + +} + +int stderr_streambuf::sync() +{ + fflush(stderr); + return 0; +} + +int stderr_streambuf::overflow(int c) +{ + fputc(c, stderr); + return 0; +} + +/* Names accepted in the R600_NIR_DEBUG environment variable and the SfnLog flag each one sets */ +static const struct debug_named_value sfn_debug_options[] = { + {"instr", SfnLog::instr, "Log all consumed nir instructions"}, + {"ir", SfnLog::r600ir, "Log created R600 IR"}, + {"cc", SfnLog::cc, "Log R600 IR to assembly code creation"}, + {"noerr", SfnLog::err, "Don't log shader conversion errors"}, + {"si", SfnLog::shader_info, "Log shader info (non-zero values)"}, + {"ts", SfnLog::test_shader, "Log shaders in tests"}, + {"reg", SfnLog::reg, "Log register allocation and lookup"}, + {"io", SfnLog::io, "Log shader in and output"}, + {"ass", SfnLog::assembly, "Log IR to assembly conversion"}, + {"flow", SfnLog::flow, "Log Flow instructions"}, + {"merge", SfnLog::merge, "Log register merge operations"}, + {"nomerge", SfnLog::nomerge, "Skip register merge step"}, + {"tex", SfnLog::tex, "Log texture ops"}, + {"trans", SfnLog::trans, "Log generic translation messages"}, + DEBUG_NAMED_VALUE_END +}; + +SfnLog sfn_log; + 
+std::streamsize stderr_streambuf::xsputn ( const char *s, std::streamsize n ) +{ + std::streamsize i = n; + while (i--) + fputc(*s++, stderr); + return n; +} + +SfnLog::SfnLog(): + m_active_log_flags(0), + m_log_mask(0), + m_output(new stderr_streambuf()) +{ + m_log_mask = debug_get_flags_option("R600_NIR_DEBUG", sfn_debug_options, 0); + m_log_mask ^= err; +} + +SfnLog& SfnLog::operator << (SfnLog::LogFlag const l) +{ + m_active_log_flags = l; + return *this; +} + +SfnLog& SfnLog::operator << (UNUSED std::ostream & (*f)(std::ostream&)) +{ + if (m_active_log_flags & m_log_mask) + m_output << f; + return *this; +} + +SfnLog& SfnLog::operator << (nir_shader& sh) +{ + if (m_active_log_flags & m_log_mask) + nir_print_shader(&sh, stderr); + return *this; +} + +SfnLog& SfnLog::operator << (nir_instr &instr) +{ + if (m_active_log_flags & m_log_mask) + nir_print_instr(&instr, stderr); + return *this; +} + +SfnTrace::SfnTrace(SfnLog::LogFlag flag, const char *msg): + m_flag(flag), + m_msg(msg) +{ /* indent with std::string(count, ch): the (const char*, count) overload would read past the literal; depth grows after BEGIN is logged */ + sfn_log << m_flag << std::string(2 * m_indention++, ' ') + << "BEGIN: " << m_msg << "\n"; +} + +SfnTrace::~SfnTrace() +{ /* step back to the depth of the matching BEGIN before logging END */ + sfn_log << m_flag << std::string(2 * --m_indention, ' ') + << "END: " << m_msg << "\n"; +} + +int SfnTrace::m_indention = 0; + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_debug.h b/src/gallium/drivers/r600/sfn/sfn_debug.h new file mode 100644 index 00000000000..840c3d1478c --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_debug.h @@ -0,0 +1,121 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished 
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_STDERR_STREAMLOG_H +#define SFN_STDERR_STREAMLOG_H + + +#include <streambuf> +#include <ostream> +#include <fstream> +#include "compiler/nir/nir.h" + +namespace r600 { +/* Implement some logging for shader-from-nir + +*/ + +class SfnLog { +public: + enum LogFlag { + instr = 1 << 0, + r600ir = 1 << 1, + cc = 1 << 2, + err = 1 << 3, + shader_info = 1 << 4, + test_shader = 1 << 5, + reg = 1 << 6, + io = 1 << 7, + assembly = 1 << 8, + flow = 1 << 9, + merge = 1 << 10, + tex = 1 << 11, + trans = 1 << 12, + all = (1 << 13) - 1, + nomerge = 1 << 16, + }; + + SfnLog(); + + /** a special handling to set the output level "inline" + \param l the level of the following messages + */ + SfnLog& operator << (LogFlag const l); + + /* general output routine; output is only given, if the log flags and the + * currently active log mask overlap + \returns a reference to this object + */ + template <class T> + SfnLog& operator << (const T& text) + { + if (m_active_log_flags & m_log_mask) + m_output << text; + + return *this; + } + + /* A funny construct to enable std::endl to work on this stream + idea of Dave Brondsema: + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8567 + */ + SfnLog& operator << (std::ostream & (*f)(std::ostream&)); + + SfnLog& operator << (nir_shader &sh); + + SfnLog& operator << 
(nir_instr& instr); + + int has_debug_flag(uint64_t flag) { + return (m_log_mask & flag) == flag; + } + +private: + uint64_t m_active_log_flags; + uint64_t m_log_mask; + std::ostream m_output; +}; + +class SfnTrace { +public: + SfnTrace(SfnLog::LogFlag flag, const char *msg); + ~SfnTrace(); +private: + SfnLog::LogFlag m_flag; + const char *m_msg; + static int m_indention; +}; + + +#ifndef NDEBUG +#define SFN_TRACE_FUNC(LEVEL, MSG) SfnTrace __trace(LEVEL, MSG) +#else +#define SFN_TRACE_FUNC(LEVEL, MSG) +#endif + +extern SfnLog sfn_log; + +} +#endif // SFN_STDERR_STREAMLOG_H diff --git a/src/gallium/drivers/r600/sfn/sfn_defines.h b/src/gallium/drivers/r600/sfn/sfn_defines.h new file mode 100644 index 00000000000..e114979e4c5 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_defines.h @@ -0,0 +1,318 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_defines_h +#define sfn_defines_h + +#include "../r600_isa.h" + +namespace r600 { + + +enum EGWSOpCode { + cf_sema_v = 0, + cf_sema_p = 1, + cf_gws_barrier = 2, + cf_gws_init = 3, +}; + +/* CF ALU instructions [29:26], highest bit always set. */ +enum ECFAluOpCode { + cf_alu_undefined = 0, + cf_alu = CF_OP_ALU, + cf_alu_push_before = CF_OP_ALU_PUSH_BEFORE, + cf_alu_pop_after = CF_OP_ALU_POP_AFTER, + cf_alu_pop2_after = CF_OP_ALU_POP2_AFTER, + cf_alu_extended = CF_OP_ALU_EXT, + cf_alu_continue = CF_OP_ALU_CONTINUE, + cf_alu_break = CF_OP_ALU_BREAK, + cf_alu_else_after = CF_OP_ALU_ELSE_AFTER, +}; + +enum ECFAluOpCodeEG { + eg_cf_alu_undefined = 0, + eg_cf_alu = 8, + eg_cf_alu_push_before = 9, + eg_cf_alu_pop_after = 10, + eg_cf_alu_pop2_after = 11, + eg_cf_alu_extended = 12, + eg_cf_alu_continue = 13, + eg_cf_alu_break = 14, + eg_cf_alu_else_after = 15, +}; + + +enum ECFOpCode { + cf_nop = CF_OP_NOP, + cf_tc = CF_OP_TEX, + cf_vc = CF_OP_VTX, + cf_gds = CF_OP_GDS, + cf_loop_start = CF_OP_LOOP_START, + cf_loop_end = CF_OP_LOOP_END, + cf_loop_start_dx10 = CF_OP_LOOP_START_DX10, + cf_loop_start_no_al = CF_OP_LOOP_START_NO_AL, + cf_loop_continue = CF_OP_LOOP_CONTINUE, + cf_loop_break = CF_OP_LOOP_BREAK, + cf_jump = CF_OP_JUMP, + cf_push = CF_OP_PUSH, + cf_else = CF_OP_ELSE, + cf_pop = CF_OP_POP, + /* 15 - 17 reserved */ + cf_call = CF_OP_CALL, + cf_call_fs = CF_OP_CALL_FS, + cf_return = CF_OP_RET, + cf_emit_vertex = CF_OP_EMIT_VERTEX, + cf_emit_cut_vertex = CF_OP_EMIT_CUT_VERTEX, + cf_cut_vertex = CF_OP_CUT_VERTEX, + cf_kill = CF_OP_KILL, + /* 25 reserved */ + cf_wait_ack = CF_OP_WAIT_ACK, + cf_tc_ack = CF_OP_TEX_ACK, + cf_vc_ack = CF_OP_VTX_ACK, + cf_jump_table = 
CF_OP_JUMPTABLE, + cf_global_wave_sync = CF_OP_WAVE_SYNC, + cf_halt = CF_OP_HALT, + /* gap 32-63*/ + cf_mem_stream0_buf0 = CF_OP_MEM_STREAM0_BUF0, + cf_mem_stream0_buf1 = CF_OP_MEM_STREAM0_BUF1, + cf_mem_stream0_buf2 = CF_OP_MEM_STREAM0_BUF2, + cf_mem_stream0_buf3 = CF_OP_MEM_STREAM0_BUF3, + + cf_mem_stream1_buf0 = CF_OP_MEM_STREAM1_BUF0, + cf_mem_stream1_buf1 = CF_OP_MEM_STREAM1_BUF1, + cf_mem_stream1_buf2 = CF_OP_MEM_STREAM1_BUF2, + cf_mem_stream1_buf3 = CF_OP_MEM_STREAM1_BUF3, + + cf_mem_stream2_buf0 = CF_OP_MEM_STREAM2_BUF0, + cf_mem_stream2_buf1 = CF_OP_MEM_STREAM2_BUF1, + cf_mem_stream2_buf2 = CF_OP_MEM_STREAM2_BUF2, + cf_mem_stream2_buf3 = CF_OP_MEM_STREAM2_BUF3, + + cf_mem_stream3_buf0 = CF_OP_MEM_STREAM3_BUF0, + cf_mem_stream3_buf1 = CF_OP_MEM_STREAM3_BUF1, + cf_mem_stream3_buf2 = CF_OP_MEM_STREAM3_BUF2, + cf_mem_stream3_buf3 = CF_OP_MEM_STREAM3_BUF3, + + cf_mem_write_scratch = CF_OP_MEM_SCRATCH , + /* reserved 81 */ + cf_mem_ring = CF_OP_MEM_RING, + cf_export = CF_OP_EXPORT, + cf_export_done = CF_OP_EXPORT_DONE, + cf_mem_export = CF_OP_MEM_EXPORT, + cf_mem_rat = CF_OP_MEM_RAT, + cf_mem_rat_cacheless = CF_OP_MEM_RAT_NOCACHE, + + cf_mem_ring1 = CF_OP_MEM_RING1, + cf_mem_ring2 = CF_OP_MEM_RING2, + cf_mem_ring3 = CF_OP_MEM_RING3, + cf_mem_export_combined = CF_OP_MEM_MEM_COMBINED, + cf_mem_rat_combined_cacheless = CF_OP_MEM_RAT_COMBINED_NOCACHE + +}; + +enum ECFOpCodeEG { + eg_cf_nop = 0, + eg_cf_tc = 1, + eg_cf_vc = 2, + eg_cf_gds = 3, + eg_cf_loop_start = 4, + eg_cf_loop_end = 5, + eg_cf_loop_start_dx10 = 6, + eg_cf_loop_start_no_al = 7, + eg_cf_loop_continue = 8, + eg_cf_loop_break = 9, + eg_cf_jump = 10, + eg_cf_push = 11, + eg_cf_else = 13, + eg_cf_pop = 14, + /* 15 - 17 reserved */ + eg_cf_call = 18, + eg_cf_call_fs, + eg_cf_return, + eg_cf_emit_vertex, + eg_cf_emit_cut_vertex, + eg_cf_cut_vertex, + eg_cf_kill, + /* 25 reserved */ + eg_cf_wait_ack = 26, + eg_cf_tc_ack, + eg_cf_vc_ack, + eg_cf_jump_table, + eg_cf_global_wave_sync, + eg_cf_halt, + /* gap 
32-63*/ + eg_cf_mem_stream0_buf0 = 64, + eg_cf_mem_stream0_buf1, + eg_cf_mem_stream0_buf2, + eg_cf_mem_stream0_buf3, + + eg_cf_mem_stream1_buf0, + eg_cf_mem_stream1_buf1, + eg_cf_mem_stream1_buf2, + eg_cf_mem_stream1_buf3, + + eg_cf_mem_stream2_buf0, + eg_cf_mem_stream2_buf1, + eg_cf_mem_stream2_buf2, + eg_cf_mem_stream2_buf3, + + eg_cf_mem_stream3_buf0, + eg_cf_mem_stream3_buf1, + eg_cf_mem_stream3_buf2, + eg_cf_mem_stream3_buf3, + + eg_cf_mem_write_scratch, + /* reserved 81 */ + eg_cf_mem_ring = 82, + eg_cf_export, + eg_cf_export_done, + eg_cf_mem_export, + eg_cf_mem_rat, + eg_cf_mem_rat_cacheless, + + eg_cf_mem_ring1, + eg_cf_mem_ring2, + eg_cf_mem_ring3, + eg_cf_mem_export_combined, + eg_cf_mem_rat_combined_cacheless +}; + + +enum EVFetchInstr { + vc_fetch = FETCH_OP_VFETCH, + vc_semantic = FETCH_OP_SEMFETCH, + vc_get_buf_resinfo = FETCH_OP_GET_BUFFER_RESINFO, + vc_read_scratch = FETCH_OP_READ_SCRATCH, + vc_unknown +}; + +enum EVFetchType { + vertex_data = 0, + instance_data = 1, + no_index_offset = 2 +}; + +enum EVTXDataFormat { + fmt_invalid = 0, + fmt_8 = 1, + fmt_4_4 = 2, + fmt_3_3_2 = 3, + fmt_reserved_4 = 4, + fmt_16 = 5, + fmt_16_float = 6, + fmt_8_8 = 7, + fmt_5_6_5 = 8, + fmt_6_5_5 = 9, + fmt_1_5_5_5 = 10, + fmt_4_4_4_4 = 11, + fmt_5_5_5_1 = 12, + fmt_32 = 13, + fmt_32_float = 14, + fmt_16_16 = 15, + fmt_16_16_float = 16, + fmt_8_24 = 17, + fmt_8_24_float = 18, + fmt_24_8 = 19, + fmt_24_8_float = 20, + fmt_10_11_11 = 21, + fmt_10_11_11_float = 22, + fmt_11_11_10 = 23, + fmt_11_11_10_float = 24, + fmt_2_10_10_10 = 25, + fmt_8_8_8_8 = 26, + fmt_10_10_10_2 = 27, + fmt_x24_8_32_float = 28, + fmt_32_32 = 29, + fmt_32_32_float = 30, + fmt_16_16_16_16 = 31, + fmt_16_16_16_16_float = 32, + fmt_reserved_33 = 33, + fmt_32_32_32_32 = 34, + fmt_32_32_32_32_float = 35, + fmt_reserved_36 = 36, + fmt_1 = 37, + fmt_1_reversed = 38, + fmt_gb_gr = 39, + fmt_bg_rg = 40, + fmt_32_as_8 = 41, + fmt_32_as_8_8 = 42, + fmt_5_9_9_9_sharedexp = 43, + fmt_8_8_8 = 44, + 
fmt_16_16_16 = 45, + fmt_16_16_16_float = 46, + fmt_32_32_32 = 47, + fmt_32_32_32_float = 48, + fmt_bc1 = 49, + fmt_bc2 = 50, + fmt_bc3 = 51, + fmt_bc4 = 52, + fmt_bc5 = 53, + fmt_apc0 = 54, + fmt_apc1 = 55, + fmt_apc2 = 56, + fmt_apc3 = 57, + fmt_apc4 = 58, + fmt_apc5 = 59, + fmt_apc6 = 60, + fmt_apc7 = 61, + fmt_ctx1 = 62, + fmt_reserved_63 = 63 +}; + +enum EVFetchNumFormat { + vtx_nf_norm = 0, + vtx_nf_int = 1, + vtx_nf_scaled = 2 +}; + +enum EVFetchEndianSwap { + vtx_es_none = 0, + vtx_es_8in16 = 1, + vtx_es_8in32 = 2 +}; + +enum EVFetchFlagShift { + vtx_fetch_whole_quad, + vtx_use_const_field, + vtx_format_comp_signed, + vtx_srf_mode, + vtx_buf_no_stride, + vtx_alt_const, + vtx_use_tc, + vtx_vpm, + vtx_unknwon +}; + +enum EBufferIndexMode { + bim_none, + bim_zero, + bim_one, + bim_invalid +}; + +} + +#endif // DEFINES_H diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp new file mode 100644 index 00000000000..056a54a54bf --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp @@ -0,0 +1,1301 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "sfn_emitaluinstruction.h" +#include "sfn_debug.h" + +#include "gallium/drivers/r600/r600_shader.h" + +namespace r600 { + +using std::vector; + +EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor): + EmitInstruction (processor) +{ + +} + +bool EmitAluInstruction::do_emit(nir_instr* ir) +{ + const nir_alu_instr& instr = *nir_instr_as_alu(ir); + + r600::sfn_log << SfnLog::instr << "emit '" + << *ir + << " bitsize: " << static_cast(instr.dest.dest.ssa.bit_size) + << "' (" << __func__ << ")\n"; + + split_constants(instr); + + switch (instr.op) { + case nir_op_b2f32: return emit_alu_b2f(instr); + case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int); + case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10); + case nir_op_mov:return emit_alu_op1(instr, op1_mov); + case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc); + case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs}); + case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg}); + case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp}); + case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee); + case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1); + case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin); + case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos); + case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee); + case nir_op_flog2: return 
emit_alu_trans_op1(instr, op1_log_clamped); + + case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne); + case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee); + case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt); + case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt); + case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int); + case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint); + + case nir_op_fceil: return emit_alu_op1(instr, op1_ceil); + case nir_op_ffract: return emit_alu_op1(instr, op1_fract); + case nir_op_ffloor: return emit_alu_op1(instr, op1_floor); + + case nir_op_fsign: return emit_fsign(instr); + case nir_op_fdph: return emit_fdph(instr); + + case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int); + case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint); + case nir_op_bitfield_insert: return emit_bitfield_insert(instr); + case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int); + case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int); + + case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int); + case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int); + case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int); + case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int); + case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int); + case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); + case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int); + case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int); + case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int); + case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int); + case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int); + case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint); + case nir_op_umax: return 
emit_alu_op2_int(instr, op2_max_uint); + case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint); + case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int); + case nir_op_inot: return emit_alu_op1(instr, op1_not_int); + case nir_op_iabs: return emit_alu_iabs(instr); + case nir_op_ineg: return emit_alu_ineg(instr); + case nir_op_idiv: return emit_alu_div_int(instr, true, false); + case nir_op_udiv: return emit_alu_div_int(instr, false, false); + case nir_op_umod: return emit_alu_div_int(instr, false, true); + case nir_op_isign: return emit_alu_isign(instr); + + case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint); + case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); + case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int); + + case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); + + case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10); + case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10); + case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10); + + case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10); + case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10); + case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee); + case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int); + case nir_op_fadd: return emit_alu_op2(instr, op2_add); + case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1); + case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int); + case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int); + case nir_op_fdot2: return emit_dot(instr, 2); + case nir_op_fdot3: return emit_dot(instr, 3); + case nir_op_fdot4: return emit_dot(instr, 4); + + case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false); + case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false); + case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false); + + 
case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true); + case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true); + case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true); + + case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false); + case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false); + case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false); + + case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true); + case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true); + case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true); + + + case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee); + case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1}); + case nir_op_vec2: return emit_create_vec(instr, 2); + case nir_op_vec3: return emit_create_vec(instr, 3); + case nir_op_vec4: return emit_create_vec(instr, 4); + + case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int); + case nir_op_ufind_msb: return emit_find_msb(instr, false); + case nir_op_ifind_msb: return emit_find_msb(instr, true); + case nir_op_b2i32: return emit_b2i32(instr); + case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr); + case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0); + case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1); + case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr); + case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr); + case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr); + + + /* These are in the ALU instruction list, but they should be texture instructions */ + case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true); + case nir_op_fddx_coarse: + case 
nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); + + case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true); + case nir_op_fddy_coarse: + case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false); + + default: + return false; + } +} + +void EmitAluInstruction::split_constants(const nir_alu_instr& instr) +{ + const nir_op_info *op_info = &nir_op_infos[instr.op]; + if (op_info->num_inputs < 2) + return; + + int nconst = 0; + std::array c; + std::array idx; + for (unsigned i = 0; i < op_info->num_inputs; ++i) { + PValue src = from_nir(instr.src[i], 0); + assert(src); + if (src->type() == Value::kconst) { + c[nconst] = src; + + idx[nconst++] = i; + } + } + if (nconst < 2) + return; + + unsigned sel = c[0]->sel(); + sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ; + + for (int i = 1; i < nconst; ++i) { + sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n"; + if (c[i]->sel() != sel) { + load_uniform(instr.src[idx[i]]); + } + } +} + +bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr) +{ + if (instr.src[0].negate || instr.src[0].abs) { + std::cerr << "source modifiers not supported with int ops\n"; + return false; + } + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i), + from_nir(instr.src[0], i), write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, + const AluOpFlags& flags) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), + from_nir(instr.src[0], i), write); + + if (flags.test(alu_src0_abs) || instr.src[0].abs) + ir->set_flag(alu_src0_abs); + + if (instr.src[0].negate 
^ flags.test(alu_src0_neg)) + ir->set_flag(alu_src0_neg); + + if (flags.test(alu_dst_clamp) || instr.dest.saturate) + ir->set_flag(alu_dst_clamp); + + emit_instruction(ir); + } + } + make_last(ir); + + return true; +} + +bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode) +{ + // normalize by dividing by 2*PI, shift by 0.5, take fraction, and + // then shift back + + const float inv_2_pi = 0.15915494f; + + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0)); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op3_muladd_ieee, v[i], + {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5}, + {alu_write}); + if (instr.src[0].negate) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + } + make_last(ir); + + for (unsigned i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write}); + emit_instruction(ir); + } + make_last(ir); + + for (unsigned i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write); + ir->set_flag(alu_src1_neg); + emit_instruction(ir); + } + make_last(ir); + + for (unsigned i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(opcode, v[i], v[i], last_write); + emit_instruction(ir); + } + return true; +} + +bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, + bool absolute) +{ + AluInstruction *ir = nullptr; + std::set src_idx; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), + from_nir(instr.src[0], i), last_write); + if (absolute 
|| instr.src[0].abs) ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) ir->set_flag(alu_src0_neg); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + return true; +} + +bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op) +{ + AluInstruction *ir = nullptr; + std::array v; + + for (int i = 0; i < 4; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + v[i] = from_nir(instr.dest, i); + ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write}); + if (instr.src[0].abs) ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + } + make_last(ir); + + for (int i = 0; i < 4; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op, v[i], v[i], {alu_write}); + emit_instruction(ir); + if (op == op1_flt_to_uint) + make_last(ir); + } + make_last(ir); + + return true; +} + +bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn) +{ + int sel_tmp = allocate_temp_register(); + int sel_tmp2 = allocate_temp_register(); + GPRVector tmp(sel_tmp, {0,1,2,3}); + GPRVector tmp2(sel_tmp2, {0,1,2,3}); + AluInstruction *ir = nullptr; + EAluOp opcode = sgn ? 
op1_ffbh_int : op1_ffbh_uint; + for (int i = 0; i < 4; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write); + emit_instruction(ir); + } + make_last(ir); + + for (int i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i), + PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write); + emit_instruction(ir); + } + make_last(ir); + + for (int i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i), + tmp2.reg_i(i), tmp.reg_i(i), write); + emit_instruction(ir); + } + make_last(ir); + + return true; +} + +bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), + from_nir(instr.src[0], i), Value::one_i, write); + emit_instruction(ir); + } + make_last(ir); + + return true; +} + +bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), + from_nir(instr.src[0], i), write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp) +{ + emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0), + from_nir(instr.src[0], comp), last_write)); + return true; +} + +bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc) +{ + AluInstruction *ir = nullptr; + std::set src_slot; + for(unsigned i = 0; i < nc; ++i) { + if (instr.dest.write_mask & (1 << i)){ + auto src = 
from_nir(instr.src[i], 0); + ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + + // FIXME: This is a rather crude approach to fix the problem that + // r600 can't read from four different slots of the same component + // here we check only for the register index + if (src->type() == Value::gpr) + src_slot.insert(src->sel()); + if (src_slot.size() >= 3) { + src_slot.clear(); + ir->set_flag(alu_last_instr); + } + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < n ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + from_nir(src0, i), from_nir(src1, i), + instr.dest.write_mask & (1 << i) ? write : empty); + + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + for (int i = n; i < 4 ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + Value::zero, Value::zero, + instr.dest.write_mask & (1 << i) ? write : empty); + emit_instruction(ir); + } + + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 3 ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + from_nir(src0, i), from_nir(src1, i), + instr.dest.write_mask & (1 << i) ? 
write : empty); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f, + from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + emit_instruction(ir); + + ir->set_flag(alu_last_instr); + return true; + +} + +bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)) { + ir = new AluInstruction(op, from_nir(instr.dest, i), + from_nir(instr.src[0], i), Value::zero, + write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), + from_nir(instr.src[0], i), Value::one_f, write); + if (instr.src[0].negate) ir->set_flag(alu_src0_neg); + if (instr.src[0].abs) ir->set_flag(alu_src0_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) +{ + + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + EAluOp combine = all ? 
op2_and_int : op2_or_int; + + /* For integers we can not use the modifiers, so this needs some emulation */ + /* Should actually be lowered with NIR */ + if (instr.src[0].negate == instr.src[1].negate && + instr.src[0].abs == instr.src[1].abs) { + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i), + from_nir(instr.src[1], i), write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } else { + std::cerr << "Negate in iequal/inequal not (yet) supported\n"; + return false; + } + + for (unsigned i = 0; i < nc/2 ; ++i) { + ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + if (nc > 2) { + ir = new AluInstruction(combine, v[0], v[0], v[2], last_write); + emit_instruction(ir); + } + + return true; +} + +bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) +{ + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i), + from_nir(instr.src[1],i), write); + + if (instr.src[0].abs) + ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) + ir->set_flag(alu_src0_neg); + + if (instr.src[1].abs) + ir->set_flag(alu_src1_abs); + if (instr.src[1].negate) + ir->set_flag(alu_src1_neg); + + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op1_max4, v[i], v[i], write); + if (all) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + } + + for (unsigned i = nc; i < 4 ; ++i) { + ir = new AluInstruction(op1_max4, v[i], + all ? 
Value::one_f : Value::zero, write); + if (all) + ir->set_flag(alu_src0_neg); + + emit_instruction(ir); + } + + ir->set_flag(alu_last_instr); + + if (all) + op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10; + else + op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10; + + ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write); + if (all) + ir->set_flag(alu_src1_neg); + emit_instruction(ir); + + return true; +} + +bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all) +{ + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + for (unsigned i = 0; i < 2 ; ++i) { + ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i), + from_nir(instr.src[1],i), write); + if (instr.src[0].abs) + ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) + ir->set_flag(alu_src0_neg); + + if (instr.src[1].abs) + ir->set_flag(alu_src1_abs); + if (instr.src[1].negate) + ir->set_flag(alu_src1_neg); + + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + op = (op == op2_setne_dx10) ? 
op2_or_int: op2_and_int; + ir = new AluInstruction(op, v[0], v[0], v[1], last_write); + emit_instruction(ir); + + return true; +} + +bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + return true; +} + +bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts) +{ + + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + if (src0.negate || src1.negate || + src0.abs || src1.abs) { + std::cerr << "R600: don't support modifiers with integer operations"; + return false; + } + return emit_alu_op2(instr, opcode, opts); +} + +bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops) +{ + const nir_alu_src *src0 = &instr.src[0]; + const nir_alu_src *src1 = &instr.src[1]; + + if (ops & op2_opt_reverse) + std::swap(src0, src1); + + bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), + from_nir(*src0, i), from_nir(*src1, i), write); + + if (src0->negate) ir->set_flag(alu_src0_neg); + if (src0->abs) ir->set_flag(alu_src0_abs); + if (src1_negate) ir->set_flag(alu_src1_neg); + if (src1->abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + 
emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops) +{ + const nir_alu_src *src0 = &instr.src[0]; + const nir_alu_src *src1 = &instr.src[1]; + + if (ops & op2_opt_reverse) + std::swap(src0, src1); + + GPRVector::Values v0; + for (int i = 0; i < 4 ; ++i) + v0[i] = from_nir(*src0, i); + + GPRVector::Values v1; + for (int i = 0; i < 4 ; ++i) + v1[i] = from_nir(*src1, i); + + if (src0->abs || src0->negate) { + int src0_tmp = allocate_temp_register(); + GPRVector::Values v0_temp; + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)) { + v0_temp[i] = PValue(new GPRValue(src0_tmp, i)); + ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write); + if (src0->abs) ir->set_flag(alu_src0_abs); + if (src0->negate) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + v0[i] = v0_temp[i]; + } + } + if (ir) + ir->set_flag(alu_last_instr); + } + + if (src1->abs || src1->negate) { + int src1_tmp = allocate_temp_register(); + GPRVector::Values v1_temp; + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)) { + v1_temp[i] = PValue(new GPRValue(src1_tmp, i)); + ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write}); + if (src1->abs) ir->set_flag(alu_src0_abs); + if (src1->negate) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + v1[i] = v1_temp[i]; + } + } + if (ir) + ir->set_flag(alu_last_instr); + } + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + + +bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr) +{ + int sel_tmp = allocate_temp_register(); + GPRVector 
tmp(sel_tmp, {0,1,2,3}); + + AluInstruction *ir = nullptr; + PValue help[4]; + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + help[i] = from_nir(instr.dest, i); + auto s = from_nir(instr.src[0], i); + ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + + ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i), + PValue(new LiteralValue(-1,0)), help[i], write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr) +{ + PValue help[4]; + PValue src[4]; + AluInstruction *ir = nullptr; + + for (int i = 0; i < 4 ; ++i) { + help[i] = from_nir(instr.dest, i); + src[i] = from_nir(instr.src[0], i); + } + + if (instr.src[0].abs) { + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write); + ir->set_flag(alu_src0_abs); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + + if (instr.src[0].negate) { + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op1_mov, help[i], help[i], write); + ir->set_flag(alu_src0_neg); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + } + + return true; + } + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write); + if (instr.src[0].negate) { + ir->set_flag(alu_src0_neg); + ir->set_flag(alu_src2_neg); + } + 
emit_instruction(ir); + } + } + + if (ir) + ir->set_flag(alu_last_instr); + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write); + ir->set_flag(alu_src0_neg); + ir->set_flag(alu_src1_neg); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, + std::array reorder) +{ + const nir_alu_src *src[3]; + src[0] = &instr.src[reorder[0]]; + src[1] = &instr.src[reorder[1]]; + src[2] = &instr.src[reorder[2]]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), + from_nir(*src[0], i), from_nir(*src[1], i), + from_nir(*src[2], i), write); + + if (src[0]->negate) ir->set_flag(alu_src0_neg); + if (src[1]->negate) ir->set_flag(alu_src1_neg); + if (src[2]->negate) ir->set_flag(alu_src2_neg); + + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + ir->set_flag(alu_write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero, + from_nir(instr.src[0], i), write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + + return true; +} + +static const char swz[] = "xyzw01?_"; + + + +bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr) +{ + int sel_tmp = allocate_temp_register(); + GPRVector tmp(sel_tmp, {0,1,2,3}); + + std::array src; + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + src[i] = from_nir(instr.src[0],i); + ir = new AluInstruction(op2_sub_int, 
tmp.reg_i(i), Value::zero, src[i], write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i], + src[i], tmp.reg_i(i), write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod) +{ + + int sel_tmp = allocate_temp_register(); + int sel_tmp0 = allocate_temp_register(); + int sel_tmp1 = allocate_temp_register(); + + PValue asrc1(new GPRValue(sel_tmp, 0)); + PValue asrc2(new GPRValue(sel_tmp, 1)); + PValue rsign(new GPRValue(sel_tmp, 2)); + PValue err(new GPRValue(sel_tmp, 3)); + + GPRVector tmp0(sel_tmp0, {0,1,2,3}); + GPRVector tmp1(sel_tmp1, {0,1,2,3}); + + std::array src0; + std::array src1; + + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)) { + src0[i] = from_nir(instr.src[0], i); + src1[i] = from_nir(instr.src[1], i); + } + } + + + for (int i = 3; i >= 0 ; --i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + if (use_signed) { + emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write}); + emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write}); + emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr}); + + + emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write}); + emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr}); + } else { + asrc1 = src0[i]; + asrc2 = src1[i]; + } + + emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr}); + + emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr}); + + emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write}); + emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, 
{alu_write, alu_last_instr}); + + emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr}); + + emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr}); + + emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write}); + emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr}); + + emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr}); + + emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr}); + emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr}); + + emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr}); + + + emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write}); + emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write}); + + if (mod) { + emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write}); + emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr}); + } else { + emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write}); + emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr}); + } + + emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr}); + + if (mod) + emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr}); + else + emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr}); + + if (use_signed) { + emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr}); + emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr}); + + if (mod) + emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()}, + {alu_write, 
alu_last_instr}); + else + emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()}, + {alu_write, alu_last_instr}); + } else { + emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr}); + } + } + return true; +} + +void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s, + GPRVector::Values& v, int ncomp) +{ + + AluInstruction *alu = nullptr; + for (int i = 0; i < ncomp; ++i) { + alu = new AluInstruction(op1_mov, v[i], s[i], {alu_write}); + if (src.abs) + alu->set_flag(alu_src0_abs); + if (src.negate) + alu->set_flag(alu_src0_neg); + emit_instruction(alu); + } + make_last(alu); +} + +bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, + bool fine) +{ + + GPRVector::Values v; + GPRVector::Values s; + GPRVector::Values *source = &s; + std::array writemask = {0,1,2,3}; + + int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components : + instr.src[0].src.reg.reg->num_components; + + for (int i = 0; i < 4; ++i) { + writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7; + v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0); + s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0); + } + + if (instr.src[0].abs || instr.src[0].negate) { + split_alu_modifiers(instr.src[0], s, v, ncomp); + source = &v; + } + + /* This is querying the dreivatives of the output fb, so we would either need + * access to the neighboring pixels or to the framebuffer. 
Neither is currently + * implemented */ + GPRVector dst(v); + GPRVector src(*source); + + auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue()); + tex->set_dest_swizzle(writemask); + + if (fine) { + std::cerr << "Sewt fine flag\n"; + tex->set_flag(TexInstruction::grad_fine); + } + + emit_instruction(tex); + + return true; +} + +bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode) +{ + int itmp = allocate_temp_register(); + std::array tmp; + std::array dst; + std::array src0; + std::array shift; + + PValue l32(new LiteralValue(32)); + unsigned write_mask = instr.dest.write_mask; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4; i++) { + if (!(write_mask & (1<set_flag(alu_last_instr); + + for (int i = 0; i < 4; i++) { + if (!(write_mask & (1<set_flag(alu_last_instr); + + + for (int i = 0; i < 4; i++) { + if (!(write_mask & (1<set_flag(alu_last_instr); + + for (int i = 0; i < 4; i++) { + if (!(write_mask & (1<set_flag(alu_last_instr); + + return true; +} + +bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr) +{ + emit_instruction(op2_lshr_int, from_nir(instr.dest, 0), + {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))}, + {alu_write, alu_last_instr}); + + emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), + {from_nir(instr.dest, 0)},{alu_write, alu_last_instr}); + + return true; +} + +bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr) +{ + emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), + {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr}); + return true; +} + +bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr) +{ + int it0 = allocate_temp_register(); + PValue x(new GPRValue(it0, 0)); + PValue y(new GPRValue(it0, 1)); + + emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write}); + emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 
0)},{alu_write, alu_last_instr}); + + emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr}); + + emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr}); + + return true; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h new file mode 100644 index 00000000000..ede38fbc163 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h @@ -0,0 +1,116 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EMITALUINSTRUCTION_H +#define SFN_EMITALUINSTRUCTION_H + +#include "sfn_emitinstruction.h" + +#include "sfn_alu_defines.h" +#include "sfn_instruction_alu.h" +#include "sfn_instruction_tex.h" + +namespace r600 { + + +/* Instruction emitter for NIR ALU instructions. It derives from EmitInstruction and overrides do_emit(), which the base class emit() calls; the private emit_* members declared below are its per-operation helpers. */ class EmitAluInstruction : public EmitInstruction +{ +public: + EmitAluInstruction(ShaderFromNirProcessor& processor); + +private: + + /* Option bits taken as the ops parameter of emit_alu_op2, emit_alu_op2_split_src_mods and emit_alu_op2_int. NOTE(review): going by the names, reverse swaps the two sources and neg_src1 negates the second one - confirm against the definitions. */ enum AluOp2Opts { + op2_opt_none = 0, + op2_opt_reverse = 1, + op2_opt_neg_src1 = 1 << 1 + }; + + /* Entry point invoked through EmitInstruction::emit(). */ bool do_emit(nir_instr* instr) override; + + void split_constants(const nir_alu_instr& instr); + + bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0); + bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + bool emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + + bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode); + + bool emit_alu_inot(const nir_alu_instr& instr); + bool emit_alu_iabs(const nir_alu_instr& instr); + bool emit_alu_ineg(const nir_alu_instr& instr); + bool emit_alu_isign(const nir_alu_instr& instr); + bool emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod); + bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + + bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array reorder={0,1,2}); + bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false); + bool emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode); + + bool emit_alu_b2f(const nir_alu_instr& instr); + bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op); + bool emit_dot(const nir_alu_instr& instr, int n); + bool emit_fsign(const nir_alu_instr& instr); + bool emit_create_vec(const nir_alu_instr& instr, unsigned nc); + bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); + bool emit_any_iequal(const nir_alu_instr& instr, 
unsigned nc); + + bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); + bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all); + + bool emit_fdph(const nir_alu_instr &instr); + bool emit_discard_if(const nir_intrinsic_instr *instr); + + bool emit_find_msb(const nir_alu_instr& instr, bool sgn); + bool emit_b2i32(const nir_alu_instr& instr); + bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op); + bool emit_pack_64_2x32_split(const nir_alu_instr& instr); + bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp); + + /* Emits a TexInstruction with opcode op for the derivative; when fine is set the grad_fine flag is added. */ bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine); + bool emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode); + bool emit_bitfield_insert(const nir_alu_instr& instr); + bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr); + bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr); + bool emit_pack_32_2x16_split(const nir_alu_instr& instr); + +private: + /* Sets alu_last_instr on ir; a null ir is ignored. */ void make_last(AluInstruction *ir) const; + /* Emits one op1_mov per component (v[i] from s[i], for i < ncomp) that carries the abs and negate modifiers of src, then marks the final mov as last. */ void split_alu_modifiers(const nir_alu_src &src, GPRVector::Values& s, GPRVector::Values& v, int ncomp); + + using vreg = std::array; + +}; + +/* Null-tolerant so callers can pass the pointer left over from a loop that may not have created any instruction. */ inline void EmitAluInstruction::make_last(AluInstruction *ir) const +{ + if (ir) + ir->set_flag(alu_last_instr); +} + +} + +#endif // SFN_EMITALUINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp new file mode 100644 index 00000000000..e433cd1255e --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp @@ -0,0 +1,157 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, 
modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_emitinstruction.h" + +#include "sfn_shader_base.h" + +namespace r600 { + +/* Keeps a reference to the processor; every helper defined below forwards to it. */ EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor): + m_proc(processor) +{ + +} + +EmitInstruction::~EmitInstruction() +{ +} + +/* Public entry point: hands the instruction to the do_emit() implementation of the concrete emitter and returns its result. */ bool EmitInstruction::emit(nir_instr* instr) +{ + return do_emit(instr); +} + +/* The from_nir overloads below all forward unchanged to the overload of the same signature on the processor. */ PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled) +{ + return m_proc.from_nir(v, component, swizzled); +} + +PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +/* Forwards a ready-built instruction to the processor. */ void 
EmitInstruction::emit_instruction(Instruction *ir) +{ + return m_proc.emit_instruction(ir); +} + +/* Forwards opcode, destination, sources and flag set to the processor and returns its result. */ bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest, + std::vector src0, + const std::set& m_flags) +{ + return m_proc.emit_instruction(opcode, dest,src0, m_flags); +} + +const nir_variable * +EmitInstruction::get_deref_location(const nir_src& v) const +{ + return m_proc.get_deref_location(v); +} + +PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component) +{ + return m_proc.from_nir_with_fetch_constant(src, component); +} + +/* NOTE(review): returns a raw pointer; the tg4 emitter in sfn_emittexinstruction.cpp wraps the result in a std::unique_ptr, which suggests the caller owns it - confirm. */ GPRVector *EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle) +{ + return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle); +} + +void EmitInstruction::load_uniform(const nir_alu_src& src) +{ + m_proc.load_uniform(src); +} + +int EmitInstruction::lookup_register_index(const nir_src& src) const +{ + return m_proc.lookup_register_index(src); +} + +int EmitInstruction::allocate_temp_register() +{ + return m_proc.allocate_temp_register(); +} + +int EmitInstruction::lookup_register_index(const nir_dest& dst) +{ + return m_proc.lookup_register_index(dst); +} + +/* Looks up the literal constant by SSA index; a non-SSA source yields nullptr. */ const nir_load_const_instr* +EmitInstruction::get_literal_register(const nir_src& src) const +{ + if (src.is_ssa) + return m_proc.get_literal_constant(src.ssa->index); + else + return nullptr; +} + +PValue EmitInstruction::get_temp_register() +{ + return m_proc.get_temp_register(); +} + +GPRVector EmitInstruction::get_temp_vec4() +{ + return m_proc.get_temp_vec4(); +} + +PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle) +{ + return m_proc.create_register_from_nir_src(src, swizzle); +} + +/* Definitions of the predefined flag sets declared in the class: no flags, alu_write only, alu_write plus alu_last_instr, and alu_last_instr only. */ const std::set EmitInstruction::empty = {}; +const std::set EmitInstruction::write = {alu_write}; +const std::set EmitInstruction::last_write = {alu_write, alu_last_instr}; +const std::set EmitInstruction::last = {alu_last_instr}; + +} + +diff 
--git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h new file mode 100644 index 00000000000..cfec84a959d --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h @@ -0,0 +1,96 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef EMITINSTRUCTION_H +#define EMITINSTRUCTION_H + +#include "compiler/nir/nir.h" +#include "sfn_value.h" +#include "sfn_instruction_alu.h" + +namespace r600 { + +class ShaderFromNirProcessor; + +/* Abstract base of the NIR instruction emitters. The public emit() calls the pure virtual do_emit() supplied by the concrete emitter; the protected helpers forward to the ShaderFromNirProcessor referenced by m_proc. */ class EmitInstruction +{ +public: + EmitInstruction(ShaderFromNirProcessor& processor); + virtual ~EmitInstruction(); + /* Returns whatever do_emit() returns for instr. */ bool emit(nir_instr* instr); + + /* Predefined flag sets, defined in sfn_emitinstruction.cpp: none, alu_write, alu_write plus alu_last_instr, alu_last_instr. */ static const std::set empty; + static const std::set write; + static const std::set last_write; + static const std::set last; + +protected: + /* Implemented by each concrete emitter; the result is passed back through emit(). */ virtual bool do_emit(nir_instr* instr) = 0; + + // forwards from ValuePool + PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled); + PValue from_nir(const nir_src& v, unsigned component); + PValue from_nir(const nir_alu_src& v, unsigned component); + PValue from_nir(const nir_tex_src& v, unsigned component); + PValue from_nir(const nir_alu_dest& v, unsigned component); + PValue from_nir(const nir_dest& v, unsigned component); + + /* Returns nullptr for a non-SSA source. */ const nir_load_const_instr* get_literal_register(const nir_src& src) const; + + int lookup_register_index(const nir_src& src) const; + int lookup_register_index(const nir_dest& dst); + PValue create_register_from_nir_src(const nir_src& src, unsigned comp); + + int allocate_temp_register(); + + PValue get_temp_register(); + GPRVector get_temp_vec4(); + + // forwards from ShaderFromNirProcessor + void emit_instruction(Instruction *ir); + bool emit_instruction(EAluOp opcode, PValue dest, + std::vector src0, + const std::set& m_flags); + + PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component); + GPRVector *vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle); + + void load_uniform(const nir_alu_src& src); + const nir_variable *get_deref_location(const nir_src& v) const; + + + +private: + + /* Processor that every forwarding helper above delegates to. */ ShaderFromNirProcessor& m_proc; +}; + +} + + + +#endif // EMITINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp 
b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp new file mode 100644 index 00000000000..4e43becc8be --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp @@ -0,0 +1,974 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_emittexinstruction.h" +#include "sfn_shader_base.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor): + EmitInstruction (processor) +{ +} + +/* Emits one NIR texture instruction. After gathering the inputs it dispatches on the sampler dimension first (cube, buffer, everything else) and then on the texture opcode; a failed get_inputs() or an unhandled opcode returns false. */ bool EmitTexInstruction::do_emit(nir_instr* instr) +{ + nir_tex_instr* ir = nir_instr_as_tex(instr); + + TexInputs src; + if (!get_inputs(*ir, src)) + return false; + + if (ir->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + switch (ir->op) { + case nir_texop_tex: + return emit_cube_tex(ir, src); + case nir_texop_txf: + return emit_cube_txf(ir, src); + case nir_texop_txb: + return emit_cube_txb(ir, src); + case nir_texop_txl: + return emit_cube_txl(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + case nir_texop_txd: + return emit_cube_txd(ir, src); + case nir_texop_lod: + return emit_cube_lod(ir, src); + case nir_texop_tg4: + return emit_cube_tg4(ir, src); + case nir_texop_query_levels: + /* Shares the txs path with a different destination swizzle. NOTE(review): swizzle value 7 looks like it marks a component as unused - confirm. */ return emit_tex_txs(ir, src, {3,7,7,7}); + default: + return false; + } + } else if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + switch (ir->op) { + case nir_texop_txf: + return emit_buf_txf(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + default: + return false; + } + } else { + switch (ir->op) { + case nir_texop_tex: + return emit_tex_tex(ir, src); + case nir_texop_txf: + return emit_tex_txf(ir, src); + case nir_texop_txb: + return emit_tex_txb(ir, src); + case nir_texop_txl: + return emit_tex_txl(ir, src); + case nir_texop_txd: + return emit_tex_txd(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + case nir_texop_lod: + return emit_tex_lod(ir, src); + case nir_texop_tg4: + return emit_tex_tg4(ir, src); + case nir_texop_txf_ms: + return emit_tex_txf_ms(ir, src); + case nir_texop_query_levels: + return emit_tex_txs(ir, src, {3,7,7,7}); + default: + return false; + } + } +} + +/* Texel fetch from a cube sampler is not implemented: always returns false. */ bool EmitTexInstruction::emit_cube_txf(UNUSED nir_tex_instr* instr, UNUSED TexInputs 
&src) +{ + return false; +} + +/* Cube sampling with explicit gradients: prepares the cube coordinates, halves both gradients, then emits set_gradient_h, set_gradient_v and the sample instruction in that order. */ bool EmitTexInstruction::emit_cube_txd(nir_tex_instr* instr, TexInputs& tex_src) +{ + + assert(instr->src[0].src.is_ssa); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_g; + + /* The destination registers double as the target of the cube coordinate preparation. */ std::array v; + for (int i = 0; i < 4; ++i) + v[i] = from_nir(instr->dest, i); + + GPRVector cubed(v); + emit_cube_prep(tex_src.coord, cubed, instr->is_array); + + std::array dst_elms; + std::array src_elms; + + /* Source component i is taken from prepared component lookup[i]. */ const uint16_t lookup[4] = {1, 0, 3, 2}; + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = v[i]; + src_elms[i] = cubed.reg_i(lookup[i]); + } + + GPRVector empty_dst(0, {7,7,7,7}); + + /* Shadow lookups move the comparator into source component 3 and switch to the compare opcode. */ if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_g; + } + + + /* Scale the first three components of ddx and ddy by 0.5, in place. */ PValue half(new LiteralValue(0.5f)); + for (int i = 0; i < 3; ++i) { + emit_instruction(new AluInstruction(op2_mul_ieee, tex_src.ddx.reg_i(i), {tex_src.ddx.reg_i(i), half}, + {alu_last_instr, alu_write})); + } + for (int i = 0; i < 3; ++i) { + emit_instruction(new AluInstruction(op2_mul_ieee, tex_src.ddy.reg_i(i), {tex_src.ddy.reg_i(i), half}, + {alu_last_instr, alu_write})); + } + + auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect); + + TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, tex_src.ddx, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + irgh->set_dest_swizzle({7,7,7,7}); + + TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, tex_src.ddy, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + irgv->set_dest_swizzle({7,7,7,7}); + + GPRVector dst(dst_elms); + GPRVector src(src_elms); + /* NOTE(review): this passes instr->sampler_index where the two gradient instructions above pass sampler.id - confirm the two are always equal here. */ TexInstruction *ir = new TexInstruction(tex_op, dst, src, instr->sampler_index, + sampler.id + R600_MAX_CONST_BUFFERS, 
tex_src.sampler_offset); + + set_rect_coordinate_flags(instr, ir); + //set_offsets(ir, tex_src.offset); + + emit_instruction(irgh); + emit_instruction(irgv); + emit_instruction(ir); + return true; +} + + +/* Cube sampling with explicit LOD (sample_l). Shadow samplers are rejected with false; the LOD is moved into source component 3 and array textures get the z_unnormalized flag. */ bool EmitTexInstruction::emit_cube_txl(nir_tex_instr* instr, TexInputs& tex_src) +{ + assert(instr->src[0].src.is_ssa); + + if (instr->is_shadow) + return false; + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + std::array v; + for (int i = 0; i < 4; ++i) + v[i] = from_nir(instr->dest, i); + + GPRVector cubed(v); + emit_cube_prep(tex_src.coord, cubed, instr->is_array); + + std::array dst_elms; + std::array src_elms; + + const uint16_t lookup[4] = {1, 0, 3, 2}; + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = v[i]; + src_elms[i] = cubed.reg_i(lookup[i]); + } + + auto *ir = new AluInstruction(op1_mov, src_elms[3], tex_src.lod, + {alu_last_instr, alu_write}); + emit_instruction(ir); + + GPRVector src(src_elms); + GPRVector dst(dst_elms); + + auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect); + + auto tir = new TexInstruction(TexInstruction::sample_l, dst, src, + sampler.id,sampler.id + R600_MAX_CONST_BUFFERS, + tex_src.sampler_offset); + + if (instr->is_array) + tir->set_flag(TexInstruction::z_unnormalized); + + emit_instruction(tir); + return true; +} + +/* LOD query on a cube sampler: runs the cube coordinate preparation into the destination registers and passes that vector, without the component shuffle used by the sampling variants, to get_tex_lod. */ bool EmitTexInstruction::emit_cube_lod(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::get_tex_lod; + + std::array v; + for (int i = 0; i < 4; ++i) + v[i] = from_nir(instr->dest, i); + + GPRVector cubed(v); + emit_cube_prep(src.coord, cubed, instr->is_array); + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, cubed, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, + src.sampler_offset); + + emit_instruction(irt); + return true; + +} + + 
+bool EmitTexInstruction::emit_cube_txb(nir_tex_instr* instr, TexInputs& tex_src) +{ + assert(instr->src[0].src.is_ssa); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + std::array v; + for (int i = 0; i < 4; ++i) + v[i] = from_nir(instr->dest, i); + + GPRVector cubed(v); + emit_cube_prep(tex_src.coord, cubed, instr->is_array); + + std::array dst_elms; + std::array src_elms; + + const uint16_t lookup[4] = {1, 0, 3, 2}; + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = v[i]; + src_elms[i] = v[lookup[i]]; + } + + GPRVector src(src_elms); + GPRVector dst(dst_elms); + + auto tex_op = TexInstruction::sample_lb; + if (!instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.bias, + {alu_last_instr, alu_write})); + } else { + emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_lb; + } + + auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto tir = new TexInstruction(tex_op, dst, src, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + emit_instruction(tir); + return true; + +} + +bool EmitTexInstruction::emit_cube_tex(nir_tex_instr* instr, TexInputs& tex_src) +{ + std::array v; + for (int i = 0; i < 4; ++i) + v[i] = from_nir(instr->dest, i); + + auto tex_op = TexInstruction::sample; + GPRVector cubed(v); + emit_cube_prep(tex_src.coord, cubed, instr->is_array); + + std::array dst_elms; + std::array src_elms; + + const uint16_t lookup[4] = {1, 0, 3, 2}; + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = v[i]; + src_elms[i] = v[lookup[i]]; + } + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c; + } + + GPRVector 
dst(dst_elms); + GPRVector src(src_elms); + + auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto tir = new TexInstruction(tex_op, dst, src, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + if (instr->is_array) + tir->set_flag(TexInstruction::z_unnormalized); + + emit_instruction(tir); + return true; + +} + +bool EmitTexInstruction::emit_cube_prep(const GPRVector& coord, GPRVector& cubed, bool is_array) +{ + AluInstruction *ir = nullptr; + const uint16_t src0_chan[4] = {2, 2, 0, 1}; + const uint16_t src1_chan[4] = {1, 0, 2, 2}; + + for (int i = 0; i < 4; ++i) { + ir = new AluInstruction(op2_cube, cubed.reg_i(i), coord.reg_i(src0_chan[i]), + coord.reg_i(src1_chan[i]), {alu_write}); + + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + + ir = new AluInstruction(op1_recip_ieee, cubed.reg_i(2), cubed.reg_i(2), {alu_write, alu_last_instr}); + ir->set_flag(alu_src0_abs); + emit_instruction(ir); + + PValue one_p_5(new LiteralValue(1.5f)); + for (int i = 0; i < 2; ++i) { + ir = new AluInstruction(op3_muladd, cubed.reg_i(i), cubed.reg_i(i), cubed.reg_i(2), + one_p_5, {alu_write}); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + + if (is_array) { + auto face = cubed.reg_i(3); + PValue array_index = get_temp_register(); + + ir = new AluInstruction(op1_rndne, array_index, coord.reg_i(3), {alu_write, alu_last_instr}); + emit_instruction(ir); + + ir = new AluInstruction(op2_max, array_index, {array_index, Value::zero}, {alu_write, alu_last_instr}); + emit_instruction(ir); + + ir = new AluInstruction(op3_muladd, face, {array_index, PValue (new LiteralValue(8.0f)), face}, + {alu_write, alu_last_instr}); + emit_instruction(ir); + } + + return true; +} + +bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src) +{ + auto dst = make_dest(*instr); + + auto ir = new FetchInstruction(vc_fetch, 
no_index_offset, dst, src.coord.reg_i(0), 0, + instr->texture_index + R600_MAX_CONST_BUFFERS, + PValue(), bim_none); + ir->set_flag(vtx_use_const_field); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src) +{ + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample; + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c; + } + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_g; + auto dst = make_dest(*instr); + + GPRVector empty_dst(0,{7,7,7,7}); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_g; + } + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irgh->set_dest_swizzle({7,7,7,7}); + + TexInstruction *irgv = new 
TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irgv->set_dest_swizzle({7,7,7,7}); + + TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, src.coord, ir); + + set_rect_coordinate_flags(instr, ir); + set_offsets(ir, src.offset); + + emit_instruction(irgh); + emit_instruction(irgv); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto dst = make_dest(*instr); + + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr})); + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + src.coord.set_reg_i(2, src.coord.reg_i(1)); + + auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + + if (src.offset) { + assert(src.offset->is_ssa); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { + ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), + {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + emit_instruction(tex_ir); + return true; +} + +bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::get_tex_lod; + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && 
"Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irt->set_dest_swizzle({1,0,7,7}); + emit_instruction(irt); + return true; + +} + +bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_l; + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, + {alu_last_instr, alu_write})); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_l; + } + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::sample_lb; + + std::array in_swizzle = {0,1,2,3}; + + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, + {alu_last_instr, alu_write})); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_lb; + } + + GPRVector tex_src(src.coord, in_swizzle); + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + 
+ auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, tex_src, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src, + const std::array& dest_swz) +{ + std::array dst_elms; + std::array src_elms; + + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? i : 7); + } + + GPRVector dst(dst_elms); + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), + instr->sampler_index + R600_MAX_CONST_BUFFERS, + bim_none)); + } else { + for (uint16_t i = 0; i < 4; ++i) + src_elms[i] = tex_src.lod; + GPRVector src(src_elms); + + auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + ir->set_dest_swizzle(dest_swz); + emit_instruction(ir); + } + + return true; + +} + +bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::gather4; + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::gather4_c; + } + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + bool literal_offset = false; + if (src.offset) { + literal_offset = 
src.offset->is_ssa && get_literal_register(*src.offset); + r600::sfn_log << SfnLog::tex << " really have offsets and they are " << + (literal_offset ? "literal" : "varying") << + "\n"; + + if (!literal_offset) { + GPRVector::Swizzle swizzle = {4,4,4,4}; + for (unsigned i = 0; i < instr->coord_components; ++i) + swizzle[i] = i; + + std::unique_ptr ofs(vec_from_nir_with_fetch_constant(*src.offset, + ( 1 << instr->coord_components) -1, + swizzle)); + GPRVector dummy(0, {7,7,7,7}); + tex_op = (tex_op == TexInstruction::gather4_c) ? + TexInstruction::gather4_c_o : TexInstruction::gather4_o; + + auto set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy, + *ofs, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + set_ofs->set_dest_swizzle({7,7,7,7}); + emit_instruction(set_ofs); + } + } + + + /* pre CAYMAN needs swizzle */ + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + irt->set_dest_swizzle({1,2,0,3}); + irt->set_gather_comp(instr->component); + + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + if (literal_offset) { + r600::sfn_log << SfnLog::tex << "emit literal offsets\n"; + set_offsets(irt, src.offset); + } + + set_rect_coordinate_flags(instr, irt); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_cube_tg4(nir_tex_instr* instr, TexInputs& tex_src) +{ + std::array v; + for (int i = 0; i < 4; ++i) + v[i] = from_nir(instr->dest, i); + + auto tex_op = TexInstruction::gather4; + GPRVector cubed(v); + emit_cube_prep(tex_src.coord, cubed, instr->is_array); + + std::array dst_elms; + std::array src_elms; + + const uint16_t lookup[4] = {1, 0, 3, 2}; + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = v[i]; + src_elms[i] = v[lookup[i]]; + } + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator, + {alu_last_instr, alu_write})); 
+ tex_op = TexInstruction::gather4_c; + } + + GPRVector dst(dst_elms); + GPRVector src(src_elms); + + auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto tir = new TexInstruction(tex_op, dst, src, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + + tir->set_gather_comp(instr->component); + + tir->set_dest_swizzle({1, 2, 0, 3}); + + if (instr->is_array) + tir->set_flag(TexInstruction::z_unnormalized); + + emit_instruction(tir); + return true; +} + +bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src) +{ + assert(instr->src[0].src.is_ssa); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + int sample_id = allocate_temp_register(); + + GPRVector sample_id_dest(sample_id, {0,7,7,7}); + PValue help(new GPRValue(sample_id, 1)); + + /* FIXME: Texture destination registers must be handled differently, + * because the swizzle identfies which source componnet has to be written + * at a certain position, and the target register is actually different. 
+ * At this point we just add a helper register, but for later work (scheduling + * and optimization on the r600 IR level, this needs to be implemented + * differently */ + + + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), + src.ms_index, + {alu_write, alu_last_instr})); + + auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized); + tex_sample_id_ir->set_inst_mode(1); + + emit_instruction(tex_sample_id_ir); + + emit_instruction(new AluInstruction(op2_mullo_int, help, + {src.ms_index, PValue(new LiteralValue(4))}, + {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op2_lshr_int, src.coord.reg_i(3), + {sample_id_dest.reg_i(0), help}, + {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3), + {src.coord.reg_i(3), PValue(new LiteralValue(15))}, + {alu_write, alu_last_instr})); + + auto dst = make_dest(*instr); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + src.coord.set_reg_i(2, src.coord.reg_i(1)); + + auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + + if (src.offset) { + assert(src.offset->is_ssa); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { + ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), + {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + 
emit_instruction(tex_ir); + return true; +} + +bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src) +{ + sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n"; + + unsigned grad_components = instr.coord_components; + if (instr.is_array) + --grad_components; + + + src.offset = nullptr; + bool retval = true; + for (unsigned i = 0; i < instr.num_srcs; ++i) { + switch (instr.src[i].src_type) { + case nir_tex_src_bias: + src.bias = from_nir(instr.src[i], 0); + break; + + case nir_tex_src_coord: { + std::unique_ptr coord(vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << instr.coord_components) - 1, + {0,1,2,3})); + src.coord = *coord; + + } break; + case nir_tex_src_comparator: + src.comperator = from_nir(instr.src[i], 0); + break; + case nir_tex_src_ddx: { + sfn_log << SfnLog::tex << "Get DDX "; + std::unique_ptr coord(vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << grad_components) - 1, + swizzle_from_mask(grad_components))); + src.ddx = *coord; + sfn_log << SfnLog::tex << src.ddx << "\n"; + } break; + case nir_tex_src_ddy:{ + sfn_log << SfnLog::tex << "Get DDY "; + std::unique_ptr coord(vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << grad_components) - 1, + swizzle_from_mask(grad_components))); + src.ddy = *coord; + sfn_log << SfnLog::tex << src.ddy << "\n"; + } break; + case nir_tex_src_lod: + src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0); + break; + case nir_tex_src_offset: + sfn_log << SfnLog::tex << " -- Find offset\n"; + src.offset = &instr.src[i].src; + break; + case nir_tex_src_sampler_deref: + src.sampler_deref = get_deref_location(instr.src[i].src); + break; + case nir_tex_src_texture_deref: + src.texture_deref = get_deref_location(instr.src[i].src); + break; + case nir_tex_src_ms_index: + src.ms_index = from_nir(instr.src[i], 0); + break; + case nir_tex_src_texture_offset: + src.texture_offset = from_nir(instr.src[i], 0); + break; + case 
nir_tex_src_sampler_offset: + src.sampler_offset = from_nir(instr.src[i], 0); + break; + case nir_tex_src_plane: + case nir_tex_src_projector: + case nir_tex_src_min_lod: + case nir_tex_src_ms_mcs: + default: + sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n"; + retval = false; + } + } + return retval; +} + +GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr) +{ + int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : + instr.dest.reg.reg->num_components; + std::array dst_elms; + for (uint16_t i = 0; i < 4; ++i) + dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7); + return GPRVector(dst_elms); +} + + +GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr, + const std::array& swizzle) +{ + int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : + instr.dest.reg.reg->num_components; + std::array dst_elms; + for (uint16_t i = 0; i < 4; ++i) { + int k = swizzle[i]; + dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? k : 7); + } + return GPRVector(dst_elms); +} + +void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr, + TexInstruction* ir) const +{ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + ir->set_flag(TexInstruction::x_unnormalized); + ir->set_flag(TexInstruction::y_unnormalized); + } +} + +void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset) +{ + if (!offset) + return; + + assert(offset->is_ssa); + auto literal = get_literal_register(*offset); + assert(literal); + + for (int i = 0; i < offset->ssa->num_components; ++i) { + ir->set_offset(i, literal->value[i].i32); + } +} + +void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir) +{ + int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 
1 : 2; + emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx), + {alu_last_instr, alu_write})); + ir->set_flag(TexInstruction::z_unnormalized); +} + +EmitTexInstruction::SamplerId +EmitTexInstruction::get_samplerr_id(int sampler_id, const nir_variable *deref) +{ + EmitTexInstruction::SamplerId result = {sampler_id, false}; + + if (deref) { + assert(glsl_type_is_sampler(deref->type)); + result.id = deref->data.binding; + } + return result; +} + +EmitTexInstruction::TexInputs::TexInputs(): + sampler_deref(nullptr), + texture_deref(nullptr), + offset(nullptr) +{ +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h new file mode 100644 index 00000000000..5c998b99b45 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h @@ -0,0 +1,104 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_EMITTEXINSTRUCTION_H +#define SFN_EMITTEXINSTRUCTION_H + +#include "sfn_emitinstruction.h" +#include "sfn_instruction_tex.h" + +namespace r600 { + +class EmitTexInstruction : public EmitInstruction +{ +public: + EmitTexInstruction(ShaderFromNirProcessor& processor); + +private: + struct TexInputs { + TexInputs(); + const nir_variable *sampler_deref; + const nir_variable *texture_deref; + GPRVector coord; + PValue bias; + PValue comperator; + PValue lod; + GPRVector ddx; + GPRVector ddy; + nir_src *offset; + PValue gather_comp; + PValue ms_index; + PValue sampler_offset; + PValue texture_offset; + }; + + + bool emit_cube_tex(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_txf(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_txb(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_txl(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_txd(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_lod(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_tg4(nir_tex_instr* instr, TexInputs& src); + bool emit_cube_prep(const GPRVector& coord, GPRVector& cubed, bool is_array); + + bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src); + + bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src); + bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src, + const std::array &dest_swz); + bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src); + bool 
emit_buf_txf(nir_tex_instr* instr, TexInputs& src); + + bool get_inputs(const nir_tex_instr& instr, TexInputs &src); + + void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const; + + bool do_emit(nir_instr* instr) override; + + GPRVector make_dest(nir_tex_instr& instr); + GPRVector make_dest(nir_tex_instr &instr, const std::array &swizzle); + + void set_offsets(TexInstruction* ir, nir_src *offset); + void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir); + + struct SamplerId { + int id; + bool indirect; + }; + + SamplerId get_samplerr_id(int sampler_id, const nir_variable *deref); + +}; + +} + +#endif // SFN_EMITTEXINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp new file mode 100644 index 00000000000..dffc39b9164 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp @@ -0,0 +1,152 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_alu.h" +#include "sfn_valuepool.h" + +namespace r600 { + +const AluModifiers AluInstruction::src_abs_flags[2] = + {alu_src0_abs, alu_src1_abs}; +const AluModifiers AluInstruction::src_neg_flags[3] = + {alu_src0_neg, alu_src1_neg, alu_src2_neg}; +const AluModifiers AluInstruction::src_rel_flags[3] = + {alu_src0_rel, alu_src1_rel, alu_src2_rel}; + +AluInstruction::AluInstruction(EAluOp opcode): + Instruction (Instruction::alu), + m_opcode(opcode), + m_src(alu_ops.at(opcode).nsrc), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu) +{ + if (alu_ops.at(opcode).nsrc == 3) + m_flags.set(alu_op3); +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, + std::vector src, + const std::set& flags): + Instruction (Instruction::alu), + m_opcode(opcode), + m_dest(dest), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu) +{ + m_src.swap(src); + for (auto f : flags) + m_flags.set(f); + + if (alu_ops.at(opcode).nsrc == 3) + m_flags.set(alu_op3); +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, + const std::set& flags): + AluInstruction(opcode, dest, std::vector{src0}, flags) +{ +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, + PValue src0, PValue src1, + const std::set &m_flags): + AluInstruction(opcode, dest, {src0, src1}, m_flags) +{ +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, + PValue src1, PValue src2, + const std::set &flags): + AluInstruction(opcode, dest, {src0, src1, src2}, flags) +{ +} + +bool AluInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == alu); + const auto& oth = static_cast(lhs); + + if (m_opcode != oth.m_opcode) { + return 
false; + } + + if (*m_dest != *oth.m_dest) + return false; + + if (m_src.size() != oth.m_src.size()) + return false; + + for (unsigned i = 0; i < m_src.size(); ++i) + if (*m_src[i] != *oth.m_src[i]) { + return false; + } + return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type); +} + +void AluInstruction::set_flag(AluModifiers flag) +{ + m_flags.set(flag); +} + +void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz) +{ + m_bank_swizzle = bswz; +} + +unsigned AluInstruction::n_sources() const +{ + return m_src.size(); +} + +void AluInstruction::do_print(std::ostream& os) const +{ + os << "ALU " << alu_ops.at(m_opcode).name; + if (m_flags.test(alu_dst_clamp)) + os << "_CLAMP"; + os << ' ' << *m_dest << " : " ; + + for (unsigned i = 0; i < m_src.size(); ++i) { + int pflags = 0; + if (i) + os << ' '; + if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg; + if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel; + if (i < 2) + if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs; + m_src[i]->print(os, Value::PrintFlags(0, pflags)); + } + os << " {"; + os << (m_flags.test(alu_write) ? 'W' : ' '); + os << (m_flags.test(alu_last_instr) ? 'L' : ' '); + os << (m_flags.test(alu_update_exec) ? 'E' : ' '); + os << (m_flags.test(alu_update_pred) ? 
'P' : ' '); + os << "}"; + + os << " BS:" << m_bank_swizzle; + os << " CF:" << m_cf_type; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h new file mode 100644 index 00000000000..67b641511b7 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h @@ -0,0 +1,138 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_r600_instruction_alu_h +#define sfn_r600_instruction_alu_h + +#include "sfn_instruction_base.h" +#include "sfn_alu_defines.h" + +namespace r600 { + +enum AluModifiers { + alu_src0_neg, + alu_src0_abs, + alu_src0_rel, + alu_src1_neg, + alu_src1_abs, + alu_src1_rel, + alu_src2_neg, + alu_src2_rel, + alu_dst_clamp, + alu_dst_rel, + alu_last_instr, + alu_update_exec, + alu_update_pred, + alu_write, + alu_op3 +}; + +enum AluDstModifiers { + omod_off = 0, + omod_mul2 = 1, + omod_mul4 = 2, + omod_divl2 = 3 +}; + +enum AluPredSel { + pred_off = 0, + pred_zero = 2, + pred_one = 3 +}; + +enum AluBankSwizzle { + alu_vec_012 = 0, + sq_alu_scl_201 = 0, + alu_vec_021 = 1, + sq_alu_scl_122 = 1, + alu_vec_120 = 2, + sq_alu_scl_212 = 2, + alu_vec_102 = 3, + sq_alu_scl_221 = 3, + alu_vec_201 = 4, + alu_vec_210 = 5, + alu_vec_unknown = 6 +}; + +class AluInstruction : public Instruction { +public: + + static const AluModifiers src_abs_flags[2]; + static const AluModifiers src_neg_flags[3]; + static const AluModifiers src_rel_flags[3]; + + AluInstruction(EAluOp opcode); + AluInstruction(EAluOp opcode, PValue dest, + std::vector src0, + const std::set& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, PValue src0, + const std::set& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, + PValue src0, PValue src1, + const std::set& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1, + PValue src2, + const std::set& m_flags); + + + void set_flag(AluModifiers flag); + unsigned n_sources() const; + + PValue dest() {return m_dest;} + EAluOp opcode() const {return m_opcode;} + const Value *dest() const {return m_dest.get();} + Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];} + PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];} + bool is_last() const {return m_flags.test(alu_last_instr);} + bool write() const {return m_flags.test(alu_write);} + bool flag(AluModifiers f) const 
{return m_flags.test(f);} + void set_bank_swizzle(AluBankSwizzle swz); + int bank_swizzle() const {return m_bank_swizzle;} + ECFAluOpCode cf_type() const {return m_cf_type;} + void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; } + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + + EAluOp m_opcode; + PValue m_dest; + std::vector m_src; + AluOpFlags m_flags; + AluDstModifiers m_omod; + AluPredSel m_pred_sel; + AluBankSwizzle m_bank_swizzle; + ECFAluOpCode m_cf_type; +}; + +} + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp new file mode 100644 index 00000000000..6930747550f --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp @@ -0,0 +1,60 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include +#include + +#include "sfn_instruction_base.h" +#include "sfn_valuepool.h" +#include "sfn_debug.h" + +namespace r600 { + +Instruction::Instruction(instr_type t): + m_type(t) +{ +} + +Instruction::~Instruction() +{ +} + +void Instruction::print(std::ostream& os) const +{ + os << "OP:"; + do_print(os); +} + +bool operator == (const Instruction& lhs, const Instruction& rhs) +{ + if (rhs.m_type != lhs.m_type) + return false; + + return lhs.is_equal_to(rhs); +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h new file mode 100644 index 00000000000..fe481fbabfb --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h @@ -0,0 +1,102 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_r600_instr_h +#define sfn_r600_instr_h + +#include "sfn_value_gpr.h" +#include "sfn_defines.h" + +#include "gallium/drivers/r600/r600_isa.h" +#include +#include +#include +#include + +namespace r600 { + + +using OutputRegisterMap = std::map; + +class Instruction { +public: + enum instr_type { + alu, + exprt, + tex, + vtx, + wait_ack, + cond_if, + cond_else, + cond_endif, + loop_begin, + loop_end, + loop_break, + loop_continue, + phi, + streamout, + ring, + emit_vtx, + mem_wr_scratch, + gds, + rat, + unknown + }; + + typedef std::shared_ptr Pointer; + + friend bool operator == (const Instruction& lhs, const Instruction& rhs); + + Instruction(instr_type t); + + virtual ~Instruction(); + + instr_type type() const { return m_type;} + + void print(std::ostream& os) const; + +private: + virtual bool is_equal_to(const Instruction& lhs) const = 0; + + instr_type m_type; + + virtual void do_print(std::ostream& os) const = 0; + +}; + +using PInstruction=Instruction::Pointer; + +inline std::ostream& operator << (std::ostream& os, const Instruction& instr) +{ + instr.print(os); + return os; +} + +bool operator == (const Instruction& lhs, const Instruction& rhs); + +} + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp new file mode 100644 index 00000000000..7dde127cd20 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp @@ -0,0 +1,162 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_cf.h" + +namespace r600 { + +CFInstruction::CFInstruction(instr_type type):Instruction(type) +{ + +} + +IfElseInstruction::IfElseInstruction(instr_type type): + CFInstruction (type) +{ + +} + +IfInstruction::IfInstruction(AluInstruction *pred): + IfElseInstruction(cond_if), + m_pred(pred) +{ + PValue *v = m_pred->psrc(0); +} + +bool IfInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == cond_if); + const IfInstruction& l = dynamic_cast(lhs); + return *l.m_pred == *m_pred; +} + +void IfInstruction::do_print(std::ostream& os) const +{ + os << "PRED = " << *m_pred << "\n"; + os << "IF (PRED)"; +} + +ElseInstruction::ElseInstruction(IfInstruction *jump_src): + IfElseInstruction(cond_else), + m_jump_src(jump_src) +{ +} + + +bool ElseInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != cond_else) + return false; + auto& l = static_cast(lhs); + return (*m_jump_src == *l.m_jump_src); +} + +void ElseInstruction::do_print(std::ostream& 
os) const +{ + os << "ELSE"; +} + +IfElseEndInstruction::IfElseEndInstruction(): + IfElseInstruction(cond_endif) +{ +} + +bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != cond_endif) + return false; + return true; +} + +void IfElseEndInstruction::do_print(std::ostream& os) const +{ + os << "ENDIF"; +} + +LoopBeginInstruction::LoopBeginInstruction(): + CFInstruction(loop_begin) +{ +} + +bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == loop_begin); + return true; +} + +void LoopBeginInstruction::do_print(std::ostream& os) const +{ + os << "BGNLOOP"; +} + +LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start): + CFInstruction (loop_end), + m_start(start) +{ +} + +bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == loop_end); + const auto& other = static_cast(lhs); + return *m_start == *other.m_start; +} + +void LoopEndInstruction::do_print(std::ostream& os) const +{ + os << "ENDLOOP"; +} + +LoopBreakInstruction::LoopBreakInstruction(): + CFInstruction (loop_break) +{ +} + +bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return true; +} + +void LoopBreakInstruction::do_print(std::ostream& os) const +{ + os << "BREAK"; +} + +LoopContInstruction::LoopContInstruction(): + CFInstruction (loop_continue) +{ +} + +bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return true; +} +void LoopContInstruction::do_print(std::ostream& os) const +{ + os << "CONTINUE"; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h new file mode 100644 index 00000000000..abf84f0eb18 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h @@ -0,0 +1,107 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_IFELSEINSTRUCTION_H +#define SFN_IFELSEINSTRUCTION_H + +#include "sfn_instruction_alu.h" + +namespace r600 { + +class CFInstruction : public Instruction { +protected: + CFInstruction(instr_type type); +}; + +class IfElseInstruction : public CFInstruction { +public: + IfElseInstruction(instr_type type); +}; + +class IfInstruction : public IfElseInstruction { +public: + IfInstruction(AluInstruction *pred); + const AluInstruction& pred() const {return *m_pred;} +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + std::shared_ptr m_pred; +}; + +class ElseInstruction : public IfElseInstruction { +public: + ElseInstruction(IfInstruction *jump_src); +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + IfElseInstruction *m_jump_src; +}; + +class IfElseEndInstruction : public IfElseInstruction { +public: + IfElseEndInstruction(); +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopBeginInstruction: public CFInstruction { +public: + LoopBeginInstruction(); +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopEndInstruction: public CFInstruction { +public: + LoopEndInstruction(LoopBeginInstruction *start); +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + LoopBeginInstruction *m_start; +}; + +class LoopBreakInstruction: public CFInstruction { +public: + LoopBreakInstruction(); +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopContInstruction: public CFInstruction { +public: + LoopContInstruction(); +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) 
const override; +}; + +} + +#endif // SFN_IFELSEINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp new file mode 100644 index 00000000000..7efa0832da1 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp @@ -0,0 +1,132 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +#include "sfn_instruction_export.h" +#include "sfn_valuepool.h" + +namespace r600 { + +WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value): + Instruction(t), + m_value(value) +{ +} + +ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type): + WriteoutInstruction(Instruction::exprt, value), + m_type(type), + m_loc(loc), + m_is_last(false) +{ +} + + +bool ExportInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == exprt); + const auto& oth = static_cast(lhs); + + return (gpr() == oth.gpr()) && + (m_type == oth.m_type) && + (m_loc == oth.m_loc) && + (m_is_last == oth.m_is_last); +} + +void ExportInstruction::do_print(std::ostream& os) const +{ + os << (m_is_last ? "EXPORT_DONE ":"EXPORT "); + switch (m_type) { + case et_pixel: os << "PIXEL "; break; + case et_pos: os << "POS "; break; + case et_param: os << "PARAM "; break; + } + os << m_loc << " " << gpr(); +} + +void ExportInstruction::update_output_map(OutputRegisterMap& map) const +{ + map[m_loc] = gpr_ptr(); +} + +void ExportInstruction::set_last() +{ + m_is_last = true; +} + +StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components, + int array_base, int comp_mask, int out_buffer, + int stream): + WriteoutInstruction(Instruction::streamout, value), + m_element_size(num_components == 3 ? 
3 : num_components - 1), + m_burst_count(1), + m_array_base(array_base), + m_array_size(0xfff), + m_writemask(comp_mask), + m_output_buffer(out_buffer), + m_stream(stream) +{ +} + +unsigned StreamOutIntruction::op() const +{ + int op = 0; + switch (m_output_buffer) { + case 0: op = CF_OP_MEM_STREAM0_BUF0; break; + case 1: op = CF_OP_MEM_STREAM0_BUF1; break; + case 2: op = CF_OP_MEM_STREAM0_BUF2; break; + case 3: op = CF_OP_MEM_STREAM0_BUF3; break; + } + return 4 * m_stream + op; +} + +bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == streamout); + const auto& oth = static_cast(lhs); + + return gpr() == oth.gpr() && + m_element_size == oth.m_element_size && + m_burst_count == oth.m_burst_count && + m_array_base == oth.m_array_base && + m_array_size == oth.m_array_size && + m_writemask == oth.m_writemask && + m_output_buffer == oth.m_output_buffer && + m_stream == oth.m_stream; +} + +void StreamOutIntruction::do_print(std::ostream& os) const +{ + os << "WRITE STREAM(" << m_stream << ") " << gpr() + << " ES:" << m_element_size + << " BC:" << m_burst_count + << " BUF:" << m_output_buffer + << " ARRAY:" << m_array_base; + if (m_array_size != 0xfff) + os << "+" << m_array_size; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h new file mode 100644 index 00000000000..0ea493865af --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h @@ -0,0 +1,106 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_EXPORTINSTRUCTION_H +#define SFN_EXPORTINSTRUCTION_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class WriteoutInstruction: public Instruction { +public: + const GPRVector& gpr() const {return m_value;} + const GPRVector *gpr_ptr() const {return &m_value;} +protected: + WriteoutInstruction(instr_type t, const GPRVector& value); + + GPRVector m_value; +}; + +class ExportInstruction : public WriteoutInstruction { +public: + enum ExportType { + et_pixel, + et_pos, + et_param + }; + + ExportInstruction(unsigned loc, const GPRVector& value, ExportType type); + void set_last(); + + ExportType export_type() const {return m_type;} + + unsigned location() const {return m_loc;} + bool is_last_export() const {return m_is_last;} + + void update_output_map(OutputRegisterMap& map) const; + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + ExportType m_type; + unsigned m_loc; + bool m_is_last; +}; + +class StreamOutIntruction: public WriteoutInstruction { +public: + StreamOutIntruction(const GPRVector& value, int num_components, + int array_base, int comp_mask, int out_buffer, + int stream); + int element_size() const { return m_element_size;} + 
int burst_count() const { return m_burst_count;} + int array_base() const { return m_array_base;} + int array_size() const { return m_array_size;} + int comp_mask() const { return m_writemask;} + unsigned op() const; + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + int m_element_size; + int m_burst_count; + int m_array_base; + int m_array_size; + int m_writemask; + int m_output_buffer; + int m_stream; +}; + +enum EMemWriteType { + mem_write = 0, + mem_write_ind = 1, + mem_write_ack = 2, + mem_write_ind_ack = 3, +}; + +} + + +#endif // SFN_EXPORTINSTRUCTION_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp new file mode 100644 index 00000000000..9bd23be809c --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp @@ -0,0 +1,391 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_fetch.h" + +#include "gallium/drivers/r600/r600_pipe.h" + +namespace r600 { + +/* refactor this to add status create methods for specific tasks */ +FetchInstruction::FetchInstruction(EVFetchInstr op, + EVFetchType type, + GPRVector dst, + PValue src, int offset, + int buffer_id, PValue buffer_offset, + EBufferIndexMode cp_rel, + bool use_const_field): + Instruction(vtx), + m_vc_opcode(op), + m_fetch_type(type), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(offset), + m_is_mega_fetch(1), + m_mega_fetch_count(16), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(cp_rel), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(0), + m_buffer_offset(buffer_offset), + m_dest_swizzle({0,1,2,3}) +{ + if (use_const_field) { + m_flags.set(vtx_use_const_field); + m_data_format = fmt_invalid; + m_num_format = vtx_nf_norm; + } else { + m_flags.set(vtx_format_comp_signed); + m_data_format = fmt_32_32_32_32_float; + m_num_format = vtx_nf_scaled; + } + +} + +/* Resource query */ +FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + const PValue src, + const GPRVector dst, + uint32_t offset, + bool is_mega_fetch, + uint32_t mega_fetch_count, + uint32_t buffer_id, + uint32_t semantic_id, + + EBufferIndexMode buffer_index_mode, + bool uncached, + bool indexed, + int array_base, + int array_size, + int elm_size, + PValue buffer_offset, + const std::array& dest_swizzle): + Instruction(vtx), + m_vc_opcode(vc_opcode), + m_fetch_type(fetch_type), + 
m_data_format(data_format), + m_num_format(num_format), + m_endian_swap(endian_swap), + m_src(src), + m_dst(dst), + m_offset(offset), + m_is_mega_fetch(is_mega_fetch), + m_mega_fetch_count(mega_fetch_count), + m_buffer_id(buffer_id), + m_semantic_id(semantic_id), + m_buffer_index_mode(buffer_index_mode), + m_uncached(uncached), + m_indexed(indexed), + m_array_base(array_base), + m_array_size(array_size), + m_elm_size(elm_size), + m_buffer_offset(buffer_offset), + m_dest_swizzle(dest_swizzle) +{ +} + +FetchInstruction::FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, PValue buffer_offset, + EVTXDataFormat format, + EVFetchNumFormat num_format): + Instruction(vtx), + m_vc_opcode(vc_fetch), + m_fetch_type(no_index_offset), + m_data_format(format), + m_num_format(num_format), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(0), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(bim_none), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(1), + m_buffer_offset(buffer_offset), + m_dest_swizzle({0,1,2,3}) +{ + m_flags.set(vtx_format_comp_signed); +} + + +/* Resource query */ +FetchInstruction::FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + EBufferIndexMode cp_rel): + Instruction(vtx), + m_vc_opcode(vc_get_buf_resinfo), + m_fetch_type(no_index_offset), + m_data_format(fmt_32_32_32_32), + m_num_format(vtx_nf_norm), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(16), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(cp_rel), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(0), + m_dest_swizzle({0,1,2,3}) +{ + m_flags.set(vtx_format_comp_signed); +} + +FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size): + Instruction(vtx), + 
m_vc_opcode(vc_read_scratch), + m_fetch_type(vertex_data), + m_data_format(fmt_32_32_32_32), + m_num_format(vtx_nf_int), + m_endian_swap(vtx_es_none), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(16), + m_buffer_id(0), + m_semantic_id(0), + m_buffer_index_mode(bim_none), + m_flags(0), + m_uncached(true), + m_array_base(0), + m_array_size(0), + m_elm_size(3), + m_dest_swizzle({0,1,2,3}) +{ + if (src->type() == Value::literal) { + const auto& lv = dynamic_cast(*src); + m_array_base = lv.value(); + m_indexed = false; + m_src.reset(new GPRValue(0,0)); + m_array_size = 0; + } else { + m_array_base = 0; + m_src = src; + m_indexed = true; + m_array_size = scratch_size - 1; + } +} + + +bool FetchInstruction::is_equal_to(const Instruction& lhs) const +{ + auto& l = static_cast(lhs); + if (m_src) { + if (!l.m_src) + return false; + if (*m_src != *l.m_src) + return false; + } else { + if (l.m_src) + return false; + } + + return m_vc_opcode == l.m_vc_opcode && + m_fetch_type == l.m_fetch_type && + m_data_format == l.m_data_format && + m_num_format == l.m_num_format && + m_endian_swap == l.m_endian_swap && + m_dst == l.m_dst && + m_offset == l.m_offset && + m_buffer_id == l.m_buffer_id && + m_semantic_id == l.m_semantic_id && + m_buffer_index_mode == l.m_buffer_index_mode && + m_flags == l.m_flags && + m_indexed == l.m_indexed && + m_uncached == l.m_uncached; +} + +void FetchInstruction::set_format(EVTXDataFormat fmt) +{ + m_data_format = fmt; +} + + +void FetchInstruction::set_dest_swizzle(const std::array& swz) +{ + m_dest_swizzle = swz; +} + +void FetchInstruction::prelude_append(Instruction *instr) +{ + assert(instr); + m_prelude.push_back(PInstruction(instr)); +} + +const std::vector& FetchInstruction::prelude() const +{ + return m_prelude; +} + +static const char *fmt_descr[64] = { + "INVALID", + "8", + "4_4", + "3_3_2", + "RESERVED_4", + "16", + "16F", + "8_8", + "5_6_5", + "6_5_5", + "1_5_5_5", + "4_4_4_4", + "5_5_5_1", + "32", + "32F", + 
"16_16", + "16_16F", + "8_24", + "8_24F", + "24_8", + "24_8F", + "10_11_11", + "10_11_11F", + "11_11_10", + "11_11_10F", + "2_10_10_10", + "8_8_8_8", + "10_10_10_2", + "X24_8_32F", + "32_32", + "32_32F", + "16_16_16_16", + "16_16_16_16F", + "RESERVED_33", + "32_32_32_32", + "32_32_32_32F", + "RESERVED_36", + "1", + "1_REVERSED", + "GB_GR", + "BG_RG", + "32_AS_8", + "32_AS_8_8", + "5_9_9_9_SHAREDEXP", + "8_8_8", + "16_16_16", + "16_16_16F", + "32_32_32", + "32_32_32F", + "BC1", + "BC2", + "BC3", + "BC4", + "BC5", + "APC0", + "APC1", + "APC2", + "APC3", + "APC4", + "APC5", + "APC6", + "APC7", + "CTX1", + "RESERVED_63" +}; + + +void FetchInstruction::do_print(std::ostream& os) const +{ + static const std::string num_format_char[] = {"norm", "int", "scaled"}; + static const std::string endian_swap_code[] = { + "noswap", "8in16", "8in32" + }; + static const char buffer_index_mode_char[] = "_01E"; + static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero", + "nostride", "AC"}; + switch (m_vc_opcode) { + case vc_fetch: + os << "Fetch " << m_dst; + break; + case vc_semantic: + os << "Fetch Semantic ID:" << m_semantic_id; + break; + case vc_get_buf_resinfo: + os << "Fetch BufResinfo:" << m_dst; + break; + case vc_read_scratch: + os << "MEM_READ_SCRATCH:" << m_dst; + break; + default: + os << "Fetch ERROR"; + return; + } + + os << ", " << *m_src; + + if (m_offset) + os << "+" << m_offset; + + os << " BUFID:" << m_buffer_id + << " FMT:(" << fmt_descr[m_data_format] + << " " << num_format_char[m_num_format] + << " " << endian_swap_code[m_endian_swap] + << ")"; + if (m_buffer_index_mode > 0) + os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode]; + + + if (m_is_mega_fetch) + os << " MFC:" << m_mega_fetch_count; + else + os << " mfc*:" << m_mega_fetch_count; + + if (m_flags.any()) { + os << " Flags:"; + for( int i = 0; i < vtx_unknwon; ++i) { + if (m_flags.test(i)) + os << ' ' << flag_string[i]; + } + } +} + +} diff --git 
a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h new file mode 100644 index 00000000000..0ed41316235 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h @@ -0,0 +1,167 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_INSTRUCTION_FETCH_H +#define SFN_INSTRUCTION_FETCH_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class FetchInstruction : public Instruction { +public: + + FetchInstruction(EVFetchInstr vc_opcode, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + const PValue src, + const GPRVector dst, + uint32_t offset, + bool is_mega_fetch, + uint32_t mega_fetch_count, + uint32_t buffer_id, + uint32_t semantic_id, + + EBufferIndexMode buffer_index_mode, + bool uncached, + bool indexed, + int array_base, + int array_size, + int elm_size, + PValue buffer_offset, + const std::array& dest_swizzle); + + FetchInstruction(EVFetchInstr op, + EVFetchType type, + GPRVector dst, + PValue src, int offset, + int buffer_id, PValue buffer_offset, + EBufferIndexMode cp_rel, + bool use_const_field = false); + + FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + PValue buffer_offset, + EVTXDataFormat format, + EVFetchNumFormat num_format); + + FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + EBufferIndexMode cp_rel); + + FetchInstruction(GPRVector dst, PValue src, int scratch_size); + + EVFetchInstr vc_opcode() const { return m_vc_opcode;} + EVFetchType fetch_type() const { return m_fetch_type;} + + EVTXDataFormat data_format() const { return m_data_format;} + EVFetchNumFormat num_format() const { return m_num_format;} + EVFetchEndianSwap endian_swap() const { return m_endian_swap;} + + const Value& src() const { return *m_src;} + const GPRVector& dst() const { return m_dst;} + uint32_t offset() const { return m_offset;} + + bool is_mega_fetchconst() { return m_is_mega_fetch;} + uint32_t mega_fetch_count() const { return m_mega_fetch_count;} + + uint32_t buffer_id() const { return m_buffer_id;} + uint32_t semantic_id() const { return m_semantic_id;} + EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;} + + bool is_signed() const { return 
m_flags.test(vtx_format_comp_signed);} + bool use_const_fields() const { return m_flags.test(vtx_use_const_field);} + + bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);} + + void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);} + + bool uncached() const {return m_uncached; } + bool indexed() const {return m_indexed; } + int array_base()const {return m_array_base; } + int array_size() const {return m_array_size; } + int elm_size() const {return m_elm_size; } + + void set_buffer_offset(PValue buffer_offset) { + m_buffer_offset = buffer_offset; + } + PValue buffer_offset() const { return m_buffer_offset; } + + void set_dest_swizzle(const std::array& swz); + void set_format(EVTXDataFormat fmt); + + int swz(int idx) const { return m_dest_swizzle[idx];} + + bool use_tc() const {return m_flags.test(vtx_use_tc);} + + bool use_vpm() const {return m_flags.test(vtx_vpm);} + + void prelude_append(Instruction *instr); + + const std::vector& prelude() const; + + bool has_prelude() const {return !m_prelude.empty();} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + EVFetchInstr m_vc_opcode; + EVFetchType m_fetch_type; + + EVTXDataFormat m_data_format; + EVFetchNumFormat m_num_format; + EVFetchEndianSwap m_endian_swap; + + PValue m_src; + GPRVector m_dst; + uint32_t m_offset; + + bool m_is_mega_fetch; + uint32_t m_mega_fetch_count; + + uint32_t m_buffer_id; + uint32_t m_semantic_id; + + EBufferIndexMode m_buffer_index_mode; + std::bitset<16> m_flags; + bool m_uncached; + bool m_indexed; + int m_array_base; + int m_array_size; + int m_elm_size; + PValue m_buffer_offset; + std::array m_dest_swizzle; + std::vector m_prelude; +}; + +} + +#endif // SFN_INSTRUCTION_FETCH_H diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp new file mode 100644 index 00000000000..c0f37009f18 --- /dev/null +++ 
b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp @@ -0,0 +1,310 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_instruction_tex.h" +#include "nir_builder.h" +#include "nir_builtin_builder.h" + +namespace r600 { + +TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src, + unsigned sid, unsigned rid, PValue sampler_offset): + Instruction(tex), + m_opcode(op), + m_dst(dest), + m_src(src), + m_sampler_id(sid), + m_resource_id(rid), + m_flags(0), + m_inst_mode(0), + m_dest_swizzle{0,1,2,3}, + m_sampler_offset(sampler_offset) + +{ + memset(m_offset, 0, sizeof (m_offset)); +} + +void TexInstruction::set_gather_comp(int cmp) +{ + m_inst_mode = cmp; +} + +void TexInstruction::set_offset(unsigned index, int32_t val) +{ + assert(index < 3); + m_offset[index] = val; +} + +int TexInstruction::get_offset(unsigned index) const +{ + assert(index < 3); + return (m_offset[index] << 1 & 0x1f); +} + +bool TexInstruction::is_equal_to(const Instruction& rhs) const +{ + assert(rhs.type() == tex); + const auto& r = static_cast(rhs); + return (m_opcode == r.m_opcode && + m_dst == r.m_dst && + m_src == r.m_src && + m_sampler_id == r.m_sampler_id && + m_resource_id == r.m_resource_id); +} + +void TexInstruction::do_print(std::ostream& os) const +{ + const char *map_swz = "xyzw01?_"; + os << opname(m_opcode) << " R" << m_dst.sel() << "."; + for (int i = 0; i < 4; ++i) + os << map_swz[m_dest_swizzle[i]]; + + os << " " << m_src + << " RESID:" << m_resource_id << " SAMPLER:" + << m_sampler_id; +} + +const char *TexInstruction::opname(Opcode op) +{ + switch (op) { + case ld: return "LD"; + case get_resinfo: return "GET_TEXTURE_RESINFO"; + case get_nsampled: return "GET_NUMBER_OF_SAMPLES"; + case get_tex_lod: return "GET_LOD"; + case get_gradient_h: return "GET_GRADIENTS_H"; + case get_gradient_v: return "GET_GRADIENTS_V"; + case set_offsets: return "SET_TEXTURE_OFFSETS"; + case keep_gradients: return "KEEP_GRADIENTS"; + case set_gradient_h: return "SET_GRADIENTS_H"; + case set_gradient_v: return "SET_GRADIENTS_V"; + case sample: return "SAMPLE"; + case 
sample_l: return "SAMPLE_L"; + case sample_lb: return "SAMPLE_LB"; + case sample_lz: return "SAMPLE_LZ"; + case sample_g: return "SAMPLE_G"; + case sample_g_lb: return "SAMPLE_G_L"; + case gather4: return "GATHER4"; + case gather4_o: return "GATHER4_O"; + case sample_c: return "SAMPLE_C"; + case sample_c_l: return "SAMPLE_C_L"; + case sample_c_lb: return "SAMPLE_C_LB"; + case sample_c_lz: return "SAMPLE_C_LZ"; + case sample_c_g: return "SAMPLE_C_G"; + case sample_c_g_lb: return "SAMPLE_C_G_L"; + case gather4_c: return "GATHER4_C"; + case gather4_c_o: return "OP_GATHER4_C_O"; + } + return "ERROR"; +} + + + +static bool lower_coord_shift_normalized(nir_builder& b, nir_tex_instr *tex) +{ + b.cursor = nir_before_instr(&tex->instr); + + nir_ssa_def * size = nir_i2f32(&b, nir_get_texture_size(&b, tex)); + nir_ssa_def *scale = nir_frcp(&b, size); + + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *corr = nir_fadd(&b, + nir_fmul(&b, nir_imm_float(&b, -0.5f), scale), + tex->src[coord_index].src.ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, + nir_src_for_ssa(corr)); + return true; +} + +static bool lower_coord_shift_unnormalized(nir_builder& b, nir_tex_instr *tex) +{ + b.cursor = nir_before_instr(&tex->instr); + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *corr = nir_fadd(&b, tex->src[coord_index].src.ssa, + nir_imm_float(&b, -0.5f)); + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, + nir_src_for_ssa(corr)); + return true; +} + +static bool +r600_nir_lower_int_tg4_impl(nir_function_impl *impl, const std::vector& lower) +{ + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->op == nir_texop_tg4 && + tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) { + if 
(lower[tex->sampler_index]) { + if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT) + lower_coord_shift_normalized(b, tex); + else + lower_coord_shift_unnormalized(b, tex); + progress = true; + } + } + } + } + } + return progress; +} + +/* + * This lowering pass works around a bug in r600 when doing TG4 from + * integral valued samplers. + + * Gather4 should follow the same rules as bilinear filtering, but the hardware + * incorrectly forces nearest filtering if the texture format is integer. + * The only effect it has on Gather4, which always returns 4 texels for + * bilinear filtering, is that the final coordinates are off by 0.5 of + * the texel size. +*/ + +bool r600_nir_lower_int_tg4(nir_shader *shader) +{ + bool progress = false; + bool need_lowering = false; + + int i = 0; + + std::vector lower_sampler(shader->uniforms.length(), false); + auto is = lower_sampler.begin(); + + nir_foreach_variable(var, &shader->uniforms) { + if (var->type->is_sampler()) { + if (glsl_base_type_is_integer(var->type->sampled_type)) { + need_lowering = *is = true; + } + ++i; + ++is; + } + } + + if (need_lowering) { + nir_foreach_function(function, shader) { + if (function->impl && r600_nir_lower_int_tg4_impl(function->impl, lower_sampler)) + progress = true; + } + } + + return progress; +} + +static +bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); + + b->cursor = nir_before_instr(&tex->instr); + + int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); + int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); + int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); + assert (lod_idx >= 0 || bias_idx >= 0); + + nir_ssa_def *size = nir_get_texture_size(b, tex); + nir_ssa_def *lod = (lod_idx >= 0) ? 
+ nir_ssa_for_src(b, tex->src[lod_idx].src, 1) : + nir_get_texture_lod(b, tex); + + if (bias_idx >= 0) + lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); + + if (min_lod_idx >= 0) + lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); + + /* max lod? */ + + nir_ssa_def *lambda_exp = nir_fexp2(b, lod); + nir_ssa_def *scale = NULL; + + if (tex->is_array) { + int cmp_mask = (1 << (size->num_components - 1)) - 1; + scale = nir_frcp(b, nir_channels(b, size, + (nir_component_mask_t)cmp_mask)); + } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0}; + scale = nir_frcp(b, nir_channels(b, size, 1)); + scale = nir_swizzle(b, scale, swizzle, 3); + } + + nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale); + + if (lod_idx >= 0) + nir_tex_instr_remove_src(tex, lod_idx); + if (bias_idx >= 0) + nir_tex_instr_remove_src(tex, bias_idx); + if (min_lod_idx >= 0) + nir_tex_instr_remove_src(tex, min_lod_idx); + nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad)); + nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad)); + + tex->op = nir_texop_txd; + return true; +} + + +static bool +r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + if (tex->is_shadow && + (tex->op == nir_texop_txl || tex->op == nir_texop_txb) && + (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)) + progress |= lower_txl_txf_array_or_cube(&b, tex); + } + } + } + return progress; +} + +bool +r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader) +{ + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl)) + progress = true; + } 
+ return progress; +} + + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h new file mode 100644 index 00000000000..ff5ef1ee1f2 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h @@ -0,0 +1,137 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef INSTRUCTION_TEX_H +#define INSTRUCTION_TEX_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class TexInstruction : public Instruction { +public: + enum Opcode { + ld = FETCH_OP_LD, + get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO, + get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES, + get_tex_lod = FETCH_OP_GET_LOD, + get_gradient_h = FETCH_OP_GET_GRADIENTS_H, + get_gradient_v = FETCH_OP_GET_GRADIENTS_V, + set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS, + keep_gradients = FETCH_OP_KEEP_GRADIENTS, + set_gradient_h = FETCH_OP_SET_GRADIENTS_H, + set_gradient_v = FETCH_OP_SET_GRADIENTS_V, + sample = FETCH_OP_SAMPLE, + sample_l = FETCH_OP_SAMPLE_L, + sample_lb = FETCH_OP_SAMPLE_LB, + sample_lz = FETCH_OP_SAMPLE_LZ, + sample_g = FETCH_OP_SAMPLE_G, + sample_g_lb = FETCH_OP_SAMPLE_G_L, + gather4 = FETCH_OP_GATHER4, + gather4_o = FETCH_OP_GATHER4_O, + + sample_c = FETCH_OP_SAMPLE_C, + sample_c_l = FETCH_OP_SAMPLE_C_L, + sample_c_lb = FETCH_OP_SAMPLE_C_LB, + sample_c_lz = FETCH_OP_SAMPLE_C_LZ, + sample_c_g = FETCH_OP_SAMPLE_C_G, + sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L, + gather4_c = FETCH_OP_GATHER4_C, + gather4_c_o = FETCH_OP_GATHER4_C_O, + + }; + + enum Flags { + x_unnormalized, + y_unnormalized, + z_unnormalized, + w_unnormalized, + grad_fine + }; + + TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid, + unsigned rid, PValue sampler_offset); + + const GPRVector& src() const {return m_src;} + const GPRVector& dst() const {return m_dst;} + unsigned opcode() const {return m_opcode;} + unsigned sampler_id() const {return m_sampler_id;} + unsigned resource_id() const {return m_resource_id;} + + void set_offset(unsigned index, int32_t val); + int get_offset(unsigned index) const; + + void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;} + + int inst_mode() const { return m_inst_mode;} + + void set_flag(Flags flag) { + m_flags.set(flag); + } + + PValue sampler_offset() const { + return m_sampler_offset; + } + + bool 
has_flag(Flags flag) const { + return m_flags.test(flag); + } + + int dest_swizzle(int i) const { + assert(i < 4); + return m_dest_swizzle[i]; + } + + void set_dest_swizzle(const std::array& swz) { + m_dest_swizzle = swz; + } + + void set_gather_comp(int cmp); + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + static const char *opname(Opcode code); + + Opcode m_opcode; + GPRVector m_dst; + GPRVector m_src; + unsigned m_sampler_id; + unsigned m_resource_id; + std::bitset<8> m_flags; + int m_offset[3]; + int m_inst_mode; + std::array m_dest_swizzle; + PValue m_sampler_offset; +}; + +bool r600_nir_lower_int_tg4(nir_shader *nir); +bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader); + +} + +#endif // INSTRUCTION_TEX_H diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp new file mode 100644 index 00000000000..11472927310 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp @@ -0,0 +1,1071 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_ir_to_assembly.h" +#include "sfn_conditionaljumptracker.h" +#include "sfn_callstack.h" +#include "sfn_instruction_fetch.h" + +#include "../r600_shader.h" +#include "../r600_sq.h" + +namespace r600 { + +using std::vector; + +struct AssemblyFromShaderLegacyImpl { + + AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key); + bool emit(const Instruction::Pointer i); + void reset_addr_register() {m_last_addr.reset();} + +private: + bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op); + bool emit_export(const ExportInstruction & exi); + bool emit_streamout(const StreamOutIntruction& instr); + bool emit_tex(const TexInstruction & tex_instr); + bool emit_vtx(const FetchInstruction& fetch_instr); + bool emit_if_start(const IfInstruction & if_instr); + bool emit_else(const ElseInstruction & else_instr); + bool emit_endif(const IfElseEndInstruction & endif_instr); + + bool emit_loop_begin(const LoopBeginInstruction& instr); + bool emit_loop_end(const LoopEndInstruction& instr); + bool emit_loop_break(const LoopBreakInstruction& instr); + bool emit_loop_continue(const LoopContInstruction& instr); + + bool emit_load_addr(PValue addr); + bool emit_fs_pixel_export(const ExportInstruction & exi); + bool emit_vs_pos_export(const ExportInstruction & exi); + bool emit_vs_param_export(const ExportInstruction & exi); + bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src); + bool copy_src(r600_bytecode_alu_src& src, const Value& s); + + 
ConditionalJumpTracker m_jump_tracker; + CallStack m_callstack; + +public: + r600_bytecode *m_bc; + r600_shader *m_shader; + r600_shader_key *m_key; + r600_bytecode_output m_output; + unsigned m_max_color_exports; + bool has_pos_output; + bool has_param_output; + PValue m_last_addr; + int m_loop_nesting; + int m_nliterals_in_group; +}; + + +AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh, + r600_shader_key *key) +{ + impl = new AssemblyFromShaderLegacyImpl(sh, key); +} + +AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy() +{ + delete impl; +} + +bool AssemblyFromShaderLegacy::do_lower(const std::vector& ir) +{ + if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX && + impl->m_shader->ninput > 0) + r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS); + + + std::vector exports; + + for (const auto& i : ir) { + if (!impl->emit(i)) + return false; + if (i->type() != Instruction::alu) + impl->reset_addr_register(); + } + /* + for (const auto& i : exports) { + if (!impl->emit_export(static_cast(*i))) + return false; + }*/ + + + const struct cf_op_info *last = nullptr; + if (impl->m_bc->cf_last) + last = r600_isa_cf(impl->m_bc->cf_last->op); + + /* alu clause instructions don't have EOP bit, so add NOP */ + if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END + || impl->m_bc->cf_last->op == CF_OP_POP) + r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP); + + /* A fetch shader only can't be EOP (results in hang), but we can replace it + * by a NOP */ + else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS) + impl->m_bc->cf_last->op = CF_OP_NOP; + + impl->m_bc->cf_last->end_of_program = 1; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i) +{ + sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n"; + switch (i->type()) { + case Instruction::alu: + return emit_alu(static_cast(*i), cf_alu_undefined); + case Instruction::exprt: + return emit_export(static_cast(*i)); + case 
Instruction::tex: + return emit_tex(static_cast(*i)); + case Instruction::vtx: + return emit_vtx(static_cast(*i)); + case Instruction::cond_if: + return emit_if_start(static_cast(*i)); + case Instruction::cond_else: + return emit_else(static_cast(*i)); + case Instruction::cond_endif: + return emit_endif(static_cast(*i)); + case Instruction::loop_begin: + return emit_loop_begin(static_cast(*i)); + case Instruction::loop_end: + return emit_loop_end(static_cast(*i)); + case Instruction::loop_break: + return emit_loop_break(static_cast(*i)); + case Instruction::loop_continue: + return emit_loop_continue(static_cast(*i)); + case Instruction::streamout: + return emit_streamout(static_cast(*i)); + default: + return false; + } +} + +AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh, + r600_shader_key *key): + m_callstack(sh->bc), + m_bc(&sh->bc), + m_shader(sh), + m_key(key), + has_pos_output(false), + has_param_output(false), + m_loop_nesting(0), + m_nliterals_in_group(0) +{ + m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1); +} + +extern const std::map opcode_map; + +bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr) +{ + m_bc->ar_reg = addr->sel(); + m_bc->ar_chan = addr->chan(); + m_bc->ar_loaded = 0; + m_last_addr = addr; + + sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n"; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op) +{ + + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + PValue addr_in_use; + + if (opcode_map.find(ai.opcode()) == opcode_map.end()) { + std::cerr << "Opcode not handled for " << ai <<"\n"; + return false; + } + + for (unsigned i = 0; i < ai.n_sources(); ++i) { + auto& s = ai.src(i); + if (s.type() == Value::literal) + ++m_nliterals_in_group; + } + + /* This instruction group would exeed the limit of literals, so + * force a new instruction group by adding a NOP as last + * instruction. 
This will no loner be needed with a real + * scheduler */ + if (m_nliterals_in_group > 4) { + sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n"; + alu.op = op0_nop; + alu.last = 1; + int retval = r600_bytecode_add_alu(m_bc, &alu); + if (retval) + return false; + memset(&alu, 0, sizeof(alu)); + m_nliterals_in_group = 0; + } + + alu.op = opcode_map.at(ai.opcode()); + + /* Missing test whether ai actually has a dest */ + auto dst = ai.dest(); + + if (dst) { + if (!copy_dst(alu.dst, *dst)) + return false; + + alu.dst.write = ai.flag(alu_write); + alu.dst.clamp = ai.flag(alu_dst_clamp); + + if (dst->type() == Value::gpr_array_value) { + auto& v = static_cast(*dst); + PValue addr = v.indirect(); + if (addr) { + if (!m_last_addr || *addr != *m_last_addr) { + emit_load_addr(addr); + addr_in_use = addr; + } + alu.dst.rel = addr ? 1 : 0;; + } + } + } + + alu.is_op3 = ai.n_sources() == 3; + + for (unsigned i = 0; i < ai.n_sources(); ++i) { + auto& s = ai.src(i); + + if (!copy_src(alu.src[i], s)) + return false; + alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]); + + if (s.type() == Value::gpr_array_value) { + auto& v = static_cast(s); + PValue addr = v.indirect(); + if (addr) { + assert(!addr_in_use || (*addr_in_use == *addr)); + if (!m_last_addr || *addr != *m_last_addr) { + emit_load_addr(addr); + addr_in_use = addr; + } + alu.src[i].rel = addr ? 
1 : 0; + } + } + if (!alu.is_op3) + alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]); + } + + if (ai.bank_swizzle() != alu_vec_unknown) + alu.bank_swizzle_force = ai.bank_swizzle(); + + alu.last = ai.flag(alu_last_instr); + alu.update_pred = ai.flag(alu_update_pred); + alu.execute_mask = ai.flag(alu_update_exec); + + /* If the destination register is equal to the last loaded address register + * then clear the latter one, because the values will no longer be identical */ + if (m_last_addr) + sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n"; + + if (dst) + sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n"; + + if (dst && m_last_addr) + if (*dst == *m_last_addr) { + sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n"; + m_last_addr.reset(); + } + + if (cf_op == cf_alu_undefined) + cf_op = ai.cf_type(); + + unsigned type = 0; + switch (cf_op) { + case cf_alu: type = CF_OP_ALU; break; + case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break; + case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break; + case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break; + case cf_alu_break: type = CF_OP_ALU_BREAK; break; + case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break; + case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break; + case cf_alu_extended: type = CF_OP_ALU_EXT; break; + default: + assert(0 && "cf_alu_undefined should have been replaced"); + } + + if (alu.last) + m_nliterals_in_group = 0; + + bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type); + + if (ai.opcode() == op1_mova_int) + m_bc->ar_loaded = 0; + + if (ai.opcode() == op1_set_cf_idx0) + m_bc->index_loaded[0] = 1; + + if (ai.opcode() == op1_set_cf_idx1) + m_bc->index_loaded[1] = 1; + + + m_bc->force_add_cf |= (ai.opcode() == op2_kille || + ai.opcode() == op2_killne_int || + ai.opcode() == op1_set_cf_idx0 || + ai.opcode() == op1_set_cf_idx1); + return retval; +} + +bool 
AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi) +{ + r600_bytecode_output output; + memset(&output, 0, sizeof(output)); + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = gpr.chan_i(3); + output.burst_count = 1; + output.array_base = 60 + exi.location(); + output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi) +{ + r600_bytecode_output output; + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + + memset(&output, 0, sizeof(output)); + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = gpr.chan_i(3); + output.burst_count = 1; + output.array_base = exi.location(); + output.op = exi.is_last_export() ? 
CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi) +{ + if (exi.location() >= m_max_color_exports && exi.location() < 60) { + R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n", + exi.location(), m_max_color_exports); + return true; + } + + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + + r600_bytecode_output output; + memset(&output, 0, sizeof(output)); + + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ; + output.burst_count = 1; + output.array_base = exi.location(); + output.op = exi.is_last_export() ? 
CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi) +{ + switch (exi.export_type()) { + case ExportInstruction::et_pixel: + return emit_fs_pixel_export(exi); + case ExportInstruction::et_pos: + return emit_vs_pos_export(exi); + case ExportInstruction::et_param: + return emit_vs_param_export(exi); + default: + R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type()); + return false; + } +} + +bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr) +{ + assert(m_bc->chip_class == EVERGREEN); + + bool needs_workaround = false; + int elems = m_callstack.push(FC_PUSH_VPM); + + if (m_bc->family != CHIP_HEMLOCK && + m_bc->family != CHIP_CYPRESS && + m_bc->family != CHIP_JUNIPER) { + unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size; + unsigned dmod2 = (elems) % m_bc->stack.entry_size; + + if (elems && (!dmod1 || !dmod2)) + needs_workaround = true; + } + + auto& pred = if_instr.pred(); + auto op = cf_alu_push_before; + + if (needs_workaround) { + r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + op = cf_alu; + } + emit_alu(pred, op); + + r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); + + m_jump_tracker.push(m_bc->cf_last, jt_if); + return true; +} + +bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE); + m_bc->cf_last->pop_count = 1; + return m_jump_tracker.add_mid(m_bc->cf_last, jt_if); +} + +bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr) +{ + m_callstack.pop(FC_PUSH_VPM); + + unsigned force_pop = m_bc->force_add_cf; + if (!force_pop) { + int alu_pop = 3; + if (m_bc->cf_last) { + if 
(m_bc->cf_last->op == CF_OP_ALU) + alu_pop = 0; + else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER) + alu_pop = 1; + } + alu_pop += 1; + if (alu_pop == 1) { + m_bc->cf_last->op = CF_OP_ALU_POP_AFTER; + m_bc->force_add_cf = 1; + } else if (alu_pop == 2) { + m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER; + m_bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { + r600_bytecode_add_cfinst(m_bc, CF_OP_POP); + m_bc->cf_last->pop_count = 1; + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + } + + return m_jump_tracker.pop(m_bc->cf_last, jt_if); +} + +bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10); + m_jump_tracker.push(m_bc->cf_last, jt_loop); + m_callstack.push(FC_LOOP); + ++m_loop_nesting; + return true; +} + +bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END); + m_callstack.pop(FC_LOOP); + assert(m_loop_nesting); + --m_loop_nesting; + return m_jump_tracker.pop(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK); + return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE); + return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + + output.gpr = so_instr.gpr().sel(); + output.elem_size = so_instr.element_size(); + output.array_base = so_instr.array_base(); + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; + output.burst_count = so_instr.burst_count(); + output.array_size = 
so_instr.array_size(); + output.comp_mask = so_instr.comp_mask(); + output.op = so_instr.op(); + + assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("shader_from_nir: Error creating stream output instruction\n"); + return false; + } + return true; +} + +bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr) +{ + auto addr = tex_instr.sampler_offset(); + if (addr && (!m_bc->index_loaded[1] || m_loop_nesting + || m_bc->index_reg[1] != addr->sel())) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + alu.op = opcode_map.at(op1_mova_int); + alu.dst.chan = 0; + alu.src[0].sel = addr->sel(); + alu.src[0].chan = addr->chan(); + alu.last = 1; + int r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->ar_loaded = 0; + + alu.op = opcode_map.at(op1_set_cf_idx1); + alu.dst.chan = 0; + alu.src[0].sel = 0; + alu.src[0].chan = 0; + alu.last = 1; + + r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->index_reg[1] = addr->sel(); + m_bc->index_loaded[1] = true; + } + + r600_bytecode_tex tex; + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = tex_instr.opcode(); + tex.sampler_id = tex_instr.sampler_id(); + tex.sampler_index_mode = 0; + tex.resource_id = tex_instr.resource_id();; + tex.resource_index_mode = 0; + tex.src_gpr = tex_instr.src().sel(); + tex.dst_gpr = tex_instr.dst().sel(); + tex.dst_sel_x = tex_instr.dest_swizzle(0); + tex.dst_sel_y = tex_instr.dest_swizzle(1); + tex.dst_sel_z = tex_instr.dest_swizzle(2); + tex.dst_sel_w = tex_instr.dest_swizzle(3); + tex.src_sel_x = tex_instr.src().chan_i(0); + tex.src_sel_y = tex_instr.src().chan_i(1); + tex.src_sel_z = tex_instr.src().chan_i(2); + tex.src_sel_w = tex_instr.src().chan_i(3); + tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized); + tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized); + 
tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized); + tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized); + tex.offset_x = tex_instr.get_offset(0); + tex.offset_y = tex_instr.get_offset(1); + tex.offset_z = tex_instr.get_offset(2); + tex.resource_index_mode = (!!addr) ? 2 : 0; + tex.sampler_index_mode = tex.resource_index_mode; + + if (tex_instr.opcode() == TexInstruction::get_gradient_h || + tex_instr.opcode() == TexInstruction::get_gradient_v) + tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0; + else + tex.inst_mod = tex_instr.inst_mode(); + if (r600_bytecode_add_tex(m_bc, &tex)) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + return true; +} + +bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr) +{ + int buffer_offset = 0; + auto addr = fetch_instr.buffer_offset(); + auto index_mode = fetch_instr.buffer_index_mode(); + + if (addr) { + if (addr->type() == Value::literal) { + const auto& boffs = dynamic_cast(*addr); + buffer_offset = boffs.value(); + } else { + index_mode = bim_zero; + if ((!m_bc->index_loaded[0] || m_loop_nesting || m_bc->index_reg[0] != addr->sel())) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + alu.op = opcode_map.at(op1_mova_int); + alu.dst.chan = 0; + alu.src[0].sel = addr->sel(); + alu.src[0].chan = addr->chan(); + alu.last = 1; + int r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->ar_loaded = 0; + + alu.op = opcode_map.at(op1_set_cf_idx0); + alu.dst.chan = 0; + alu.src[0].sel = 0; + alu.src[0].chan = 0; + alu.last = 1; + + r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->index_reg[0] = addr->sel(); + m_bc->index_loaded[0] = true; + } + } + } + + if (fetch_instr.has_prelude()) { + for(auto &i : fetch_instr.prelude()) { + if (!emit(i)) + return false; + } + } + + struct r600_bytecode_vtx vtx; + memset(&vtx, 0, sizeof(vtx)); + vtx.op 
= fetch_instr.vc_opcode(); + vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset; + vtx.fetch_type = fetch_instr.fetch_type(); + vtx.src_gpr = fetch_instr.src().sel(); + vtx.src_sel_x = fetch_instr.src().chan(); + vtx.mega_fetch_count = fetch_instr.mega_fetch_count(); + vtx.dst_gpr = fetch_instr.dst().sel(); + vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */ + vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */ + vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */ + vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */ + vtx.use_const_fields = fetch_instr.use_const_fields(); + vtx.data_format = fetch_instr.data_format(); + vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */ + vtx.endian = fetch_instr.endian_swap(); + vtx.buffer_index_mode = index_mode; + vtx.offset = fetch_instr.offset(); + vtx.indexed = fetch_instr.indexed(); + vtx.uncached = fetch_instr.uncached(); + vtx.elem_size = fetch_instr.elm_size(); + vtx.array_base = fetch_instr.array_base(); + vtx.array_size = fetch_instr.array_size(); + vtx.srf_mode_all = fetch_instr.srf_mode_no_zero(); + + if (fetch_instr.use_tc()) { + if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + + } else { + if ((r600_bytecode_add_vtx(m_bc, &vtx))) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + } + + m_bc->cf_last->vpm = fetch_instr.use_vpm(); + m_bc->cf_last->barrier = 1; + + return true; +} + +extern const std::map ds_opcode_map; + +bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst, + const Value& d) +{ + assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value); + + if (d.sel() > 124) { + R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel()); + return false; + } + + dst.sel = d.sel(); + dst.chan = d.chan(); + + if (m_bc->index_reg[1] == 
dst.sel) + m_bc->index_loaded[1] = false; + + if (m_bc->index_reg[0] == dst.sel) + m_bc->index_loaded[0] = false; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s) +{ + + if (s.type() == Value::gpr && s.sel() > 124) { + R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel()); + return false; + } + + if (s.type() == Value::lds_direct) { + R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n"); + return false; + } + + if (s.type() == Value::kconst && s.sel() < 512) { + R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel()); + return false; + } + + if (s.type() == Value::literal) { + auto& v = static_cast(s); + if (v.value() == 0) { + src.sel = ALU_SRC_0; + src.chan = 0; + return true; + } + if (v.value() == 1) { + src.sel = ALU_SRC_1_INT; + src.chan = 0; + return true; + } + if (v.value_float() == 1.0f) { + src.sel = ALU_SRC_1; + src.chan = 0; + return true; + } + if (v.value_float() == 0.5f) { + src.sel = ALU_SRC_0_5; + src.chan = 0; + return true; + } + src.value = v.value(); + } + + src.sel = s.sel(); + src.chan = s.chan(); + if (s.type() == Value::kconst) { + const UniformValue& cv = static_cast(s); + src.kc_bank = cv.kcache_bank(); + } + + return true; +} + +const std::map opcode_map = { + + {op2_add, ALU_OP2_ADD}, + {op2_mul, ALU_OP2_MUL}, + {op2_mul_ieee, ALU_OP2_MUL_IEEE}, + {op2_max, ALU_OP2_MAX}, + {op2_min, ALU_OP2_MIN}, + {op2_max_dx10, ALU_OP2_MAX_DX10}, + {op2_min_dx10, ALU_OP2_MIN_DX10}, + {op2_sete, ALU_OP2_SETE}, + {op2_setgt, ALU_OP2_SETGT}, + {op2_setge, ALU_OP2_SETGE}, + {op2_setne, ALU_OP2_SETNE}, + {op2_sete_dx10, ALU_OP2_SETE_DX10}, + {op2_setgt_dx10, ALU_OP2_SETGT_DX10}, + {op2_setge_dx10, ALU_OP2_SETGE_DX10}, + {op2_setne_dx10, ALU_OP2_SETNE_DX10}, + {op1_fract, ALU_OP1_FRACT}, + {op1_trunc, ALU_OP1_TRUNC}, + {op1_ceil, ALU_OP1_CEIL}, + {op1_rndne, ALU_OP1_RNDNE}, + {op1_floor, ALU_OP1_FLOOR}, + {op2_ashr_int, 
ALU_OP2_ASHR_INT}, + {op2_lshr_int, ALU_OP2_LSHR_INT}, + {op2_lshl_int, ALU_OP2_LSHL_INT}, + {op1_mov, ALU_OP1_MOV}, + {op0_nop, ALU_OP0_NOP}, + {op2_mul_64, ALU_OP2_MUL_64}, + {op1_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, + {op1v_flt64_to_flt32, ALU_OP1_FLT32_TO_FLT64}, + {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT}, + {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT}, + {op2_pred_sete, ALU_OP2_PRED_SETE}, + {op2_pred_setgt, ALU_OP2_PRED_SETGT}, + {op2_pred_setge, ALU_OP2_PRED_SETGE}, + {op2_pred_setne, ALU_OP2_PRED_SETNE}, + //{op2_pred_set_inv, ALU_OP2_PRED_SET}, + //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL}, + //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE}, + {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH}, + {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH}, + {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH}, + {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH}, + {op2_kille, ALU_OP2_KILLE}, + {op2_killgt, ALU_OP2_KILLGT}, + {op2_killge, ALU_OP2_KILLGE}, + {op2_killne, ALU_OP2_KILLNE}, + {op2_and_int, ALU_OP2_AND_INT}, + {op2_or_int, ALU_OP2_OR_INT}, + {op2_xor_int, ALU_OP2_XOR_INT}, + {op1_not_int, ALU_OP1_NOT_INT}, + {op2_add_int, ALU_OP2_ADD_INT}, + {op2_sub_int, ALU_OP2_SUB_INT}, + {op2_max_int, ALU_OP2_MAX_INT}, + {op2_min_int, ALU_OP2_MIN_INT}, + {op2_max_uint, ALU_OP2_MAX_UINT}, + {op2_min_uint, ALU_OP2_MIN_UINT}, + {op2_sete_int, ALU_OP2_SETE_INT}, + {op2_setgt_int, ALU_OP2_SETGT_INT}, + {op2_setge_int, ALU_OP2_SETGE_INT}, + {op2_setne_int, ALU_OP2_SETNE_INT}, + {op2_setgt_uint, ALU_OP2_SETGT_UINT}, + {op2_setge_uint, ALU_OP2_SETGE_UINT}, + {op2_killgt_uint, ALU_OP2_KILLGT_UINT}, + {op2_killge_uint, ALU_OP2_KILLGE_UINT}, + //p2_prede_int, ALU_OP2_PREDE_INT}, + {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT}, + {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT}, + {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT}, + {op2_kille_int, ALU_OP2_KILLE_INT}, + {op2_killgt_int, ALU_OP2_KILLGT_INT}, + {op2_killge_int, ALU_OP2_KILLGE_INT}, + {op2_killne_int, ALU_OP2_KILLNE_INT}, + 
{op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT}, + {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT}, + {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT}, + {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT}, + {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT}, + {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT}, + {op1_flt_to_int, ALU_OP1_FLT_TO_INT}, + {op1_bfrev_int, ALU_OP1_BFREV_INT}, + {op2_addc_uint, ALU_OP2_ADDC_UINT}, + {op2_subb_uint, ALU_OP2_SUBB_UINT}, + {op0_group_barrier, ALU_OP0_GROUP_BARRIER}, + {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN}, + {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END}, + {op2_set_mode, ALU_OP2_SET_MODE}, + {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0}, + {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1}, + {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE}, + {op1_exp_ieee, ALU_OP1_EXP_IEEE}, + {op1_log_clamped, ALU_OP1_LOG_CLAMPED}, + {op1_log_ieee, ALU_OP1_LOG_IEEE}, + {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED}, + {op1_recip_ff, ALU_OP1_RECIP_FF}, + {op1_recip_ieee, ALU_OP1_RECIP_IEEE}, + {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED}, + {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF}, + {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE}, + {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE}, + {op1_sin, ALU_OP1_SIN}, + {op1_cos, ALU_OP1_COS}, + {op2_mullo_int, ALU_OP2_MULLO_INT}, + {op2_mulhi_int, ALU_OP2_MULHI_INT}, + {op2_mullo_uint, ALU_OP2_MULLO_UINT}, + {op2_mulhi_uint, ALU_OP2_MULHI_UINT}, + {op1_recip_int, ALU_OP1_RECIP_INT}, + {op1_recip_uint, ALU_OP1_RECIP_UINT}, + {op1_recip_64, ALU_OP2_RECIP_64}, + {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64}, + {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64}, + {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64}, + {op1_sqrt_64, ALU_OP2_SQRT_64}, + {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT}, + {op1_int_to_flt, ALU_OP1_INT_TO_FLT}, + {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT}, + {op2_bfm_int, ALU_OP2_BFM_INT}, + {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16}, + {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32}, + 
{op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT}, + {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT}, + {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT}, + {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT}, + {op1_bcnt_int, ALU_OP1_BCNT_INT}, + {op1_ffbh_uint, ALU_OP1_FFBH_UINT}, + {op1_ffbl_int, ALU_OP1_FFBL_INT}, + {op1_ffbh_int, ALU_OP1_FFBH_INT}, + {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4}, + {op2_dot_ieee, ALU_OP2_DOT_IEEE}, + {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI}, + {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR}, + {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24}, + {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT}, + {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT}, + {op2_mul_uint24, ALU_OP2_MUL_UINT24}, + {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT}, + {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT}, + {op2_sete_64, ALU_OP2_SETE_64}, + {op2_setne_64, ALU_OP2_SETNE_64}, + {op2_setgt_64, ALU_OP2_SETGT_64}, + {op2_setge_64, ALU_OP2_SETGE_64}, + {op2_min_64, ALU_OP2_MIN_64}, + {op2_max_64, ALU_OP2_MAX_64}, + {op2_dot4, ALU_OP2_DOT4}, + {op2_dot4_ieee, ALU_OP2_DOT4_IEEE}, + {op2_cube, ALU_OP2_CUBE}, + {op1_max4, ALU_OP1_MAX4}, + {op1_frexp_64, ALU_OP1_FREXP_64}, + {op1_ldexp_64, ALU_OP2_LDEXP_64}, + {op1_fract_64, ALU_OP1_FRACT_64}, + {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64}, + {op2_pred_sete_64, ALU_OP2_PRED_SETE_64}, + {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64}, + {op2_add_64, ALU_OP2_ADD_64}, + {op1_mova_int, ALU_OP1_MOVA_INT}, + {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, + {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, + {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT}, + {op2_dot, ALU_OP2_DOT}, + //p2_mul_prev, ALU_OP2_MUL_PREV}, + //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV}, + //p2_add_prev, ALU_OP2_ADD_PREV}, + {op2_muladd_prev, ALU_OP2_MULADD_PREV}, + {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV}, + {op2_interp_xy, ALU_OP2_INTERP_XY}, + {op2_interp_zw, ALU_OP2_INTERP_ZW}, + {op2_interp_x, ALU_OP2_INTERP_X}, + {op2_interp_z, ALU_OP2_INTERP_Z}, + {op0_store_flags, 
ALU_OP1_STORE_FLAGS}, + {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS}, + {op0_lds_1a, ALU_OP2_LDS_1A}, + {op0_lds_1a1d, ALU_OP2_LDS_1A1D}, + {op0_lds_2a, ALU_OP2_LDS_2A}, + {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0}, + {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10}, + {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20}, + // {op 3 all left shift 6 + {op3_bfe_uint, ALU_OP3_BFE_UINT}, + {op3_bfe_int, ALU_OP3_BFE_INT}, + {op3_bfi_int, ALU_OP3_BFI_INT}, + {op3_fma, ALU_OP3_FMA}, + {op3_cndne_64, ALU_OP3_CNDNE_64}, + {op3_fma_64, ALU_OP3_FMA_64}, + {op3_lerp_uint, ALU_OP3_LERP_UINT}, + {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT}, + {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT}, + {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT}, + {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT}, + {op3_muladd_uint24, ALU_OP3_MULADD_UINT24}, + {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP}, + {op3_muladd, ALU_OP3_MULADD}, + {op3_muladd_m2, ALU_OP3_MULADD_M2}, + {op3_muladd_m4, ALU_OP3_MULADD_M4}, + {op3_muladd_d2, ALU_OP3_MULADD_D2}, + {op3_muladd_ieee, ALU_OP3_MULADD_IEEE}, + {op3_cnde, ALU_OP3_CNDE}, + {op3_cndgt, ALU_OP3_CNDGT}, + {op3_cndge, ALU_OP3_CNDGE}, + {op3_cnde_int, ALU_OP3_CNDE_INT}, + {op3_cndgt_int, ALU_OP3_CNDGT_INT}, + {op3_cndge_int, ALU_OP3_CNDGE_INT}, + {op3_mul_lit, ALU_OP3_MUL_LIT}, +}; + +const std::map ds_opcode_map = { + {DS_OP_ADD, FETCH_OP_GDS_ADD}, + {DS_OP_SUB, FETCH_OP_GDS_SUB}, + {DS_OP_RSUB, FETCH_OP_GDS_RSUB}, + {DS_OP_INC, FETCH_OP_GDS_INC}, + {DS_OP_DEC, FETCH_OP_GDS_DEC}, + {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT}, + {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT}, + {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT}, + {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT}, + {DS_OP_AND, FETCH_OP_GDS_AND}, + {DS_OP_OR, FETCH_OP_GDS_OR}, + {DS_OP_XOR, FETCH_OP_GDS_XOR}, + {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR}, + {DS_OP_WRITE, FETCH_OP_GDS_WRITE}, + {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL}, + {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2}, + {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE}, + {DS_OP_CMP_STORE_SPF, 
FETCH_OP_GDS_CMP_STORE_SPF}, + {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE}, + {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE}, + {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET}, + {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET}, + {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET}, + {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET}, + {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET}, + {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET}, + {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET}, + {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET}, + {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET}, + {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET}, + {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET}, + {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET}, + {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET}, + {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET}, + {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET}, + {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET}, + {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET}, + {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET}, + {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET}, + {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET}, + {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET}, + {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET}, + {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET}, + {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET}, + {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET}, + {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET}, + {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC}, + {DS_OP_INVALID, 0}, +}; + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h new file mode 100644 index 00000000000..075ea3b728a --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h @@ -0,0 +1,45 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "sfn_nir.h" + +struct r600_shader; +union r600_shader_key; + +namespace r600 { + +class AssemblyFromShaderLegacy : public AssemblyFromShader { +public: + AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key); + ~AssemblyFromShaderLegacy() override; +private: + bool do_lower(const std::vector& ir) override ; + + struct AssemblyFromShaderLegacyImpl *impl; +}; + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp new file mode 100644 index 00000000000..b72b873e254 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -0,0 +1,543 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or 
sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_nir.h" +#include "nir_builder.h" + +#include "../r600_pipe.h" +#include "../r600_shader.h" + +#include "sfn_instruction_tex.h" + +#include "sfn_shader_vertex.h" +#include "sfn_shader_fragment.h" +#include "sfn_ir_to_assembly.h" + +#include + +namespace r600 { + +using std::vector; + +ShaderFromNir::ShaderFromNir():sh(nullptr), + m_current_if_id(0), + m_current_loop_id(0) +{ +} + +/* Translate a NIR shader into the r600 IR: pick the stage-specific processor, process declarations, scan and allocate registers, then walk the control-flow tree. Returns false if the stage or any instruction is unsupported. */ +bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader, + r600_pipe_shader_selector *sel, r600_shader_key& key, + struct r600_shader* gs_shader) +{ + sh = shader; + assert(sh); + + switch (shader->info.stage) { + case MESA_SHADER_VERTEX: + if (key.vs.as_es) { + sfn_log << SfnLog::trans << "VS; next type GS not yet supported\n"; + /* No processor is created on this path, so impl stays unset; bail out before it is dereferenced after the switch. */ + return false; + } else if (key.vs.as_ls) { + sfn_log << "VS: next type TCS and TES not yet supported\n"; + return false; + } else { + sfn_log << SfnLog::trans << "Start VS for FS\n"; + impl.reset(new VertexShaderFromNirForFS(pipe_shader, *sel, key)); + } + break; + case MESA_SHADER_FRAGMENT: + sfn_log << SfnLog::trans << "Start FS\n"; + impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key)); + break; + default: + 
return false; + } + + sfn_log << SfnLog::trans << "Process declarations\n"; + if (!process_declaration()) + return false; + + // at this point all functions should be inlined + const nir_function *func = reinterpret_cast(exec_list_get_head_const(&sh->functions)); + + sfn_log << SfnLog::trans << "Scan shader\n"; + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (!impl->scan_instruction(instr)) { + fprintf(stderr, "Unhandled sysvalue access "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + return false; + } + } + } + + sfn_log << SfnLog::trans << "Reserve registers\n"; + if (!impl->allocate_reserved_registers()) { + return false; + } + + ValuePool::array_list arrays; + sfn_log << SfnLog::trans << "Allocate local registers\n"; + foreach_list_typed(nir_register, reg, node, &func->impl->registers) { + impl->allocate_local_register(*reg, arrays); + } + + sfn_log << SfnLog::trans << "Emit shader start\n"; + impl->allocate_arrays(arrays); + + impl->emit_shader_start(); + + sfn_log << SfnLog::trans << "Process shader \n"; + foreach_list_typed(nir_cf_node, node, node, &func->impl->body) { + if (!process_cf_node(node)) + return false; + } + + // Add optimizations here + sfn_log << SfnLog::trans << "Finalize\n"; + impl->finalize(); + + sfn_log << SfnLog::trans << "Finished translating to R600 IR\n"; + return true; +} + +Shader ShaderFromNir::shader() const +{ + return Shader{impl->m_output, impl->get_temp_registers()}; +} + + +bool ShaderFromNir::process_cf_node(nir_cf_node *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "CF"); + switch (node->type) { + case nir_cf_node_block: + return process_block(nir_cf_node_as_block(node)); + case nir_cf_node_if: + return process_if(nir_cf_node_as_if(node)); + case nir_cf_node_loop: + return process_loop(nir_cf_node_as_loop(node)); + default: + return false; + } +} + +bool ShaderFromNir::process_if(nir_if *if_stmt) +{ + SFN_TRACE_FUNC(SfnLog::flow, "IF"); + + if (!impl->emit_if_start(m_current_if_id, 
if_stmt)) + return false; + + int if_id = m_current_if_id++; + m_if_stack.push(if_id); + + foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) + if (!process_cf_node(n)) return false; + + /* The else branch is emitted only when there is an else list to walk; the guard previously tested then_list, which has nothing to do with whether an ELSE is needed. */ + if (!if_stmt->else_list.is_empty()) { + if (!impl->emit_else_start(if_id)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list) + if (!process_cf_node(n)) return false; + } + + if (!impl->emit_ifelse_end(if_id)) + return false; + + m_if_stack.pop(); + return true; +} + +bool ShaderFromNir::process_loop(nir_loop *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "LOOP"); + int loop_id = m_current_loop_id++; + + if (!impl->emit_loop_start(loop_id)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &node->body) + if (!process_cf_node(n)) return false; + + if (!impl->emit_loop_end(loop_id)) + return false; + + return true; +} + +bool ShaderFromNir::process_block(nir_block *block) +{ + SFN_TRACE_FUNC(SfnLog::flow, "BLOCK"); + nir_foreach_instr(instr, block) { + int r = emit_instruction(instr); + if (!r) { + sfn_log << SfnLog::err << "R600: Unsupported instruction: " + << *instr << "\n"; + return false; + } + } + return true; +} + + +ShaderFromNir::~ShaderFromNir() +{ +} + +pipe_shader_type ShaderFromNir::processor_type() const +{ + return impl->m_processor_type; +} + + +bool ShaderFromNir::emit_instruction(nir_instr *instr) +{ + assert(impl); + + sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n"; + + switch (instr->type) { + case nir_instr_type_alu: + return impl->emit_alu_instruction(instr); + case nir_instr_type_deref: + return impl->emit_deref_instruction(nir_instr_as_deref(instr)); + case nir_instr_type_intrinsic: + return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr)); + case nir_instr_type_load_const: + return impl->set_literal_constant(nir_instr_as_load_const(instr)); + case nir_instr_type_tex: + return impl->emit_tex_instruction(instr); + case nir_instr_type_jump: + return 
impl->emit_jump_instruction(nir_instr_as_jump(instr)); + default: + fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type); + nir_print_instr(instr, stderr); + fprintf(stderr, "'\n"); + return false; + case nir_instr_type_ssa_undef: + return impl->create_undef(nir_instr_as_ssa_undef(instr)); + return true; + } +} + +bool ShaderFromNir::process_declaration() +{ + // scan declarations + nir_foreach_variable(variable, &sh->inputs) { + if (!impl->process_inputs(variable)) { + fprintf(stderr, "R600: error parsing input varible %s\n", variable->name); + return false; + } + } + + // scan declarations + nir_foreach_variable(variable, &sh->outputs) { + if (!impl->process_outputs(variable)) { + fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name); + return false; + } + } + + // scan declarations + nir_foreach_variable(variable, &sh->uniforms) { + if (!impl->process_uniforms(variable)) { + fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name); + return false; + } + } + + return true; +} + +const std::vector& ShaderFromNir::shader_ir() const +{ + assert(impl); + return impl->m_output; +} + + +AssemblyFromShader::~AssemblyFromShader() +{ +} + +bool AssemblyFromShader::lower(const std::vector& ir) +{ + return do_lower(ir); +} + +static nir_ssa_def * +r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_unpack_half_2x16: { + nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0); + return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed), + nir_unpack_half_2x16_split_y(b, packed)); + + } + case nir_op_pack_half_2x16: { + nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0); + return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0), + nir_channel(b, src_vec2, 1)); + } + default: + return nullptr; + } +} + +bool r600_nir_lower_pack_unpack_2x16_filter(const 
nir_instr *instr, const void *_options) +{ + return instr->type == nir_instr_type_alu; +} + +bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_nir_lower_pack_unpack_2x16_filter, + r600_nir_lower_pack_unpack_2x16_impl, + nullptr); +}; + +} + +using r600::r600_nir_lower_int_tg4; +using r600::r600_nir_lower_pack_unpack_2x16; + +int +r600_glsl_type_size(const struct glsl_type *type, bool is_bindless) +{ + return glsl_count_vec4_slots(type, false, is_bindless); +} + +void +r600_get_natural_size_align_bytes(const struct glsl_type *type, + unsigned *size, unsigned *align) +{ + if (type->base_type != GLSL_TYPE_ARRAY) { + *align = 1; + *size = 1; + } else { + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(type->fields.array, + &elem_size, &elem_align); + *align = 1; + *size = type->length; + } +} + +static bool +optimize_once(nir_shader *shader) +{ + bool progress = false; + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_dce); + NIR_PASS(progress, shader, nir_opt_algebraic); + NIR_PASS(progress, shader, nir_opt_constant_folding); + NIR_PASS(progress, shader, nir_opt_copy_prop_vars); + NIR_PASS(progress, shader, nir_opt_vectorize); + + NIR_PASS(progress, shader, nir_opt_remove_phis); + + if (nir_opt_trivial_continues(shader)) { + progress = true; + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_dce); + } + + NIR_PASS(progress, shader, nir_opt_if, false); + NIR_PASS(progress, shader, nir_opt_dead_cf); + NIR_PASS(progress, shader, nir_opt_cse); + NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true); + + NIR_PASS(progress, shader, nir_opt_conditional_discard); + NIR_PASS(progress, shader, nir_opt_dce); + NIR_PASS(progress, shader, nir_opt_undef); + + NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_shader_in); + NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_shader_out); + return 
progress; +} + +bool has_saturate(const nir_function *func) +{ + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_alu) { + auto alu = nir_instr_as_alu(instr); + if (alu->dest.saturate) + return true; + } + } + } + return false; +} + +int r600_shader_from_nir(struct r600_context *rctx, + struct r600_pipe_shader *pipeshader, + r600_shader_key *key) +{ + char filename[4000]; + struct r600_pipe_shader_selector *sel = pipeshader->selector; + + r600::ShaderFromNir convert; + + if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) { + fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n"); + nir_print_shader(sel->nir, stderr); + fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n"); + } + + NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + + static const struct nir_lower_tex_options lower_tex_options = { + .lower_txp = ~0u, + }; + NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); + + NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube); + + NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4); + NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16); + + NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size, + nir_lower_io_lower_64bit_to_32); + + if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL || + sel->nir->info.stage == MESA_SHADER_TESS_EVAL) + NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size, + nir_lower_io_lower_64bit_to_32); + + if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL) + NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size, + nir_lower_io_lower_64bit_to_32); + + const nir_function *func = reinterpret_cast(exec_list_get_head_const(&sel->nir->functions)); + bool optimize = func->impl->registers.length() == 0 && !has_saturate(func); + + + /* It seems the output of this optimization is cached 
somewhere, and + * when there are registers, then we can no longer copy propagate, so + * skip the optimization then. (There is probably a better way, but yeah) + */ + /* Run the pass list until it reports no progress. The same loop used to be written twice in a row; the second copy only re-ran every pass after the first had already stopped on no progress. */ + while (optimize && optimize_once(sel->nir)); + + NIR_PASS_V(sel->nir, nir_lower_locals_to_regs); + //NIR_PASS_V(sel->nir, nir_opt_algebraic); + //NIR_PASS_V(sel->nir, nir_copy_prop); + NIR_PASS_V(sel->nir, nir_lower_to_source_mods, nir_lower_float_source_mods); + NIR_PASS_V(sel->nir, nir_convert_from_ssa, true); + NIR_PASS_V(sel->nir, nir_opt_dce); + + if ((rctx->screen->b.debug_flags & DBG_NIR) && + (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) { + fprintf(stderr, "-- NIR --------------------------------------------------------\n"); + struct nir_function *func = (struct nir_function *)exec_list_get_head(&sel->nir->functions); + nir_index_ssa_defs(func->impl); + nir_print_shader(sel->nir, stderr); + fprintf(stderr, "-- END --------------------------------------------------------\n"); + } + + memset(&pipeshader->shader, 0, sizeof(r600_shader)); + + if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL || + sel->nir->info.stage == MESA_SHADER_VERTEX || + sel->nir->info.stage == MESA_SHADER_GEOMETRY) { + pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1); + pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1) + << sel->nir->info.clip_distance_array_size; + pipeshader->shader.cc_dist_mask = (1 << (sel->nir->info.cull_distance_array_size + + sel->nir->info.clip_distance_array_size)) - 1; + } + + // For learning we print out the complete failed shader + // and instead of asserts we use exceptions + bool r; + try { + struct r600_shader* gs_shader = nullptr; + if (rctx->gs_shader) + gs_shader = &rctx->gs_shader->current->shader; + r = convert.lower(sel->nir, pipeshader, sel, *key, gs_shader); + + } catch (std::logic_error& x) { + r = false; + } + if (!r || 
rctx->screen->b.debug_flags & DBG_ALL_SHADERS) { + static int shnr = 0; + + snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++); + + if (access(filename, F_OK) == -1) { + FILE *f = fopen(filename, "w"); + + if (f) { + fprintf(f, "const char *shader_blob_%s = {\nR\"(", sel->nir->info.name); + nir_print_shader(sel->nir, f); + fprintf(f, ")\";\n"); + fclose(f); + } + } + if (!r) + return -2; + } + + auto shader = convert.shader(); + + r600_screen *rscreen = rctx->screen; + r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family, + rscreen->has_compressed_msaa_texturing); + + r600::sfn_log << r600::SfnLog::shader_info + << "pipeshader->shader.processor_type = " + << pipeshader->shader.processor_type << "\n"; + + pipeshader->shader.bc.type = pipeshader->shader.processor_type; + pipeshader->shader.bc.isa = rctx->isa; + + r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key); + if (!afs.lower(shader.m_ir)) { + R600_ERR("%s: Lowering to assembly failed\n", __func__); + return -1; + } + + if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) { + r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n"; + generate_gs_copy_shader(rctx, pipeshader, &sel->so); + assert(pipeshader->gs_copy_shader); + } else { + r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n"; + } + + return 0; +} diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h new file mode 100644 index 00000000000..a663325f257 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -0,0 +1,112 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, 
publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_NIR_H +#define SFN_NIR_H + +#include "nir.h" + +#ifdef __cplusplus +#include "sfn_shader_base.h" +#include + +namespace r600 { + +bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader); + +bool r600_lower_scratch_addresses(nir_shader *shader); + +bool r600_lower_ubo_to_align16(nir_shader *shader); + +class Shader { +public: + std::vector& m_ir; + ValueMap m_temp; +}; + +class ShaderFromNir { +public: + ShaderFromNir(); + ~ShaderFromNir(); + + unsigned ninputs() const; + + bool lower(const nir_shader *shader, r600_pipe_shader *sh, + r600_pipe_shader_selector *sel, r600_shader_key &key, + r600_shader *gs_shader); + + bool process_declaration(); + + pipe_shader_type processor_type() const; + + bool emit_instruction(nir_instr *instr); + + const std::vector& shader_ir() const; + + Shader shader() const; +private: + + bool process_block(); + bool process_cf_node(nir_cf_node *node); + bool process_if(nir_if *node); + bool process_loop(nir_loop *node); + bool process_block(nir_block *node); + + std::unique_ptr impl; + const nir_shader *sh; + + int m_current_if_id; + int m_current_loop_id; + std::stack m_if_stack; + 
int scratch_size; +}; + +class AssemblyFromShader { +public: + virtual ~AssemblyFromShader(); + bool lower(const std::vector& ir); +private: + virtual bool do_lower(const std::vector& ir) = 0 ; +}; + +} + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +int r600_shader_from_nir(struct r600_context *rctx, + struct r600_pipe_shader *pipeshader, + union r600_shader_key *key); + +#ifdef __cplusplus +} +#endif + + +#endif // SFN_NIR_H diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp new file mode 100644 index 00000000000..b238098773a --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -0,0 +1,758 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "../r600_pipe.h" +#include "../r600_shader.h" +#include "sfn_shader_vertex.h" + +#include "sfn_shader_fragment.h" +#include "sfn_ir_to_assembly.h" +#include "sfn_nir.h" +#include "sfn_instruction_fetch.h" + +#include + +#define ENABLE_DEBUG 1 + +#ifdef ENABLE_DEBUG +#define DEBUG_SFN(X) \ + do {\ + X; \ + } while (0) +#else +#define DEBUG_SFN(X) +#endif + +namespace r600 { + +using namespace std; + + +ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, + r600_pipe_shader_selector& sel, + r600_shader &sh_info): + m_processor_type(ptype), + m_sh_info(sh_info), + m_tex_instr(*this), + m_alu_instr(*this), + m_pending_else(nullptr), + m_next_hwatomic_loc(0), + m_sel(sel) +{ + m_sh_info.processor_type = ptype; +} + + +ShaderFromNirProcessor::~ShaderFromNirProcessor() +{ +} + +bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_tex: { + nir_tex_instr *t = nir_instr_as_tex(instr); + if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF) + sh_info().uses_tex_buffers = true; + } + default: + ; + } + + return scan_sysvalue_access(instr); +} + +bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform) +{ + // m_uniform_type_map + m_uniform_type_map[uniform->data.location] = uniform->type; + + if (uniform->type->contains_atomic()) { + int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE; + sh_info().nhwatomic += natomics; + + if (uniform->type->is_array()) + sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; + + sh_info().uses_atomics = 1; + + struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges]; + ++sh_info().nhwatomic_ranges; + atom.buffer_id = uniform->data.binding; + atom.hw_idx = m_next_hwatomic_loc; + atom.start = m_next_hwatomic_loc; + atom.end = atom.start + natomics - 1; + m_next_hwatomic_loc = atom.end + 1; + //atom.array_id = uniform->type->is_array() ? 
1 : 0; + + m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1; + + sfn_log << SfnLog::io << "HW_ATOMIC file count: " + << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n"; + } + + if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) { + sh_info().uses_images = 1; + } + + return true; +} + +bool ShaderFromNirProcessor::process_inputs(nir_variable *input) +{ + return do_process_inputs(input); +} + +bool ShaderFromNirProcessor::process_outputs(nir_variable *output) +{ + return do_process_outputs(output); +} + +void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr) +{ + nir_variable *var = nir_deref_instr_get_variable(instr); + + assert(instr->mode == nir_var_function_temp); + assert(glsl_type_is_array(var->type)); + + // add an alias for the index to the register(s); + + +} + +void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr) +{ + auto& dest = instr->dest; + unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index; + m_var_mode[instr->var] = instr->mode; + m_var_derefs[index] = instr->var; + + sfn_log << SfnLog::io << "Add var deref:" << index + << " with DDL:" << instr->var->data.driver_location << "\n"; +} + +void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io) +{ + switch (io.name) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + case TGSI_SEMANTIC_CLIPVERTEX: + io.spi_sid = 0; + break; + case TGSI_SEMANTIC_GENERIC: + io.spi_sid = io.sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1; + } +} + +const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const +{ + unsigned index = src.is_ssa ? 
src.ssa->index : src.reg.reg->index; + + sfn_log << SfnLog::io << "Search for deref:" << index << "\n"; + + auto v = m_var_derefs.find(index); + if (v != m_var_derefs.end()) + return v->second; + + fprintf(stderr, "R600: could not find deref with index %u\n", index); + + return nullptr; + + /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr); + return nir_deref_instr_get_variable(deref); */ +} + +bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr) +{ + return m_tex_instr.emit(instr); +} + +void ShaderFromNirProcessor::emit_instruction(Instruction *ir) +{ + if (m_pending_else) { + m_output.push_back(PInstruction(m_pending_else)); + m_pending_else = nullptr; + } + + r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; + m_output.push_back(Instruction::Pointer(ir)); +} + +void ShaderFromNirProcessor::emit_shader_start() +{ + /* placeholder, may become an abstract method */ +} + +bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: { + auto b = new LoopBreakInstruction(); + emit_instruction(b); + return true; + } + case nir_jump_continue: { + auto b = new LoopContInstruction(); + emit_instruction(b); + return true; + } + default: { + nir_instr *i = reinterpret_cast<nir_instr *>(instr); + sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n"; + return false; + } + } + return true; +} + +bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr) +{ + return m_alu_instr.emit(instr); +} + +bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr) +{ + return false; +} + +bool ShaderFromNirProcessor::emit_loop_start(int loop_id) +{ + LoopBeginInstruction *loop = new LoopBeginInstruction(); + emit_instruction(loop); + m_loop_begin_block_map[loop_id] = loop; + return true; +} +bool ShaderFromNirProcessor::emit_loop_end(int loop_id) +{ + auto start = m_loop_begin_block_map.find(loop_id); + if (start ==
m_loop_begin_block_map.end()) { + sfn_log << SfnLog::err << "End loop: Loop start for " + << loop_id << " not found\n"; + return false; + } + LoopEndInstruction *loop = new LoopEndInstruction(start->second); + emit_instruction(loop); + + m_loop_begin_block_map.erase(start); + return true; +} + +bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt) +{ + + auto value = from_nir(if_stmt->condition, 0, 0); + AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)), + value, Value::zero, EmitInstruction::last); + pred->set_flag(alu_update_exec); + pred->set_flag(alu_update_pred); + pred->set_cf_type(cf_alu_push_before); + + IfInstruction *ir = new IfInstruction(pred); + emit_instruction(ir); + assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end()); + m_if_block_start_map[if_id] = ir; + return true; +} + +bool ShaderFromNirProcessor::emit_else_start(int if_id) +{ + auto iif = m_if_block_start_map.find(if_id); + if (iif == m_if_block_start_map.end()) { + std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n"; + return false; + } + + if (iif->second->type() != Instruction::cond_if) { + std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n"; + return false; + } + IfInstruction *if_instr = static_cast(iif->second); + ElseInstruction *ir = new ElseInstruction(if_instr); + m_if_block_start_map[if_id] = ir; + m_pending_else = ir; + + return true; +} + +bool ShaderFromNirProcessor::emit_ifelse_end(int if_id) +{ + auto ifelse = m_if_block_start_map.find(if_id); + if (ifelse == m_if_block_start_map.end()) { + std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n"; + return false; + } + + if (ifelse->second->type() != Instruction::cond_if && + ifelse->second->type() != Instruction::cond_else) { + std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n"; + return false; + } + /* Clear pending else, if the else branch was 
empty, none will be emitted */ + + m_pending_else = nullptr; + + IfElseEndInstruction *ir = new IfElseEndInstruction(); + emit_instruction(ir); + + return true; +} + +bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr *>(instr) + << "' (" << __func__ << ")\n"; + + if (emit_intrinsic_instruction_override(instr)) + return true; + + switch (instr->intrinsic) { + case nir_intrinsic_load_deref: { + auto var = get_deref_location(instr->src[0]); + if (!var) + return false; + auto mode_helper = m_var_mode.find(var); + if (mode_helper == m_var_mode.end()) { + cerr << "r600-nir: variable '" << var->name << "' not found\n"; + return false; + } + switch (mode_helper->second) { + case nir_var_shader_in: + return emit_load_input_deref(var, instr); + case nir_var_function_temp: + return emit_load_function_temp(var, instr); + default: + cerr << "r600-nir: Unsupported mode " << mode_helper->second + << " for src variable\n"; + return false; + } + } + case nir_intrinsic_store_deref: + return emit_store_deref(instr); + case nir_intrinsic_load_uniform: + return reserve_uniform(instr); + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + return emit_discard_if(instr); + case nir_intrinsic_load_ubo: + return emit_load_ubo(instr); + case nir_intrinsic_copy_deref: + case nir_intrinsic_load_constant: + case nir_intrinsic_load_input: + case nir_intrinsic_store_output: + default: + fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic); + return false; + } + return false; +} + +bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr) +{ + return false; +} + +bool +ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr) +{ + return false; +} + +bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last) +{ + if
(!dest.is_ssa) { + auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write}); + if (as_last) + ir->set_flag(alu_last_instr); + emit_instruction(ir); + } else { + inject_register(dest.ssa.index, chan, value, true); + } + return true; +} + +GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src, + unsigned mask, + const GPRVector::Swizzle& swizzle) +{ + GPRVector *result = nullptr; + int sel = lookup_register_index(src); + if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr && + from_nir(src, 0)->chan() == 0) { + /* If the x-channel is really an x-channel register then we are pretty + * safe that the values come like we need them */ + result = new GPRVector(from_nir(src, 0)->sel(), swizzle); + } else { + AluInstruction *ir = nullptr; + int tmp_sel = allocate_temp_register(); + GPRVector::Values v; + for (int i = 0; i < 4; ++i) { + v[i] = PValue(new GPRValue(tmp_sel, swizzle[i])); + if (swizzle[i] < 4 && (mask & (1 << i))) { + ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]), + EmitInstruction::write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + + result = new GPRVector(v); + } + return result; +} + +bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr) +{ + nir_src& src0 = instr->src[0]; + nir_src& src1 = instr->src[1]; + + int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index; + const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg); + + int ofs_reg = src1.is_ssa ?
src1.ssa->index : src1.reg.reg->index; + const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg); + if (literal0) { + if (literal1) { + uint bufid = literal0->value[0].u32; + uint buf_ofs = literal1->value[0].u32 >> 4; + int buf_cmp = ((literal1->value[0].u32 >> 2) & 3); + AluInstruction *ir = nullptr; + for (int i = 0; i < instr->num_components; ++i) { + int cmp = buf_cmp + i; + assert(cmp < 4); + auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1)); + if (instr->dest.is_ssa) + add_uniform((instr->dest.ssa.index << 2) + i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; + + } else { + /* literal0 is lost ...*/ + return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1); + } + } else { + /* TODO: This can also be solved by using the CF index on the ALU block, and + * this would probably make sense when there is more than one load with + * the same buffer ID.
*/ + PValue bufid = from_nir(instr->src[0], 0, 0); + PValue addr = from_nir_with_fetch_constant(instr->src[1], 0); + GPRVector trgt; + for (int i = 0; i < 4; ++i) + trgt.set_reg_i(i, from_nir(instr->dest, i)); + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + 1, bufid, bim_zero); + + emit_instruction(ir); + for (int i = 0; i < instr->num_components ; ++i) { + add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i)); + } + m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; + } + +} + +bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast(instr) + << "' (" << __func__ << ")\n"; + + if (instr->intrinsic == nir_intrinsic_discard_if) { + emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)), + {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr})); + + } else { + emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)), + {Value::zero, Value::zero}, {alu_last_instr})); + } + m_sh_info.uses_kill = 1; + return true; +} + +bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var, + nir_intrinsic_instr* instr) +{ + return do_emit_load_deref(var, instr); +} + +bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << __func__ << ": emit '" + << *reinterpret_cast(instr) + << "'\n"; + + + /* If the target register is a SSA register and the loading is not + * indirect then we can do lazy loading, i.e. the uniform value can + * be used directly. Otherwise we have to load the data for real + * right away.
+ */ + + /* Try to find the literal that defines the array index */ + const nir_load_const_instr* literal = nullptr; + if (instr->src[0].is_ssa) + literal = get_literal_constant(instr->src[0].ssa->index); + + int base = nir_intrinsic_base(instr); + if (literal) { + AluInstruction *ir = nullptr; + + for (int i = 0; i < instr->num_components ; ++i) { + PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i)); + sfn_log << SfnLog::io << "uniform " + << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n"; + + if (instr->dest.is_ssa) + add_uniform((instr->dest.ssa.index << 2) + i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), + u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + } else { + PValue addr = from_nir(instr->src[0], 0, 0); + return load_uniform_indirect(instr, addr, 16 * base, 0); + } + return true; +} + +bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid) +{ + if (!addr) { + std::cerr << "r600-nir: don't know how uniform is addressed\n"; + return false; + } + + GPRVector trgt; + for (int i = 0; i < 4; ++i) + trgt.set_reg_i(i, from_nir(instr->dest, i)); + + if (addr->type() != Value::gpr) { + emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr}); + addr = trgt.reg_i(0); + } + + /* FIXME: buffer index and index mode are not set correctly */ + auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest, + bufferid, PValue(), bim_none); + emit_instruction(ir); + for (int i = 0; i < instr->num_components ; ++i) { + add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i)); + } + m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; +} + +AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 
literal->def.num_components ; ++i) { + if (writemask & (1 << i)){ + PValue lsrc; + switch (literal->def.bit_size) { + + case 1: + sfn_log << SfnLog::reg << "Got literal of bit size 1\n"; + lsrc = literal->value[i].b ? + PValue(new LiteralValue( 0xffffffff, i)) : + Value::zero; + break; + case 32: + sfn_log << SfnLog::reg << "Got literal of bit size 32\n"; + if (literal->value[i].u32 == 0) + lsrc = Value::zero; + else if (literal->value[i].u32 == 1) + lsrc = Value::one_i; + else if (literal->value[i].f32 == 1.0f) + lsrc = Value::one_f; + else if (literal->value[i].f32 == 0.5f) + lsrc = Value::zero_dot_5; + else + lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); + break; + default: + sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size + << " falling back to 32 bit\n"; + lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); + } + ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write); + + emit_instruction(ir); + } + } + return ir; +} + +PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component) +{ + PValue value = from_nir(src, component); + if (value->type() != Value::gpr && + value->type() != Value::gpr_vector && + value->type() != Value::gpr_array_value) { + unsigned temp = allocate_temp_register(); + PValue retval(new GPRValue(temp, component)); + emit_instruction(new AluInstruction(op1_mov, retval, value, + EmitInstruction::last_write)); + value = retval; + } + return value; +} + +bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr) +{ + auto out_var = get_deref_location(instr->src[0]); + if (!out_var) + return false; + + return do_emit_store_deref(out_var, instr); +} + +bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr) +{ + r600::sfn_log << SfnLog::instr << __func__ << ": emit '" + << *reinterpret_cast(instr) + << "'\n"; + + /* Give the specific shader type a chance to process this, i.e. 
Geometry and + * tesselation shaders need specialized deref_array, for the other shaders + * it is lowered. + */ + if (emit_deref_instruction_override(instr)) + return true; + + switch (instr->deref_type) { + case nir_deref_type_var: + set_var_address(instr); + return true; + case nir_deref_type_array: + case nir_deref_type_array_wildcard: + case nir_deref_type_struct: + case nir_deref_type_cast: + default: + fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type); + } + return false; +} + +void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src) +{ + AluInstruction *ir = nullptr; + PValue sv[4]; + + assert(src.src.is_ssa); + + for (int i = 0; i < src.src.ssa->num_components ; ++i) { + unsigned uindex = (src.src.ssa->index << 2) + i; + sv[i] = uniform(uindex); + assert(sv[i]); + } + + for (int i = 0; i < src.src.ssa->num_components ; ++i) { + ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i], + EmitInstruction::write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); +} + + + +bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest, + std::vector srcs, + const std::set& m_flags) +{ + AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags); + emit_instruction(ir); + return true; +} + +void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr) +{ + m_output_register_map[loc] = gpr; +} + +void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir) +{ + r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; + m_export_output.push_back(PInstruction(ir)); +} + +const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const +{ + const GPRVector *retval = nullptr; + auto val = m_output_register_map.find(location); + if (val != m_output_register_map.end()) + retval = val->second; + return retval; +} + +void ShaderFromNirProcessor::set_input(unsigned pos, PValue var) +{ + r600::sfn_log << SfnLog::io << 
"Set input[" << pos << "] =" << *var << "\n"; + m_inputs[pos] = var; +} + +void ShaderFromNirProcessor::set_output(unsigned pos, PValue var) +{ + r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var << "\n"; + m_outputs[pos] = var; +} + +void ShaderFromNirProcessor::finalize() +{ + do_finalize(); + + for (auto& i : m_inputs) + m_sh_info.input[i.first].gpr = i.second->sel(); + + for (auto& i : m_outputs) + m_sh_info.output[i.first].gpr = i.second->sel(); + + m_output.insert(m_output.end(), m_export_output.begin(), m_export_output.end()); + m_export_output.clear(); +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h new file mode 100644 index 00000000000..0a12d1ca835 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h @@ -0,0 +1,183 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_shader_from_nir_h +#define sfn_shader_from_nir_h + + +#include "gallium/drivers/r600/r600_shader.h" + +#include "compiler/nir/nir.h" +#include "compiler/nir_types.h" + +#include "sfn_instruction_export.h" +#include "sfn_alu_defines.h" +#include "sfn_valuepool.h" +#include "sfn_debug.h" +#include "sfn_instruction_cf.h" +#include "sfn_emittexinstruction.h" +#include "sfn_emitaluinstruction.h" + +#include +#include +#include + +struct nir_instr; + +namespace r600 { + +extern SfnLog sfn_log; + +class ShaderFromNirProcessor : public ValuePool { +public: + ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel, + r600_shader& sh_info); + virtual ~ShaderFromNirProcessor(); + + void emit_instruction(Instruction *ir); + + PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component); + GPRVector *vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle); + + bool emit_instruction(EAluOp opcode, PValue dest, + std::vector src0, + const std::set& m_flags); + void emit_export_instruction(WriteoutInstruction *ir); + + void split_constants(nir_alu_instr* instr); + void load_uniform(const nir_alu_src& src); + + const nir_variable *get_deref_location(const nir_src& src) const; +protected: + + void set_var_address(nir_deref_instr *instr); + void set_input(unsigned pos, PValue var); + void set_output(unsigned pos, PValue var); + + void evaluate_spi_sid(r600_shader_io &io); + + r600_shader& sh_info() {return m_sh_info;} + + bool scan_instruction(nir_instr *instr); + + virtual bool scan_sysvalue_access(nir_instr *instr) = 0; + + bool emit_if_start(int if_id, nir_if *if_stmt); + bool emit_else_start(int 
if_id); + bool emit_ifelse_end(int if_id); + + bool emit_loop_start(int loop_id); + bool emit_loop_end(int loop_id); + bool emit_jump_instruction(nir_jump_instr *instr); + + const GPRVector *output_register(unsigned location) const; + + bool load_preloaded_value(const nir_dest& dest, int chan, PValue value, + bool as_last = true); + void add_param_output_reg(int loc, const GPRVector *gpr); + void inc_atomic_file_count(); + std::bitset<8> m_sv_values; + + enum ESlots { + es_face, + es_instanceid, + es_pos, + es_sample_mask_in, + es_sample_id, + es_vertexid, + }; + +private: + virtual bool allocate_reserved_registers() = 0; + + bool emit_alu_instruction(nir_instr *instr); + bool emit_deref_instruction(nir_deref_instr* instr); + bool emit_intrinsic_instruction(nir_intrinsic_instr* instr); + virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr); + bool emit_tex_instruction(nir_instr* instr); + bool emit_discard_if(nir_intrinsic_instr* instr); + bool emit_load_ubo(nir_intrinsic_instr* instr); + bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr); + bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufid); + + /* Code creating functions */ + bool emit_load_input_deref(const nir_variable *var, nir_intrinsic_instr* instr); + bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr); + AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask); + + bool emit_store_deref(nir_intrinsic_instr* instr); + + bool reserve_uniform(nir_intrinsic_instr* instr); + bool process_uniforms(nir_variable *uniform); + bool process_inputs(nir_variable *input); + bool process_outputs(nir_variable *output); + + void add_array_deref(nir_deref_instr* instr); + + virtual void emit_shader_start(); + virtual bool emit_deref_instruction_override(nir_deref_instr* instr); + virtual bool do_process_inputs(nir_variable *input) = 0; + virtual bool 
do_process_outputs(nir_variable *output) = 0; + virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0; + virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0; + + virtual void do_finalize() = 0; + + void finalize(); + friend class ShaderFromNir; + + std::set m_arrays; + + std::map m_inputs; + std::map m_outputs; + + std::map m_var_derefs; + std::map m_var_mode; + + std::map m_uniform_type_map; + std::map m_if_block_start_map; + std::map m_loop_begin_block_map; + + pipe_shader_type m_processor_type; + + std::vector m_output; + std::vector m_export_output; + r600_shader& m_sh_info; + + EmitTexInstruction m_tex_instr; + EmitAluInstruction m_alu_instr; + OutputRegisterMap m_output_register_map; + + IfElseInstruction *m_pending_else; + int m_next_hwatomic_loc; + + r600_pipe_shader_selector& m_sel; +}; + +} + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp new file mode 100644 index 00000000000..7b0e4c998d2 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp @@ -0,0 +1,754 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "tgsi/tgsi_from_mesa.h" +#include "sfn_shader_fragment.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir, + r600_shader& sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key): + ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh), + m_max_color_exports(MAX2(key.ps.nr_cbufs,1)), + m_max_counted_color_exports(0), + m_two_sided_color(key.ps.color_two_side), + m_last_pixel_export(nullptr), + m_nir(nir), + m_reserved_registers(0), + m_frag_pos_index(0), + m_need_back_color(false), + m_front_face_loaded(false), + m_depth_exports(0), + m_enable_centroid_interpolators(false) +{ + for (auto& i: m_interpolator) { + i.enabled = false; + i.ij_index= 0; + } + + sh_info().rat_base = key.ps.nr_cbufs; + sh_info().atomic_base = key.ps.first_atomic_counter; +} + +bool FragmentShaderFromNir::do_process_inputs(nir_variable *input) +{ + sfn_log << SfnLog::io << "Parse input variable " + << input->name << " location:" << input->data.location + << " driver-loc:" << input->data.driver_location + << " interpolation:" << input->data.interpolation + << "\n"; + + unsigned name, sid; + + if (input->data.location == VARYING_SLOT_FACE) { + m_sv_values.set(es_face); + return true; + } + + tgsi_get_gl_varying_semantic(static_cast(input->data.location), + true, &name, &sid); + + /* Work around the mixed tgsi/nir semantic problems, this fixes + * 
dEQP-GLES2.functional.shaders.builtin_variable.pointcoord */ + if (input->data.location == VARYING_SLOT_PNTC) { + name = TGSI_SEMANTIC_GENERIC; + sid = 8; + } + + tgsi_semantic sname = static_cast(name); + + switch (sname) { + case TGSI_SEMANTIC_POSITION: { + m_sv_values.set(es_pos); + return true; + } + case TGSI_SEMANTIC_COLOR: { + m_shaderio.add_input(new ShaderInputColor(sname, sid, input)); + m_need_back_color = m_two_sided_color; + return true; + } + case TGSI_SEMANTIC_PRIMID: + sh_info().gs_prim_id_input = true; + sh_info().ps_prim_id_input = m_shaderio.inputs().size(); + /* fallthrough */ + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_LAYER: + case TGSI_SEMANTIC_PCOORD: + case TGSI_SEMANTIC_VIEWPORT_INDEX: + case TGSI_SEMANTIC_CLIPDIST: { + if (!m_shaderio.find_varying(sname, sid, input->data.location_frac)) + m_shaderio.add_input(new ShaderInputVarying(sname, sid, input)); + return true; + } + default: + return false; + } +} + +bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + switch (ii->intrinsic) { + case nir_intrinsic_load_front_face: + m_sv_values.set(es_face); + break; + case nir_intrinsic_load_sample_mask_in: + m_sv_values.set(es_sample_mask_in); + break; + case nir_intrinsic_load_sample_id: + m_sv_values.set(es_sample_id); + break; + case nir_intrinsic_interp_deref_at_centroid: + /* This is not a sysvalue, should go elsewhere */ + m_enable_centroid_interpolators = true; + break; + default: + ; + } + } + default: + ; + } + return true; +} + +bool FragmentShaderFromNir::allocate_reserved_registers() +{ + assert(!m_reserved_registers); + + int face_reg_index = -1; + // enabled interpolators based on inputs + for (auto& i: m_shaderio.inputs()) { + int ij = i->ij_index(); + if (ij >= 0) { + m_interpolator[ij].enabled = true; + } + } + + /* Lazy, enable 
both possible interpolators, + * TODO: check which ones are really needed */ + if (m_enable_centroid_interpolators) { + m_interpolator[2].enabled = true; /* perspective */ + m_interpolator[5].enabled = true; /* linear */ + } + + // sort the varying inputs + m_shaderio.sort_varying_inputs(); + + // handle interpolators + int num_baryc = 0; + for (int i = 0; i < 6; ++i) { + if (m_interpolator[i].enabled) { + sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n"; + + m_interpolator[i].ij_index = num_baryc; + + unsigned sel = num_baryc / 2; + unsigned chan = 2 * (num_baryc % 2); + + auto ip_i = new GPRValue(sel, chan + 1); + ip_i->set_as_input(); + m_interpolator[i].i.reset(ip_i); + inject_register(sel, chan + 1, m_interpolator[i].i, false); + + auto ip_j = new GPRValue(sel, chan); + ip_j->set_as_input(); + m_interpolator[i].j.reset(ip_j); + inject_register(sel, chan, m_interpolator[i].j, false); + + ++num_baryc; + } + } + m_reserved_registers += (num_baryc + 1) >> 1; + + if (m_sv_values.test(es_pos)) { + m_frag_pos_index = m_reserved_registers++; + m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index)); + } + + // handle system values + if (m_sv_values.test(es_face) || m_need_back_color) { + face_reg_index = m_reserved_registers++; + auto ffr = new GPRValue(face_reg_index,0); + ffr->set_as_input(); + m_front_face_reg.reset(ffr); + sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n"; + inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false); + + m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index)); + load_front_face(); + } + + if (m_sv_values.test(es_sample_mask_in)) { + if (face_reg_index < 0) + face_reg_index = m_reserved_registers++; + + auto smi = new GPRValue(face_reg_index,2); + smi->set_as_input(); + m_sample_mask_reg.reset(smi); + sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n"; + 
//inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false); + sh_info().nsys_inputs = 1; + m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index)); + } + + if (m_sv_values.test(es_sample_id)) { + if (face_reg_index < 0) + face_reg_index = m_reserved_registers++; + + auto smi = new GPRValue(face_reg_index, 3); + smi->set_as_input(); + m_sample_id_reg.reset(smi); + sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n"; + sh_info().nsys_inputs++; + m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, face_reg_index)); + } + + // The back color handling is not emmited in the code, so we have + // to add the inputs here and later we also need to inject the code to set + // the right color + if (m_need_back_color) { + size_t ninputs = m_shaderio.inputs().size(); + for (size_t k = 0; k < ninputs; ++k) { + ShaderInput& i = m_shaderio.input(k); + + if (i.name() != TGSI_SEMANTIC_COLOR) + continue; + + ShaderInputColor& col = static_cast(i); + + size_t next_pos = m_shaderio.size(); + auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos); + m_shaderio.add_input(bcol); + col.set_back_color(next_pos); + } + m_shaderio.set_two_sided(); + } + + m_shaderio.update_lds_pos(); + + set_reserved_registers(m_reserved_registers); + + return true; +} + +void FragmentShaderFromNir::emit_shader_start() +{ + if (m_sv_values.test(es_face)) + load_front_face(); + + if (m_sv_values.test(es_pos)) { + for (int i = 0; i < 4; ++i) { + auto v = new GPRValue(m_frag_pos_index, i); + v->set_as_input(); + auto reg = PValue(v); + if (i == 3) + emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr})); + m_frag_pos[i] = reg; + } + } +} + +bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) +{ + if (out_var->data.location == FRAG_RESULT_COLOR) + return emit_export_pixel(out_var, instr, true); + + if 
((out_var->data.location >= FRAG_RESULT_DATA0 && + out_var->data.location <= FRAG_RESULT_DATA7) || + out_var->data.location == FRAG_RESULT_DEPTH || + out_var->data.location == FRAG_RESULT_STENCIL) + return emit_export_pixel(out_var, instr, false); + + sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " << + out_var->data.location << "(" << out_var->data.driver_location << ")\n"; + return false; +} + +bool FragmentShaderFromNir::do_process_outputs(nir_variable *output) +{ + sfn_log << SfnLog::instr << "Parse output variable " + << output->name << " @" << output->data.location + << "@dl:" << output->data.driver_location << "\n"; + + ++sh_info().noutput; + r600_shader_io& io = sh_info().output[output->data.driver_location]; + tgsi_get_gl_frag_result_semantic(static_cast( output->data.location), + &io.name, &io.sid); + + /* Check whether this code has become obsolete by the IO vectorization */ + unsigned num_components = 4; + unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type)); + if (vector_elements) + num_components = vector_elements; + unsigned component = output->data.location_frac; + + for (unsigned j = component; j < num_components + component; j++) + io.write_mask |= 1 << j; + + int loc = output->data.location; + if (loc == FRAG_RESULT_COLOR && + (m_nir.info.outputs_written & (1ull << loc))) { + sh_info().fs_write_all = true; + } + + if (output->data.location == FRAG_RESULT_COLOR || + (output->data.location >= FRAG_RESULT_DATA0 && + output->data.location <= FRAG_RESULT_DATA7)) { + return true; + } + if (output->data.location == FRAG_RESULT_DEPTH || + output->data.location == FRAG_RESULT_STENCIL) { + io.write_mask = 15; + return true; + } + + return false; +} + +bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_sample_mask_in: + return load_preloaded_value(instr->dest, 0, m_sample_mask_reg); + case 
nir_intrinsic_load_sample_id: + return load_preloaded_value(instr->dest, 0, m_sample_id_reg); + case nir_intrinsic_load_front_face: + return load_preloaded_value(instr->dest, 0, m_front_face_reg); + case nir_intrinsic_interp_deref_at_sample: + return emit_interp_deref_at_sample(instr); + case nir_intrinsic_interp_deref_at_offset: + return emit_interp_deref_at_offset(instr); + case nir_intrinsic_interp_deref_at_centroid: + return emit_interp_deref_at_centroid(instr); + default: + return false; + } +} + +void FragmentShaderFromNir::load_front_face() +{ + assert(m_front_face_reg); + if (m_front_face_loaded) + return; + + auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg, + Value::zero, {alu_write, alu_last_instr}); + m_front_face_loaded = true; + emit_instruction(ir); +} + +bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr) +{ + GPRVector slope = get_temp_vec4(); + + auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope, + from_nir_with_fetch_constant(instr->src[1], 0), + 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none); + fetch->set_flag(vtx_srf_mode); + emit_instruction(fetch); + + GPRVector grad = get_temp_vec4(); + auto var = get_deref_location(instr->src[0]); + assert(var); + + auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac); + auto interpolator = m_interpolator[io.ij_index()]; + PValue dummy(new GPRValue(interpolator.i->sel(), 7)); + + GPRVector src({interpolator.j, interpolator.i, dummy, dummy}); + + auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue()); + tex->set_dest_swizzle({0,1,7,7}); + emit_instruction(tex); + + tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue()); + tex->set_dest_swizzle({7,7,0,1}); + emit_instruction(tex); + + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write})); + emit_instruction(new 
AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr})); + + Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)}; + + auto dst = vec_from_nir(instr->dest, 4); + int num_components = instr->dest.is_ssa ? + instr->dest.ssa.num_components: + instr->dest.reg.reg->num_components; + + load_interpolated(dst, io, ip, num_components, var->data.location_frac); + + return true; +} + +bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr) +{ + int temp = allocate_temp_register(); + + GPRVector help(temp, {0,1,2,3}); + + auto var = get_deref_location(instr->src[0]); + assert(var); + + auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac); + auto interpolator = m_interpolator[io.ij_index()]; + PValue dummy(new GPRValue(interpolator.i->sel(), 7)); + + GPRVector interp({interpolator.j, interpolator.i, dummy, dummy}); + + auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue()); + getgradh->set_dest_swizzle({0,1,7,7}); + getgradh->set_flag(TexInstruction::x_unnormalized); + getgradh->set_flag(TexInstruction::y_unnormalized); + getgradh->set_flag(TexInstruction::z_unnormalized); + getgradh->set_flag(TexInstruction::w_unnormalized); + emit_instruction(getgradh); + + auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue()); + getgradv->set_dest_swizzle({7,7,0,1}); + getgradv->set_flag(TexInstruction::x_unnormalized); + getgradv->set_flag(TexInstruction::y_unnormalized); + getgradv->set_flag(TexInstruction::z_unnormalized); + getgradv->set_flag(TexInstruction::w_unnormalized); + emit_instruction(getgradv); + 
+ PValue ofs_x = from_nir(instr->src[1], 0); + PValue ofs_y = from_nir(instr->src[1], 1); + emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr})); + emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr})); + + Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)}; + + auto dst = vec_from_nir(instr->dest, 4); + load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest), + var->data.location_frac); + + return true; +} + +bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr) +{ + auto var = get_deref_location(instr->src[0]); + assert(var); + + auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac); + io.set_uses_interpolate_at_centroid(); + + int ij_index = io.ij_index() >= 3 ? 
5 : 2; + assert (m_interpolator[ij_index].enabled); + auto ip = m_interpolator[ij_index]; + + int num_components = nir_dest_num_components(instr->dest); + + auto dst = vec_from_nir(instr->dest, 4); + load_interpolated(dst, io, ip, num_components, var->data.location_frac); + return true; +} + + +bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) +{ + if (in_var->data.location == VARYING_SLOT_POS) { + assert(instr->dest.is_ssa); + + for (int i = 0; i < instr->dest.ssa.num_components; ++i) { + inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true); + } + return true; + } + + if (in_var->data.location == VARYING_SLOT_FACE) + return load_preloaded_value(instr->dest, 0, m_front_face_reg); + + // todo: replace io with ShaderInputVarying + auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac); + unsigned num_components = 4; + + + if (instr->dest.is_ssa) { + num_components = instr->dest.ssa.num_components; + } else { + num_components = instr->dest.reg.reg->num_components; + } + + auto dst = vec_from_nir(instr->dest, 4); + + sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location + << "].gpr=" << dst.sel() << "\n"; + + io.set_gpr(dst.sel()); + + auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0]; + + load_interpolated(dst, io, ip, num_components, in_var->data.location_frac); + + /* These results are expected starting in slot x..*/ + if (in_var->data.location_frac > 0) { + int n = instr->dest.is_ssa ? 
instr->dest.ssa.num_components : + instr->dest.reg.reg->num_components; + AluInstruction *ir = nullptr; + for (int i = 0; i < n; ++i) { + ir = new AluInstruction(op1_mov, dst[i], + dst[i + in_var->data.location_frac], {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + + if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) { + + auto & color_input = static_cast (io); + auto& bgio = m_shaderio.input(color_input.back_color_input_index()); + + bgio.set_gpr(allocate_temp_register()); + + GPRVector bgcol(bgio.gpr(), {0,1,2,3}); + load_interpolated(bgcol, bgio, ip, num_components, 0); + + load_front_face(); + + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + return true; +} + +bool FragmentShaderFromNir::load_interpolated(GPRVector &dest, + ShaderInput& io, const Interpolator &ip, + int num_components, int start_comp) +{ + // replace io with ShaderInputVarying + if (io.interpolate() > 0) { + + sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n"; + + if (num_components == 1) { + switch (start_comp) { + case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x); + case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); + case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z); + case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3); + default: + assert(0); + } + } + + if (num_components == 2) { + switch (start_comp) { + case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3); + case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc); + case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) && + load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); + default: + 
assert(0); + } + } + + if (num_components == 3 && start_comp == 0) + return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) && + load_interpolated_one_comp(dest, io, ip, op2_interp_z); + + int full_write_mask = ((1 << num_components) - 1) << start_comp; + + bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc); + success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3); + return success; + + } else { + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op1_interp_load_p0, dest[i], + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + EmitInstruction::write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + } + return true; +} + +bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, EAluOp op) +{ + for (unsigned i = 0; i < 2 ; ++i) { + int chan = i; + if (op == op2_interp_z) + chan += 2; + + + auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i, + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)), + i == 0 ? EmitInstruction::write : EmitInstruction::last); + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + return true; +} + +bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, + const Interpolator& ip, EAluOp op, int writemask) +{ + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)), + (writemask & (1 << i)) ? 
EmitInstruction::write : EmitInstruction::empty); + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, + EAluOp op, UNUSED int start, int comp) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)), + i == comp ? EmitInstruction::write : EmitInstruction::empty); + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + + +bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_chanels) +{ + int outputs = all_chanels ? m_max_color_exports : 1; + + std::array swizzle; + unsigned writemask = nir_intrinsic_write_mask(instr); + if (out_var->data.location != FRAG_RESULT_STENCIL) { + for (int i = 0; i < 4; ++i) { + swizzle[i] = (i < instr->num_components) ? 
i : 7; + } + } else { + swizzle = {7,0,7,7}; + } + + GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle); + + set_output(out_var->data.driver_location, PValue(value)); + + if (out_var->data.location == FRAG_RESULT_COLOR || + (out_var->data.location >= FRAG_RESULT_DATA0 && + out_var->data.location <= FRAG_RESULT_DATA7)) { + for (int k = 0 ; k < outputs; ++k) { + + unsigned location = out_var->data.driver_location + k - m_depth_exports; + if (location >= m_max_color_exports) { + sfn_log << SfnLog::io << "Pixel output " << location + << " skipped because we have only " << m_max_color_exports << "CBs\n"; + continue; + } + + m_last_pixel_export = new ExportInstruction(location, *value, ExportInstruction::et_pixel); + + if (sh_info().ps_export_highest < location) + sh_info().ps_export_highest = location; + + sh_info().nr_ps_color_exports++; + + unsigned mask = (0xfu << (location * 4)); + sh_info().ps_color_export_mask |= mask; + + emit_export_instruction(m_last_pixel_export); + ++m_max_counted_color_exports; + }; + } else if (out_var->data.location == FRAG_RESULT_DEPTH) { + m_depth_exports++; + emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel)); + } else if (out_var->data.location == FRAG_RESULT_STENCIL) { + m_depth_exports++; + emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel)); + } else { + return false; + } + + return true; +} + +void FragmentShaderFromNir::do_finalize() +{ + // update shader io info and set LDS etc. + sh_info().ninput = m_shaderio.inputs().size(); + + sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n"; + for (size_t i = 0; i < sh_info().ninput; ++i) { + int ij_idx = (m_shaderio.input(i).ij_index() < 6 && + m_shaderio.input(i).ij_index() >= 0) ? 
m_shaderio.input(i).ij_index() : 0; + m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index); + } + + sh_info().two_side = m_shaderio.two_sided(); + sh_info().nlds = m_shaderio.nlds(); + + sh_info().nr_ps_max_color_exports = m_max_counted_color_exports; + + if (sh_info().fs_write_all) { + sh_info().nr_ps_max_color_exports = m_max_color_exports; + } + + if (!m_last_pixel_export) { + GPRVector v(0, {7,7,7,7}); + m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel); + sh_info().nr_ps_color_exports++; + sh_info().ps_color_export_mask = 0xf; + emit_export_instruction(m_last_pixel_export); + } + + m_last_pixel_export->set_last(); + + if (sh_info().fs_write_all) + sh_info().nr_ps_max_color_exports = 8; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h new file mode 100644 index 00000000000..91811671814 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h @@ -0,0 +1,101 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_fragment_shader_from_nir_h +#define sfn_fragment_shader_from_nir_h + +#include "sfn_shader_base.h" +#include "sfn_shaderio.h" +#include + +namespace r600 { + +class FragmentShaderFromNir : public ShaderFromNirProcessor { +public: + FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info, + r600_pipe_shader_selector &sel, const r600_shader_key &key); + bool scan_sysvalue_access(nir_instr *instr) override; +private: + + struct Interpolator { + bool enabled; + unsigned ij_index; + PValue i; + PValue j; + }; + + void emit_shader_start() override; + bool do_process_inputs(nir_variable *input) override; + bool allocate_reserved_registers() override; + bool do_process_outputs(nir_variable *output) override; + bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override; + bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; + bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, bool all_chanels); + bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip, + int num_components, int start_comp); + bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op); + bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask); + bool load_interpolated_two_comp_for_one(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp); + bool emit_interp_deref_at_centroid(nir_intrinsic_instr* instr); + + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + void do_finalize() override; + + void load_front_face(); + + bool 
emit_load_front_face(nir_intrinsic_instr* instr); + bool emit_load_sample_mask_in(nir_intrinsic_instr* instr); + bool emit_load_sample_id(nir_intrinsic_instr* instr); + bool emit_interp_deref_at_sample(nir_intrinsic_instr* instr); + bool emit_interp_deref_at_offset(nir_intrinsic_instr* instr); + + unsigned m_max_color_exports; + unsigned m_max_counted_color_exports; + bool m_two_sided_color; + ExportInstruction *m_last_pixel_export; + const nir_shader& m_nir; + + + std::array m_interpolator; + unsigned m_reserved_registers; + unsigned m_frag_pos_index; + PValue m_front_face_reg; + PValue m_sample_mask_reg; + PValue m_sample_id_reg; + GPRVector m_frag_pos; + bool m_need_back_color; + bool m_front_face_loaded; + ShaderIO m_shaderio; + unsigned m_depth_exports; + + std::map m_input_cache; + bool m_enable_centroid_interpolators; +}; + +} + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp new file mode 100644 index 00000000000..7eb67f46c83 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp @@ -0,0 +1,491 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "pipe/p_defines.h" +#include "tgsi/tgsi_from_mesa.h" +#include "sfn_shader_vertex.h" + +#include + + +namespace r600 { + +using std::priority_queue; + +VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + const r600_shader_key& key): + ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader), + m_num_clip_dist(0), + m_last_param_export(nullptr), + m_last_pos_export(nullptr), + m_pipe_shader(sh), + m_enabled_stream_buffers_mask(0), + m_so_info(&sel.so), + m_cur_param(0), + m_cur_clip_pos(1), + m_vertex_id(), + m_key(key) +{ + // reg 0 is used in the fetch shader + increment_reserved_registers(); + + sh_info().atomic_base = key.vs.first_atomic_counter; +} + +bool VertexShaderFromNir::do_process_inputs(nir_variable *input) +{ + ++sh_info().ninput; + + if (input->data.location < VERT_ATTRIB_MAX) { + increment_reserved_registers(); + return true; + } + fprintf(stderr, "r600-NIR-VS: Unimplemented process_inputs for %d\n", input->data.location); + return false; +} + +bool VertexShaderFromNir::allocate_reserved_registers() +{ + /* Since the vertex ID is nearly always used, we add it here as an input so + * that the registers used for vertex attributes don't get clobbered by the + * register merge step */ + auto R0x = new GPRValue(0,0); + R0x->set_as_input(); + m_vertex_id.reset(R0x); + inject_register(0, 0, m_vertex_id, false); + + if (m_sv_values.test(es_instanceid)) { + auto R0w = new GPRValue(0,3); + 
R0w->set_as_input(); + m_instance_id.reset(R0w); + inject_register(0, 3, m_instance_id, false); + } + + priority_queue, std::greater> q; + for (auto a: m_param_map) { + q.push(a.first); + } + + int next_param = 0; + while (!q.empty()) { + int loc = q.top(); + q.pop(); + m_param_map[loc] = next_param++; + } + return true; +} + +bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + switch (ii->intrinsic) { + case nir_intrinsic_load_vertex_id: + m_sv_values.set(es_vertexid); + break; + case nir_intrinsic_load_instance_id: + m_sv_values.set(es_instanceid); + break; + default: + ; + } + } + default: + ; + } + return true; +} + +bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_vertex_id: + return load_preloaded_value(instr->dest, 0, m_vertex_id); + case nir_intrinsic_load_instance_id: + return load_preloaded_value(instr->dest, 0, m_instance_id); + default: + return false; + } +} + +bool VertexShaderFromNir::do_process_outputs(nir_variable *output) +{ + if (output->data.location == VARYING_SLOT_COL0 || + output->data.location == VARYING_SLOT_COL1 || + (output->data.location >= VARYING_SLOT_VAR0 && + output->data.location <= VARYING_SLOT_VAR31) || + (output->data.location >= VARYING_SLOT_TEX0 && + output->data.location <= VARYING_SLOT_TEX7) || + output->data.location == VARYING_SLOT_BFC0 || + output->data.location == VARYING_SLOT_BFC1 || + output->data.location == VARYING_SLOT_CLIP_VERTEX || + output->data.location == VARYING_SLOT_CLIP_DIST0 || + output->data.location == VARYING_SLOT_CLIP_DIST1 || + output->data.location == VARYING_SLOT_POS || + output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_FOGC || + output->data.location == VARYING_SLOT_LAYER || + output->data.location == VARYING_SLOT_EDGE || + 
output->data.location == VARYING_SLOT_VIEWPORT + ) { + + r600_shader_io& io = sh_info().output[output->data.driver_location]; + tgsi_get_gl_varying_semantic(static_cast( output->data.location), + true, &io.name, &io.sid); + if (! m_key.vs.as_es) + evaluate_spi_sid(io); + ++sh_info().noutput; + + if (output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_LAYER) + m_cur_clip_pos = 2; + + if (output->data.location != VARYING_SLOT_POS && + output->data.location != VARYING_SLOT_EDGE && + output->data.location != VARYING_SLOT_PSIZ && + output->data.location != VARYING_SLOT_CLIP_VERTEX) + m_param_map[output->data.location] = m_cur_param++; + + return true; + } + return false; +} + +bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) +{ + if (in_var->data.location < VERT_ATTRIB_MAX) { + for (int i = 0; i < instr->num_components ; ++i) { + auto s = new GPRValue(in_var->data.driver_location + 1, i); + s->set_as_input(); + auto src = PValue(s); + inject_register(in_var->data.driver_location + 1, i, src, false); + + if (i == 0) + set_input(in_var->data.driver_location, src); + + load_preloaded_value(instr->dest, i, src, i == instr->num_components - 1); + } + return true; + } + fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", in_var->data.location); + return false; +} + +bool VertexShaderFromNir::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr) +{ + sh_info().cc_dist_mask = 0xff; + sh_info().clip_dist_write = 0xff; + + std::unique_ptr clip_vertex(vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3})); + + for (int i = 0; i < 4; ++i) + sh_info().output[out_var->data.driver_location].write_mask |= 1 << i; + + GPRVector clip_dist[2] = { get_temp_vec4(), get_temp_vec4()}; + + for (int i = 0; i < 8; i++) { + int oreg = i >> 2; + int ochan = i & 3; + AluInstruction *ir = nullptr; + for (int j = 0; j < 4; 
j++) { + ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j), + PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)), + (j == ochan) ? EmitInstruction::write : EmitInstruction::empty); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + } + + m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos); + emit_export_instruction(m_last_pos_export); + + m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos); + emit_export_instruction(m_last_pos_export); + + return true; +} + +bool VertexShaderFromNir::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr, + std::array *swizzle_override) +{ + std::array swizzle; + uint32_t write_mask = 0; + + if (swizzle_override) { + swizzle = *swizzle_override; + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 6) + write_mask |= 1 << i; + } + } else { + write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? 
i - out_var->data.location_frac : 7; + } + + sh_info().output[out_var->data.driver_location].write_mask = write_mask; + + GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); + set_output(out_var->data.driver_location, PValue(value)); + + int export_slot = 0; + + switch (out_var->data.location) { + case VARYING_SLOT_EDGE: { + sh_info().vs_out_misc_write = 1; + sh_info().vs_out_edgeflag = 1; + emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); + emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr}); + sh_info().output[out_var->data.driver_location].write_mask = 0xf; + } + /* fallthrough */ + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + export_slot = 1; + break; + case VARYING_SLOT_POS: + break; + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + export_slot = m_cur_clip_pos++; + break; + default: + sfn_log << SfnLog::err << __func__ << "Unsupported location " + << out_var->data.location << "\n"; + return false; + } + + m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos); + emit_export_instruction(m_last_pos_export); + add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr()); + return true; +} + +bool VertexShaderFromNir::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr) +{ + assert(out_var->data.driver_location < sh_info().noutput); + sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n"; + + int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; + std::array swizzle; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? 
i - out_var->data.location_frac : 7; + + sh_info().output[out_var->data.driver_location].write_mask = write_mask; + + GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); + sh_info().output[out_var->data.driver_location].gpr = value->sel(); + + /* This should use the registers!! */ + set_output(out_var->data.driver_location, PValue(value)); + + auto param_loc = m_param_map.find(out_var->data.location); + assert(param_loc != m_param_map.end()); + + m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param); + emit_export_instruction(m_last_param_export); + add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr()); + return true; +} + +bool VertexShaderFromNir::emit_stream(int stream) +{ + assert(m_so_info); + if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) { + R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs); + return false; + } + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + if (m_so_info->output[i].output_buffer >= 4) { + R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", + m_so_info->output[i].output_buffer); + return false; + } + } + const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS]; + unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; + std::vector tmp(m_so_info->num_outputs); + + /* Initialize locations where the outputs are stored. 
*/ + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + if (stream != -1 && stream != m_so_info->output[i].stream) + continue; + + sfn_log << SfnLog::instr << "Emit stream " << i + << " with register index " << m_so_info->output[i].register_index << " so_gpr:"; + + + so_gpr[i] = output_register(m_so_info->output[i].register_index); + + if (!so_gpr[i]) { + sfn_log << SfnLog::err << "\nERR: register index " + << m_so_info->output[i].register_index + << " doesn't correspond to an output register\n"; + return false; + } + start_comp[i] = m_so_info->output[i].start_component; + /* Lower outputs with dst_offset < start_component. + * + * We can only output 4D vectors with a write mask, e.g. we can + * only output the W component at offset 3, etc. If we want + * to store Y, Z, or W at buffer offset 0, we need to use MOV + * to move it to X and output X. */ + if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) { + int tmp_index = allocate_temp_register(); + int sc = m_so_info->output[i].start_component; + AluInstruction *alu = nullptr; + for (int j = 0; j < m_so_info->output[i].num_components; j++) { + PValue dst(new GPRValue(tmp_index, j)); + alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write}); + tmp[i].set_reg_i(j, dst); + emit_instruction(alu); + } + if (alu) + alu->set_flag(alu_last_instr); + + /* Fill the vector with masked values */ + PValue dst_blank(new GPRValue(tmp_index, 7)); + for (int j = m_so_info->output[i].num_components; j < 4; j++) + tmp[i].set_reg_i(j, dst_blank); + + start_comp[i] = 0; + so_gpr[i] = &tmp[i]; + } + sfn_log << SfnLog::instr << *so_gpr[i] << "\n"; + } + + /* Write outputs to buffers. 
*/ + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + sfn_log << SfnLog::instr << "Write output buffer " << i + << " with register index " << m_so_info->output[i].register_index << "\n"; + + StreamOutIntruction *out_stream = + new StreamOutIntruction(*so_gpr[i], + m_so_info->output[i].num_components, + m_so_info->output[i].dst_offset - start_comp[i], + ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i], + m_so_info->output[i].output_buffer, + m_so_info->output[i].stream); + emit_export_instruction(out_stream); + m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4; + } + return true; +} + +void VertexShaderFromNir::do_finalize() +{ + if (m_key.vs.as_gs_a) { + PValue o(new GPRValue(0,PIPE_SWIZZLE_0)); + GPRVector primid({PValue(new GPRValue(0,2)), o,o,o}); + m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param); + emit_export_instruction(m_last_param_export); + int i; + i = sh_info().noutput++; + auto& io = sh_info().output[i]; + io.name = TGSI_SEMANTIC_PRIMID; + io.sid = 0; + io.gpr = 0; + io.interpolate = TGSI_INTERPOLATE_CONSTANT; + io.write_mask = 0x4; + io.spi_sid = m_key.vs.prim_id_out; + sh_info().vs_as_gs_a = 1; + } + + finalize_exports(); +} + + +bool VertexShaderFromNirForFS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) +{ + + switch (out_var->data.location) { + case VARYING_SLOT_PSIZ: + sh_info().vs_out_point_size = 1; + sh_info().vs_out_misc_write = 1; + /* fallthrough */ + case VARYING_SLOT_POS: + return emit_varying_pos(out_var, instr); + case VARYING_SLOT_EDGE: { + std::array swizzle_override = {7 ,0, 7, 7}; + return emit_varying_pos(out_var, instr, &swizzle_override); + } + case VARYING_SLOT_CLIP_VERTEX: + return emit_clip_vertices(out_var, instr); + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + m_num_clip_dist += 4; + return emit_varying_param(out_var, instr) && 
emit_varying_pos(out_var, instr); + case VARYING_SLOT_LAYER: { + sh_info().vs_out_misc_write = 1; + sh_info().vs_out_layer = 1; + std::array swz = {7,7,0,7}; + return emit_varying_pos(out_var, instr, &swz) && + emit_varying_param(out_var, instr); + } + case VARYING_SLOT_VIEW_INDEX: + return emit_varying_pos(out_var, instr) && + emit_varying_param(out_var, instr); + + default: + if (out_var->data.location <= VARYING_SLOT_VAR31 || + (out_var->data.location >= VARYING_SLOT_TEX0 && + out_var->data.location <= VARYING_SLOT_TEX7)) + return emit_varying_param(out_var, instr); + } + + fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", + out_var->data.location); + return false; +} + +void VertexShaderFromNirForFS::finalize_exports() +{ + if (m_so_info && m_so_info->num_outputs) + emit_stream(-1); + + m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask; + + if (!m_last_param_export) { + GPRVector value(0,{7,7,7,7}); + m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param); + emit_export_instruction(m_last_param_export); + } + m_last_param_export->set_last(); + + if (!m_last_pos_export) { + GPRVector value(0,{7,7,7,7}); + m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos); + emit_export_instruction(m_last_pos_export); + } + m_last_pos_export->set_last(); + +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h new file mode 100644 index 00000000000..e7be40f7cea --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h @@ -0,0 +1,86 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, 
distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_vertex_shader_from_nir_h +#define sfn_vertex_shader_from_nir_h + +#include "sfn_shader_base.h" + +namespace r600 { + +class VertexShaderFromNir : public ShaderFromNirProcessor { +public: + VertexShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key); + + bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override; + bool scan_sysvalue_access(nir_instr *instr) override; +protected: + bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr, + std::array *swizzle_override = nullptr); + bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr); + bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr); + bool emit_stream(int stream); + + // todo: encapsulate + unsigned m_num_clip_dist; + ExportInstruction *m_last_param_export; + ExportInstruction *m_last_pos_export; + r600_pipe_shader *m_pipe_shader; + unsigned m_enabled_stream_buffers_mask; + const pipe_stream_output_info *m_so_info; + void do_finalize() override; +private: + + bool do_process_inputs(nir_variable *input) 
override; + bool allocate_reserved_registers() override; + bool do_process_outputs(nir_variable *output) override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + + virtual void finalize_exports() = 0; + + unsigned m_cur_param; + std::map m_param_map; + unsigned m_cur_clip_pos; + + PValue m_vertex_id; + PValue m_instance_id; + r600_shader_key m_key; +}; + +class VertexShaderFromNirForFS : public VertexShaderFromNir { +public: + using VertexShaderFromNir::VertexShaderFromNir; + + bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; +private: + void finalize_exports() override; +}; + +} + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp b/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp new file mode 100644 index 00000000000..46aaf15e655 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp @@ -0,0 +1,371 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_shaderio.h" +#include "sfn_debug.h" +#include "tgsi/tgsi_from_mesa.h" + +#include + +namespace r600 { + +using std::vector; +using std::priority_queue; + +ShaderIO::ShaderIO(): + m_two_sided(false), + m_lds_pos(0) +{ + +} + +ShaderInput::ShaderInput(tgsi_semantic name): + m_name(name), + m_gpr(0), + m_uses_interpolate_at_centroid(false) +{ +} + +ShaderInput::~ShaderInput() +{ +} + +void ShaderInput::set_lds_pos(UNUSED int lds_pos) +{ +} + +int ShaderInput::ij_index() const +{ + return -1; +} + +bool ShaderInput::interpolate() const +{ + return false; +} + +int ShaderInput::lds_pos() const +{ + return 0; +} + +void ShaderInput::set_uses_interpolate_at_centroid() +{ + m_uses_interpolate_at_centroid = true; +} + +void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const +{ + io.name = m_name; + io.gpr = m_gpr; + io.ij_index = translated_ij_index; + io.lds_pos = lds_pos(); + io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid; + + set_specific_ioinfo(io); +} + +void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const +{ +} + +ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr): + ShaderInput(name), + m_gpr(gpr) +{ +} + +void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const +{ + io.gpr = m_gpr; + io.ij_index = 0; +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input): + ShaderInput(_name), + m_driver_location(input->data.driver_location), + m_location_frac(input->data.location_frac), + m_sid(sid), + m_ij_index(-10), + m_mask((1 << input->type->components()) - 1) +{ + sfn_log << SfnLog::io << __func__ + << "name:" << _name + << " sid: " << sid 
+ << " op: " << input->data.interpolation; + + evaluate_spi_sid(); + + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(input->type)); + + switch (input->data.interpolation) { + case INTERP_MODE_NONE: + if (glsl_base_type_is_integer(base_type)) { + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (name() == TGSI_SEMANTIC_COLOR) { + m_interpolate = TGSI_INTERPOLATE_COLOR; + m_ij_index = 0; + break; + } + /* fall-through */ + + case INTERP_MODE_SMOOTH: + assert(!glsl_base_type_is_integer(base_type)); + + m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + m_ij_index = 0; + break; + + case INTERP_MODE_NOPERSPECTIVE: + assert(!glsl_base_type_is_integer(base_type)); + + m_interpolate = TGSI_INTERPOLATE_LINEAR; + m_ij_index = 3; + break; + + case INTERP_MODE_FLAT: + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (input->data.sample) { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE; + } else if (input->data.centroid) { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID; + m_ij_index += 2; + } else { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER; + m_ij_index += 1; + } + sfn_log << SfnLog::io + << " -> IP:" << m_interpolate + << " IJ:" << m_ij_index + << "\n"; +} + +void ShaderInputVarying::update_mask(int additional_comps) +{ + m_mask |= additional_comps; +} + +void ShaderInputVarying::evaluate_spi_sid() +{ + switch (name()) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + assert(0 && "System value used as varying"); + break; + case TGSI_SEMANTIC_GENERIC: + m_spi_sid = m_sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1; + } +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic name, + const ShaderInputVarying& orig, size_t location): + ShaderInput(name), + m_driver_location(location), + 
m_location_frac(orig.location_frac()), + + m_sid(orig.m_sid), + m_spi_sid(orig.m_spi_sid), + m_interpolate(orig.m_interpolate), + m_interpolate_loc(orig.m_interpolate_loc), + m_ij_index(orig.m_ij_index), + m_lds_pos(0) +{ + evaluate_spi_sid(); +} + +bool ShaderInputVarying::interpolate() const +{ + return m_interpolate > 0; +} + +int ShaderInputVarying::ij_index() const +{ + return m_ij_index; +} + +void ShaderInputVarying::set_lds_pos(int lds_pos) +{ + m_lds_pos = lds_pos; +} + +int ShaderInputVarying::lds_pos() const +{ + return m_lds_pos; +} + +void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const +{ + io.interpolate = m_interpolate; + io.interpolate_location = m_interpolate_loc; + io.sid = m_sid; + io.spi_sid = m_spi_sid; + set_color_ioinfo(io); +} + +void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const +{ + sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n"; +} + +ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input): + ShaderInputVarying(name, sid, input), + m_back_color_input_idx(0) +{ + sfn_log << SfnLog::io << __func__ << "name << " << name << " sid << " << sid << "\n"; +} + +void ShaderInputColor::set_back_color(unsigned back_color_input_idx) +{ + sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n"; + m_back_color_input_idx = back_color_input_idx; +} + +void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const +{ + sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n"; + io.back_color_input = m_back_color_input_idx; +} + +size_t ShaderIO::add_input(ShaderInput *input) +{ + m_inputs.push_back(PShaderInput(input)); + return m_inputs.size() - 1; +} + +PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid, int frac) +{ + for (auto& a : m_inputs) { + if (a->name() == name) { + ShaderInputVarying *v = dynamic_cast(a.get()); + assert(v); + if (v->sid() == sid && (v->location_frac() == frac)) + 
return a; + } + } + return nullptr; +} + +struct VaryingShaderIOLess { + bool operator () (PShaderInput lhs, PShaderInput rhs) const + { + const ShaderInputVarying& l = static_cast(*lhs); + const ShaderInputVarying& r = static_cast(*rhs); + return l.location() > r.location(); + } +}; + +void ShaderIO::sort_varying_inputs() +{ + priority_queue, VaryingShaderIOLess> q; + + vector idx; + + for (auto i = 0u; i < m_inputs.size(); ++i) { + ShaderInputVarying *vi = dynamic_cast(m_inputs[i].get()); + if (vi) { + q.push(m_inputs[i]); + idx.push_back(i); + } + } + + auto next_index = idx.begin(); + while (!q.empty()) { + auto si = q.top(); + q.pop(); + m_inputs[*next_index++] = si; + } +} + +void ShaderIO::update_lds_pos() +{ + m_lds_pos = -1; + m_ldspos.resize(m_inputs.size()); + for (auto& i : m_inputs) { + ShaderInputVarying *v = dynamic_cast(i.get()); + if (!v) + continue; + /* There are shaders that miss an input ...*/ + if (m_ldspos.size() <= static_cast(v->location())) + m_ldspos.resize(v->location() + 1); + } + + std::fill(m_ldspos.begin(), m_ldspos.end(), -1); + for (auto& i : m_inputs) { + ShaderInputVarying *v = dynamic_cast(i.get()); + if (!v) + continue; + + if (m_ldspos[v->location()] < 0) { + ++m_lds_pos; + m_ldspos[v->location()] = m_lds_pos; + } + v->set_lds_pos(m_lds_pos); + } + ++m_lds_pos; +} + +std::vector &ShaderIO::inputs() +{ + return m_inputs; +} + +ShaderInput& ShaderIO::input(size_t k) +{ + assert(k < m_inputs.size()); + return *m_inputs[k]; +} + +ShaderInput& ShaderIO::input(size_t driver_loc, int frac) +{ + for (auto& i: m_inputs) { + auto v = dynamic_cast(i.get()); + if (v && v->location() == driver_loc && v->location_frac() == frac) + return *v; + } + return input(driver_loc); +} + +void ShaderIO::set_two_sided() +{ + m_two_sided = true; +} + +} + diff --git a/src/gallium/drivers/r600/sfn/sfn_shaderio.h b/src/gallium/drivers/r600/sfn/sfn_shaderio.h new file mode 100644 index 00000000000..41cadfce5a5 --- /dev/null +++ 
b/src/gallium/drivers/r600/sfn/sfn_shaderio.h @@ -0,0 +1,162 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_SHADERIO_H +#define SFN_SHADERIO_H + +#include "compiler/nir/nir.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallium/drivers/r600/r600_shader.h" + +#include +#include + +namespace r600 { + +class ShaderInput { +public: + ShaderInput(); + virtual ~ShaderInput(); + + ShaderInput(tgsi_semantic name); + tgsi_semantic name() const {return m_name;} + + void set_gpr(int gpr) {m_gpr = gpr;} + int gpr() const {return m_gpr;} + void set_ioinfo(r600_shader_io& io, int translated_ij_index) const; + + virtual void set_lds_pos(int lds_pos); + virtual int ij_index() const; + virtual bool interpolate() const; + virtual int lds_pos() const; + void set_uses_interpolate_at_centroid(); + +private: + virtual void set_specific_ioinfo(r600_shader_io& io) const; + + tgsi_semantic m_name; + int m_gpr; + bool m_uses_interpolate_at_centroid; +}; + +using PShaderInput = std::shared_ptr; + +class ShaderInputSystemValue: public ShaderInput { +public: + ShaderInputSystemValue(tgsi_semantic name, int gpr); + void set_specific_ioinfo(r600_shader_io& io) const; + int m_gpr; +}; + +class ShaderInputVarying : public ShaderInput { +public: + ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input); + ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig, + size_t location); + + void set_lds_pos(int lds_pos) override; + + int ij_index() const override; + + bool interpolate() const override; + + int lds_pos() const override; + + int sid() const {return m_sid;} + + void update_mask(int additional_comps); + + size_t location() const {return m_driver_location;} + int location_frac() const {return m_location_frac;} + +private: + void evaluate_spi_sid(); + + virtual void set_color_ioinfo(r600_shader_io& io) const; + void set_specific_ioinfo(r600_shader_io& io) const override; + size_t m_driver_location; + int m_location_frac; + int m_sid; + int m_spi_sid; + tgsi_interpolate_mode m_interpolate; + tgsi_interpolate_loc 
m_interpolate_loc; + int m_ij_index; + int m_lds_pos; + int m_mask; +}; + +class ShaderInputColor: public ShaderInputVarying { +public: + ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input); + void set_back_color(unsigned back_color_input_idx); + unsigned back_color_input_index() const { + return m_back_color_input_idx; + } +private: + void set_color_ioinfo(UNUSED r600_shader_io& io) const override; + unsigned m_back_color_input_idx; + +}; + +class ShaderIO +{ +public: + ShaderIO(); + + size_t add_input(ShaderInput *input); + + std::vector& inputs(); + ShaderInput& input(size_t k); + + ShaderInput& input(size_t driver_loc, int frac); + + void set_two_sided(); + bool two_sided() {return m_two_sided;} + + int nlds() const { + return m_lds_pos; + } + + void sort_varying_inputs(); + + size_t size() const {return m_inputs.size();} + + PShaderInput find_varying(tgsi_semantic name, int sid, int frac); + + void update_lds_pos(); + +private: + std::vector m_inputs; + std::vector m_ldspos; + bool m_two_sided; + int m_lds_pos; + +}; + +} + +#endif // SFN_SHADERIO_H \ No newline at end of file diff --git a/src/gallium/drivers/r600/sfn/sfn_value.cpp b/src/gallium/drivers/r600/sfn/sfn_value.cpp new file mode 100644 index 00000000000..f88760cfc5b --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_value.cpp @@ -0,0 +1,251 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) 
shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_value.h" +#include "util/macros.h" + +#include +#include +#include + +namespace r600 { + +using std::unique_ptr; +using std::make_shared; + +const char *Value::component_names = "xyzw01?_!"; + +Value::Value(): + m_type(gpr), + m_chan(0) +{ +} + +Value::Value(Type type, uint32_t chan): + m_type(type), + m_chan(chan) +{ + +} + + + +Value::Value(Type type): + Value(type, 0) +{ +} + +Value::Type Value::type() const +{ + return m_type; +} + +void Value::set_chan(uint32_t chan) +{ + m_chan = chan; +} + +void Value::print(std::ostream& os) const +{ + do_print(os); +} + +void Value::print(std::ostream& os, const PrintFlags& flags) const +{ + if (flags.flags & PrintFlags::has_neg) os << '-'; + if (flags.flags & PrintFlags::has_abs) os << '|'; + do_print(os, flags); + if (flags.flags & PrintFlags::has_abs) os << '|'; +} + +void Value::do_print(std::ostream& os, const PrintFlags& flags) const +{ + (void)flags; + do_print(os); +} + +bool Value::operator < (const Value& lhs) const +{ + return sel() < lhs.sel() || + (sel() == lhs.sel() && chan() < lhs.chan()); +} + + +LiteralValue::LiteralValue(float value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.f=value; +} + + +LiteralValue::LiteralValue(uint32_t value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.u=value; +} + +LiteralValue::LiteralValue(int value, uint32_t chan): + Value(Value::literal, chan) +{ + 
m_value.u=value; +} + +uint32_t LiteralValue::sel() const +{ + return ALU_SRC_LITERAL; +} + +uint32_t LiteralValue::value() const +{ + return m_value.u; +} + +float LiteralValue::value_float() const +{ + return m_value.f; +} + +void LiteralValue::do_print(std::ostream& os) const +{ + os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10) + << m_value.f << "]."; + os << component_names[chan()]; +} + +void LiteralValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const +{ + os << "[0x" << std::setbase(16) << m_value.u << " " + << std::setbase(10); + + os << m_value.f << "f"; + + os<< "]"; +} + +bool LiteralValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::literal); + const auto& rhs = static_cast(other); + return (sel() == rhs.sel() && + value() == rhs.value()); +} + +SpecialValue::SpecialValue(Type type, int value, int chan): + Value(type, chan), + m_value(static_cast(value)) +{ +} + +uint32_t SpecialValue::sel() const +{ + return m_value; +} + + +void SpecialValue::do_print(std::ostream& os) const +{ + auto sv_info = alu_src_const.find(m_value); + if (sv_info != alu_src_const.end()) { + os << sv_info->second.descr; + if (sv_info->second.use_chan) + os << '.' << component_names[chan()]; + else if (chan() > 0) + os << "." 
<< component_names[chan()] + << " (W: Channel ignored)"; + } else { + if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32) + os << " Param" << m_value - ALU_SRC_PARAM_BASE; + else + os << " E: unknown inline constant " << m_value; + } +} + +PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0)); +PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0)); +PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0)); +PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0)); + +InlineConstValue::InlineConstValue(int value, int chan): + SpecialValue(Value::cinline, value, chan) +{ +} + +bool InlineConstValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::cinline); + const auto& rhs = static_cast(other); + return sel() == rhs.sel(); +} + +UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank): + Value(Value::kconst, chan) +{ + if (sel < 512) { + m_index = sel & 0x1f; + m_kcache_bank = ((sel >> 5) & 1) | ((sel >> 7) & 2); + } else { + m_index = sel; + m_kcache_bank = kcache_bank; + } +} + +UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr): + Value(Value::kconst, chan), + m_index(sel), + m_kcache_bank(0), + m_addr(addr) +{ + +} + +uint32_t UniformValue::sel() const +{ + const int bank_base[4] = {128, 160, 256, 288}; + return m_index < 512 ? m_index + bank_base[m_kcache_bank] : m_index; +} + +uint32_t UniformValue::kcache_bank() const +{ + return m_kcache_bank; +} + +bool UniformValue::is_equal_to(const Value& other) const +{ + const UniformValue& o = static_cast(other); + return sel() == o.sel() && + m_kcache_bank == o.kcache_bank(); +} + +void UniformValue::do_print(std::ostream& os) const +{ + if (m_index < 512) + os << "KC" << m_kcache_bank << "[" << m_index; + else + os << "KCX[" << m_index; + os << "]." 
<< component_names[chan()]; +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_value.h b/src/gallium/drivers/r600/sfn/sfn_value.h new file mode 100644 index 00000000000..3a53281a7d8 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_value.h @@ -0,0 +1,199 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_VALUE_H +#define SFN_VALUE_H + +#include "sfn_alu_defines.h" +#include "nir.h" + +#include +#include +#include +#include + +namespace r600 { + +class Value { +public: + using Pointer=std::shared_ptr; + + struct PrintFlags { + PrintFlags():index_mode(0), + flags(0) + { + } + PrintFlags(int im, int f):index_mode(im), + flags(f) + { + } + int index_mode; + int flags; + static const int is_rel = 1; + static const int has_abs = 2; + static const int has_neg = 4; + static const int literal_is_float = 8; + static const int index_ar = 16; + static const int index_loopidx = 32; + }; + + enum Type { + gpr, + kconst, + literal, + cinline, + lds_direct, + gpr_vector, + gpr_array_value, + unknown + }; + + static const char *component_names; + + using LiteralFlags=std::bitset<4>; + + Value(); + + Value(Type type); + + virtual ~Value(){} + + Type type() const; + virtual uint32_t sel() const = 0; + uint32_t chan() const {return m_chan;} + + void set_chan(uint32_t chan); + void print(std::ostream& os, const PrintFlags& flags) const; + + void print(std::ostream& os) const; + + bool operator < (const Value& lhs) const; + + static Value::Pointer zero; + static Value::Pointer one_f; + static Value::Pointer zero_dot_5; + static Value::Pointer one_i; + +protected: + Value(Type type, uint32_t chan); + +private: + virtual void do_print(std::ostream& os) const = 0; + virtual void do_print(std::ostream& os, const PrintFlags& flags) const; + + virtual bool is_equal_to(const Value& other) const = 0; + + Type m_type; + uint32_t m_chan; + + friend bool operator == (const Value& lhs, const Value& rhs); +}; + + +inline std::ostream& operator << (std::ostream& os, const Value& v) +{ + v.print(os); + return os; +} + + +inline bool operator == (const Value& lhs, const Value& rhs) +{ + if (lhs.type() == rhs.type()) + return lhs.is_equal_to(rhs); + return false; +} + +inline bool operator != (const Value& lhs, const Value& rhs) +{ + return !(lhs == rhs); +} + +using 
PValue=Value::Pointer; + +struct value_less { + inline bool operator () (PValue lhs, PValue rhs) const { + return *lhs < *rhs; + } +}; + +using ValueSet = std::set; + + +class LiteralValue: public Value { +public: + LiteralValue(float value, uint32_t chan= 0); + LiteralValue(uint32_t value, uint32_t chan= 0); + LiteralValue(int value, uint32_t chan= 0); + uint32_t sel() const override final; + uint32_t value() const; + float value_float() const; +private: + void do_print(std::ostream& os) const override; + void do_print(std::ostream& os, const PrintFlags& flags) const override; + bool is_equal_to(const Value& other) const override; + union { + uint32_t u; + float f; + } m_value; +}; + +class SpecialValue: public Value { +protected: + SpecialValue(Type type, int value, int chan); + uint32_t sel() const override final; +private: + void do_print(std::ostream& os) const override; + AluInlineConstants m_value; +}; + +class InlineConstValue: public SpecialValue { +public: + InlineConstValue(int value, int chan); + bool is_equal_to(const Value& other) const override; + +private: + AluInlineConstants m_value; +}; + +class UniformValue: public Value { +public: + UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0); + UniformValue(uint32_t sel, uint32_t chan, PValue addr); + uint32_t sel() const override; + uint32_t kcache_bank() const; +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + + uint32_t m_index; + uint32_t m_kcache_bank; + PValue m_addr; +}; + +} // end ns r600 + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp new file mode 100644 index 00000000000..fab4837cc77 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp @@ -0,0 +1,319 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a 
+ * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_value_gpr.h" +#include "sfn_valuepool.h" +#include "sfn_debug.h" + +namespace r600 { + +using std::vector; +using std::array; +/* Build a GPR value for register sel, channel chan. Both flags (input, pin-to-channel) start cleared so pin_to_channel() never reads an indeterminate bool. NOTE(review): m_base_offset is declared bool in sfn_value_gpr.h, so only zero/non-zero of base_offset is kept - confirm that is intended. */ +GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset): + Value(Value::gpr, chan), + m_sel(sel), + m_base_offset(base_offset), + m_input(false), m_pin_to_channel(false) +{ +} +/* Build a GPR value for register sel, channel chan with a zero base offset and both flags cleared. */ +GPRValue::GPRValue(uint32_t sel, uint32_t chan): + Value(Value::gpr, chan), + m_sel(sel), + m_base_offset(0), + m_input(false), m_pin_to_channel(false) +{ +} + +uint32_t GPRValue::sel() const +{ + return m_sel; +} + +void GPRValue::do_print(std::ostream& os) const +{ + os << 'R'; + os << m_sel; + os << '.'
<< component_names[chan()]; +} + +bool GPRValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::gpr); + const auto& rhs = static_cast(other); + return (sel() == rhs.sel() && + chan() == rhs.chan()); +} + +void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const +{ + os << 'R'; + os << m_sel; + os << '.' << component_names[chan()]; +} + +GPRVector::GPRVector(const GPRVector& orig): + Value(gpr_vector), + m_elms(orig.m_elms), + m_valid(orig.m_valid) +{ +} + +GPRVector::GPRVector(std::array elms): + Value(gpr_vector), + m_elms(elms), + m_valid(false) +{ + for (unsigned i = 0; i < 4; ++i) + if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) { + assert(0 && "GPR vector not valid because element missing or nit a GPR"); + return; + } + unsigned sel = m_elms[0]->sel(); + for (unsigned i = 1; i < 4; ++i) + if (m_elms[i]->sel() != sel) { + assert(0 && "GPR vector not valid because sel is not equal for all elements"); + return; + } + m_valid = true; +} + +GPRVector::GPRVector(uint32_t sel, std::array swizzle): + Value (gpr_vector), + m_valid(true) +{ + for (int i = 0; i < 4; ++i) + m_elms[i] = PValue(new GPRValue(sel, swizzle[i])); +} + +GPRVector::GPRVector(const GPRVector& orig, const std::array& swizzle) +{ + for (int i = 0; i < 4; ++i) + m_elms[i] = orig.reg_i(swizzle[i]); + m_valid = orig.m_valid; +} + +void GPRVector::validate() const +{ + assert(m_elms[0]); + uint32_t sel = m_elms[0]->sel(); + if (sel >= 124) + return; + + for (unsigned i = 1; i < 4; ++i) { + assert(m_elms[i]); + if (sel != m_elms[i]->sel()) + return; + } + + m_valid = true; +} + +uint32_t GPRVector::sel() const +{ + validate(); + assert(m_valid); + return m_elms[0] ? m_elms[0]->sel() : 999; +} + +void GPRVector::set_reg_i(int i, PValue reg) +{ + m_elms[i] = reg; +} + +void GPRVector::do_print(std::ostream& os) const +{ + os << "R" << sel() << "."; + for (int i = 0; i < 4; ++i) + os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? 
m_elms[i]->chan() : 8] : '?'); +} + +void GPRVector::swizzle(const Swizzle& swz) +{ + Values v(m_elms); + for (uint32_t i = 0; i < 4; ++i) + if (i != swz[i]) { + assert(swz[i] < 4); + m_elms[i] = v[swz[i]]; + } +} + +bool GPRVector::is_equal_to(const Value& other) const +{ + if (other.type() != gpr_vector) { + std::cerr << "t"; + return false; + } + + const GPRVector& o = static_cast(other); + + for (int i = 0; i < 4; ++i) { + if (*m_elms[i] != *o.m_elms[i]) { + std::cerr << "elm" << i; + return false; + } + } + return true; +} + + +GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array): + Value(gpr_array_value, value->chan()), + m_value(value), + m_addr(addr), + m_array(array) +{ +} + +GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array): + Value(gpr_array_value, value->chan()), + m_value(value), + m_array(array) +{ +} + +static const char *swz_char = "xyzw01_"; + +void GPRArrayValue::do_print(std::ostream& os) const +{ + assert(m_array); + os << "R" << m_value->sel(); + if (m_addr) { + os << "[" << *m_addr << "] "; + } + os << swz_char[m_value->chan()]; + + os << "(" << *m_array << ")"; +} + +bool GPRArrayValue::is_equal_to(const Value& other) const +{ + const GPRArrayValue& v = static_cast(other); + + return *m_value == *v.m_value && + *m_array == *v.m_array; +} + +void GPRArrayValue::reset_value(PValue new_value) +{ + m_value = new_value; +} + +void GPRArrayValue::reset_addr(PValue new_addr) +{ + m_addr = new_addr; +} + + +GPRArray::GPRArray(int base, int size, int mask, int frac): + Value (gpr_vector), + m_base_index(base), + m_component_mask(mask), + m_frac(frac) +{ + m_values.resize(size); + for (int i = 0; i < size; ++i) { + for (int j = 0; j < 4; ++j) { + if (mask & (1 << j)) + m_values[i].set_reg_i(j, PValue(new GPRValue(base + i, j))); + } + } +} + +uint32_t GPRArray::sel() const +{ + return m_base_index; +} + +static const char *compchar = "xyzw"; +void GPRArray::do_print(std::ostream& os) const +{ + os << "ARRAY[R" << sel() << 
"..R" << sel() + m_values.size() - 1 << "]."; + for (int j = 0; j < 4; ++j) { + if (m_component_mask & (1 << j)) + os << compchar[j]; + } +} + +bool GPRArray::is_equal_to(const Value& other) const +{ + const GPRArray& o = dynamic_cast(other); + return o.sel() == sel() && + o.m_values.size() == m_values.size() && + o.m_component_mask == m_component_mask; +} + +uint32_t GPRArrayValue::sel() const +{ + return m_value->sel(); +} + +PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component) +{ + assert(index < m_values.size()); + assert(m_component_mask & (1 << (component + m_frac))); + + sfn_log << SfnLog::reg << "Create indirect register from " << *this; + + PValue v = m_values[index].reg_i(component + m_frac); + assert(v); + + sfn_log << SfnLog::reg << " -> " << *v; + + if (indirect) { + sfn_log << SfnLog::reg << "[" << *indirect << "]"; + switch (indirect->type()) { + case Value::literal: { + const LiteralValue& lv = static_cast(*indirect); + v = m_values[lv.value()].reg_i(component + m_frac); + break; + } + case Value::gpr: { + v = PValue(new GPRArrayValue(v, indirect, this)); + sfn_log << SfnLog::reg << "(" << *v << ")"; + break; + } + default: + assert(0 && !"Indirect addressing must be literal value or GPR"); + } + } + sfn_log << SfnLog::reg <<" -> " << *v << "\n"; + return v; +} + +void GPRArray::collect_registers(ValueMap& output) const +{ + for (auto& v: m_values) { + for (int i = 0; i < 4; ++i) { + auto vv = v.reg_i(i); + if (vv) + output.insert(vv); + } + } +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/src/gallium/drivers/r600/sfn/sfn_value_gpr.h new file mode 100644 index 00000000000..2faf84aac1f --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_value_gpr.h @@ -0,0 +1,182 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_GPRARRAY_H +#define SFN_GPRARRAY_H + +#include "sfn_value.h" +#include + +namespace r600 { + +class ValuePool; +class ValueMap; + +class GPRValue : public Value { +public: + GPRValue() = default; + GPRValue(GPRValue&& orig) = default; + GPRValue(const GPRValue& orig) = default; + + GPRValue(uint32_t sel, uint32_t chan, int base_offset); + + GPRValue(uint32_t sel, uint32_t chan); + + GPRValue& operator = (const GPRValue& orig) = default; + GPRValue& operator = (GPRValue&& orig) = default; + + uint32_t sel() const override final; + + void set_as_input(){ m_input = true; } + bool is_input() const {return m_input; } + void set_pin_to_channel() { m_pin_to_channel = true;} + bool pin_to_channel() const { return m_pin_to_channel;} + +private: + void do_print(std::ostream& os) const override; + void do_print(std::ostream& os, const PrintFlags& flags) const override; + bool is_equal_to(const Value& other) const override; + uint32_t m_sel; + bool m_base_offset; + bool m_input; + bool 
m_pin_to_channel; +}; + +class GPRVector : public Value { +public: + using Swizzle = std::array; + using Values = std::array; + GPRVector() = default; + GPRVector(GPRVector&& orig) = default; + GPRVector(const GPRVector& orig); + + GPRVector(const GPRVector& orig, const std::array& swizzle); + GPRVector(std::array elms); + GPRVector(uint32_t sel, std::array swizzle); + + GPRVector& operator = (const GPRVector& orig) = default; + GPRVector& operator = (GPRVector&& orig) = default; + + void swizzle(const Swizzle& swz); + + uint32_t sel() const override final; + + void set_reg_i(int i, PValue reg); + + unsigned chan_i(int i) const {return m_elms[i]->chan();} + PValue reg_i(int i) const {return m_elms[i];} + PValue operator [] (int i) const {return m_elms[i];} + PValue& operator [] (int i) {return m_elms[i];} + + + PValue x() const {return m_elms[0];} + PValue y() const {return m_elms[1];} + PValue z() const {return m_elms[2];} + PValue w() const {return m_elms[3];} + + +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + void validate() const; + + Values m_elms; + mutable bool m_valid; +}; + + +class GPRArray : public Value +{ +public: + using Pointer = std::shared_ptr; + + GPRArray(int base, int size, int comp_mask, int frac); + + uint32_t sel() const override; + + size_t size() const {return m_values.size();} + + PValue get_indirect(unsigned index, PValue indirect, unsigned component); + + void collect_registers(ValueMap& output) const; + +private: + void do_print(std::ostream& os) const override; + + bool is_equal_to(const Value& other) const override; + + int m_base_index; + int m_component_mask; + int m_frac; + + std::vector m_values; +}; + +using PGPRArray = GPRArray::Pointer; + +class GPRArrayValue :public Value { +public: + GPRArrayValue(PValue value, GPRArray *array); + GPRArrayValue(PValue value, PValue index, GPRArray *array); + + size_t array_size() const; + uint32_t sel() const override; + + 
PValue value() {return m_value;} + + void reset_value(PValue new_value); + void reset_addr(PValue new_addr); + + Value::Pointer indirect() const {return m_addr;} + +private: + + void do_print(std::ostream& os) const override; + + bool is_equal_to(const Value& other) const override; + + PValue m_value; + PValue m_addr; + GPRArray *m_array; +}; + +inline size_t GPRArrayValue::array_size() const +{ + return m_array->size(); +} + +inline GPRVector::Swizzle swizzle_from_mask(unsigned ncomp) +{ + GPRVector::Swizzle swz = {0,1,2,3}; + for (int i = ncomp; i < 4; ++i) + swz[i] = 7; + return swz; +} + + +} + +#endif // SFN_GPRARRAY_H diff --git a/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp b/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp new file mode 100644 index 00000000000..3c7d9d36c69 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp @@ -0,0 +1,558 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_debug.h" +#include "sfn_value_gpr.h" +#include "sfn_valuepool.h" + +#include +#include + +namespace r600 { + +using std::vector; +using std::pair; +using std::make_pair; +using std::queue; + +ValuePool::ValuePool(): + m_next_register_index(0), + current_temp_reg_index(0), + next_temp_reg_comp(4) +{ +} + +PValue ValuePool::m_undef = Value::zero; + +GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components) +{ + std::array result; + for (int i = 0; i < 4; ++i) + result[i] = from_nir(dst, i < num_components ? i : 7); + return GPRVector(result); + +} + +PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled) +{ + sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ") + << (v.is_ssa ? v.ssa->index : v.reg.reg->index); + + if (!v.is_ssa) { + int idx = lookup_register_index(v); + sfn_log << SfnLog::reg << " -> got index " << idx << "\n"; + if (idx >= 0) { + auto reg = lookup_register(idx, swizzled, false); + if (reg) { + if (reg->type() == Value::gpr_vector) { + auto& array = dynamic_cast(*reg); + reg = array.get_indirect(v.reg.base_offset, + v.reg.indirect ? 
+ from_nir(*v.reg.indirect, 0, 0) : nullptr, + component); + } + return reg; + } + } + assert(0 && "local registers should always be found"); return PValue(); /* with asserts compiled out, do not fall through and read a register source as SSA */ + } + + unsigned index = v.ssa->index; + /* For undefs we use zero and let the (yet to be implemented) DCE deal with it */ + if (m_ssa_undef.find(index) != m_ssa_undef.end()) + return Value::zero; + + /* Lookup order for SSA sources: mapped register, then literal constant, then uniform; an empty PValue means nothing matched. */ + int idx = lookup_register_index(v); + sfn_log << SfnLog::reg << " -> got index " << idx << "\n"; + if (idx >= 0) { + auto reg = lookup_register(idx, swizzled, false); + if (reg) + return reg; + } + + + auto literal_val = m_literal_constants.find(index); + if (literal_val != m_literal_constants.end()) { + switch (literal_val->second->def.bit_size) { + case 1: + return PValue(new LiteralValue(literal_val->second->value[swizzled].b ? 0xffffffff : 0, component)); + case 32: + return literal(literal_val->second->value[swizzled].u32); + default: + sfn_log << SfnLog::reg << "Unsupported bit size " << literal_val->second->def.bit_size + << " fall back to 32\n"; + return PValue(new LiteralValue(literal_val->second->value[swizzled].u32, component)); + } + } + + unsigned uindex = (index << 2) + swizzled; + auto u = m_uniforms.find(uindex); + if (u != m_uniforms.end()) + return u->second; + + return PValue(); +} +/* Source lookup where the swizzled component equals the requested component. */ +PValue ValuePool::from_nir(const nir_src& v, unsigned component) +{ + return from_nir(v, component, component); +} +/* Texture source lookup; forwards v.src with an identity swizzle. */ +PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component) +{ + return from_nir(v.src, component, component); +} +/* ALU source lookup; the source component is taken from v.swizzle[component]. */ +PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component) +{ + return from_nir(v.src, component, v.swizzle[component]); +} +/* Hand out one channel of a temporary register; a new register index is allocated once channels 0..3 of the current one are used up. */ +PValue ValuePool::get_temp_register() +{ + if (next_temp_reg_comp > 3) { + current_temp_reg_index = allocate_temp_register(); + next_temp_reg_comp = 0; + } + return PValue(new GPRValue(current_temp_reg_index, next_temp_reg_comp++)); +} +/* Allocate a fresh temporary register and return it as an xyzw vector. */ +GPRVector ValuePool::get_temp_vec4() +{ + int sel = allocate_temp_register(); + return GPRVector(sel, {0,1,2,3}); +} +
+PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp) +{ + int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa): + get_local_register_index(*src.reg.reg); + + auto retval = lookup_register(idx, comp, false); + if (!retval) + retval = create_register(idx, comp); + return retval; +} + +PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component) +{ + //assert(v->write_mask & (1 << component)); + return from_nir(v.dest, component); +} + +int ValuePool::lookup_register_index(const nir_dest& dst) +{ + return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa): + get_local_register_index(*dst.reg.reg); +} + +int ValuePool::lookup_register_index(const nir_src& src) const +{ + int index = 0; + + index = src.is_ssa ? + get_ssa_register_index(*src.ssa) : + get_local_register_index(*src.reg.reg); + + sfn_log << SfnLog::reg << " LIDX:" << index; + + auto r = m_register_map.find(index); + if (r == m_register_map.end()) { + return -1; + } + return static_cast(r->second.index); +} + + +int ValuePool::allocate_component(unsigned index, unsigned comp, bool pre_alloc) +{ + assert(comp < 8); + return allocate_with_mask(index, 1 << comp, pre_alloc); +} + +int ValuePool::allocate_temp_register() +{ + return m_next_register_index++; +} + + +PValue ValuePool::from_nir(const nir_dest& v, unsigned component) +{ + int idx = lookup_register_index(v); + sfn_log << SfnLog::reg << __func__ << ": "; + if (v.is_ssa) + sfn_log << "ssa_" << v.ssa.index; + else + sfn_log << "r" << v.reg.reg->index; + sfn_log << " -> " << idx << "\n"; + + auto retval = lookup_register(idx, component, false); + if (!retval) + retval = create_register(idx, component); + + if (retval->type() == Value::gpr_vector) { + assert(!v.is_ssa); + auto& array = dynamic_cast(*retval); + retval = array.get_indirect(v.reg.base_offset, + v.reg.indirect ? 
+ from_nir(*v.reg.indirect, 0, 0) : nullptr, + component); + } + + return retval; +} + +ValueMap ValuePool::get_temp_registers() const +{ + ValueMap result; + + for (auto& v : m_registers) { + if (v.second->type() == Value::gpr) + result.insert(v.second); + else if (v.second->type() == Value::gpr_vector) { + auto& array = dynamic_cast(*v.second); + array.collect_registers(result); + } + } + return result; +} + +static const char swz[] = "xyzw01?_"; + +PValue ValuePool::create_register(unsigned sel, unsigned swizzle) +{ + sfn_log << SfnLog::reg + <<"Create register " << sel << '.' << swz[swizzle] << "\n"; + auto retval = PValue(new GPRValue(sel, swizzle)); + m_registers[(sel << 3) + swizzle] = retval; + return retval; +} + +bool ValuePool::inject_register(unsigned sel, unsigned swizzle, + const PValue& reg, bool map) +{ + uint32_t ssa_index = sel; + + if (map) { + auto pos = m_ssa_register_map.find(sel); + if (pos == m_ssa_register_map.end()) + ssa_index = m_next_register_index++; + else + ssa_index = pos->second; + } + + sfn_log << SfnLog::reg + << "Inject register " << sel << '.' << swz[swizzle] + << " at index " << ssa_index << " ..."; + + if (map) + m_ssa_register_map[sel] = ssa_index; + + allocate_with_mask(ssa_index, swizzle, true); + + unsigned idx = (ssa_index << 3) + swizzle; + auto p = m_registers.find(idx); + if ( (p != m_registers.end()) && *p->second != *reg) { + std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n"; + assert(0); + return false; + } + sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n"; + m_registers[idx] = reg; + + if (m_next_register_index <= ssa_index) + m_next_register_index = ssa_index + 1; + return true; +} + + +PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle, + bool required) +{ + + PValue retval; + sfn_log << SfnLog::reg + << "lookup register " << sel << '.' 
<< swz[swizzle] << "(" + << ((sel << 3) + swizzle) << ")..."; + + /* Registers are keyed by (sel << 3) + swizzle. Every branch assigns the outer retval declared above, so a register created on demand for swizzle 7 is returned to the caller instead of being dropped. */ + auto reg = m_registers.find((sel << 3) + swizzle); + if (reg != m_registers.end()) { + sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n"; + retval = reg->second; + } else if (swizzle == 7) { + retval = create_register(sel, swizzle); + sfn_log << SfnLog::reg << " -> Created " << *retval << "\n"; + } else if (required) { + sfn_log << SfnLog::reg << "Register (" << sel << ", " + << swizzle << ") not found but required\n"; + assert(0 && "Unallocated register value requested\n"); + } else + sfn_log << SfnLog::reg << " -> Not required and not allocated\n"; + return retval; +} +/* Return the register index mapped to an SSA definition; when no mapping exists yet one is created through allocate_ssa_register(). */ +unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa) +{ + sfn_log << SfnLog::reg << __func__ << ": search dst ssa " + << ssa.index; + + auto pos = m_ssa_register_map.find(ssa.index); + if (pos == m_ssa_register_map.end()) { + sfn_log << SfnLog::reg << " Need to allocate ..."; + allocate_ssa_register(ssa); + pos = m_ssa_register_map.find(ssa.index); + assert(pos != m_ssa_register_map.end()); + } + sfn_log << SfnLog::reg << "...
got " << pos->second << "\n"; + return pos->second; +} + +unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const +{ + sfn_log << SfnLog::reg << __func__ << ": search ssa " + << ssa.index; + + auto pos = m_ssa_register_map.find(ssa.index); + sfn_log << SfnLog::reg << " got " << pos->second<< "\n"; + if (pos == m_ssa_register_map.end()) { + sfn_log << SfnLog::reg << __func__ << ": ssa register " + << ssa.index << " lookup failed\n"; + return -1; + } + return pos->second; +} + +unsigned ValuePool::get_local_register_index(const nir_register& reg) +{ + auto pos = m_local_register_map.find(reg.index); + if (pos == m_local_register_map.end()) { + allocate_local_register(reg); + pos = m_local_register_map.find(reg.index); + assert(pos != m_local_register_map.end()); + } + return pos->second; +} + +unsigned ValuePool::get_local_register_index(const nir_register& reg) const +{ + auto pos = m_local_register_map.find(reg.index); + if (pos == m_local_register_map.end()) { + sfn_log << SfnLog::err << __func__ << ": local register " + << reg.index << " lookup failed"; + return -1; + } + return pos->second; +} + +void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa) +{ + sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index + << " as " << m_next_register_index << "\n"; + int index = m_next_register_index++; + m_ssa_register_map[ssa.index] = index; + allocate_with_mask(index, 0xf, true); +} + +void ValuePool::allocate_arrays(array_list& arrays) +{ + int ncomponents = 0; + int current_index = m_next_register_index; + unsigned instance = 0; + + while (!arrays.empty()) { + auto a = arrays.top(); + arrays.pop(); + + /* This is a bit hackish, return an id that encodes the array merge. 
To make sure + * that the mapping doesn't go wrong we have to make sure the arrays is longer than + * the number of instances in this arrays slot */ + if (a.ncomponents + ncomponents > 4 || + a.length < instance) { + current_index = m_next_register_index; + ncomponents = 0; + instance = 0; + } + + if (ncomponents == 0) + m_next_register_index += a.length; + + uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents; + + PValue array = PValue(new GPRArray(current_index, a.length, mask, ncomponents)); + + sfn_log << SfnLog::reg << "Add array at "<< current_index + << " of size " << a.length << " with " << a.ncomponents + << " components, mask " << mask << "\n"; + + m_local_register_map[a.index] = current_index + instance; + + for (unsigned i = 0; i < a.ncomponents; ++i) + m_registers[((current_index + instance) << 3) + i] = array; + + VRec next_reg = {current_index + instance, mask, mask}; + m_register_map[current_index + instance] = next_reg; + + ncomponents += a.ncomponents; + ++instance; + } +} + +void ValuePool::allocate_local_register(const nir_register& reg) +{ + int index = m_next_register_index++; + m_local_register_map[reg.index] = index; + allocate_with_mask(index, 0xf, true); + + /* Create actual register and map it */; + for (int i = 0; i < 4; ++i) { + int k = (index << 3) + i; + m_registers[k] = PValue(new GPRValue(index, i)); + } +} + +void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays) +{ + sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index + << " as " << m_next_register_index << "\n"; + + if (reg.num_array_elems) { + array_entry ae = {reg.index, reg.num_array_elems, reg.num_components}; + arrays.push(ae); + } + else + allocate_local_register(reg); +} + +bool ValuePool::create_undef(nir_ssa_undef_instr* instr) +{ + m_ssa_undef.insert(instr->def.index); + return true; +} + +bool ValuePool::set_literal_constant(nir_load_const_instr* instr) +{ + sfn_log << SfnLog::reg << "Add literal " << 
instr->def.index << "\n"; + m_literal_constants[instr->def.index] = instr; + return true; +} + +const nir_load_const_instr* ValuePool::get_literal_constant(int index) +{ + sfn_log << SfnLog::reg << "Try to locate literal " << index << "..."; + auto literal = m_literal_constants.find(index); + if (literal == m_literal_constants.end()) { + sfn_log << SfnLog::reg << " not found\n"; + return nullptr; + } + sfn_log << SfnLog::reg << " found\n"; + return literal->second; +} + +void ValuePool::add_uniform(unsigned index, const PValue& value) +{ + sfn_log << SfnLog::reg << "Reserve " << *value << " as " << index << "\n"; + m_uniforms[index] = value; +} + +PValue ValuePool::uniform(unsigned index) +{ + sfn_log << SfnLog::reg << "Search index " << index << "\n"; + auto i = m_uniforms.find(index); + return i == m_uniforms.end() ? PValue() : i->second; +} + +int ValuePool::allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc) +{ + int retval; + VRec next_register = { index, mask }; + + sfn_log << SfnLog::reg << (pre_alloc ? 
"Pre-alloc" : "Allocate") + << " register (" << index << ", " << mask << ")\n"; + retval = index; + auto r = m_register_map.find(index); + + if (r != m_register_map.end()) { + if ((r->second.mask & next_register.mask) && + !(r->second.pre_alloc_mask & next_register.mask)) { + std::cerr << "r600 ERR: register (" + << index << ", " << mask + << ") already allocated as (" << r->second.index << ", " + << r->second.mask << ", " << r->second.pre_alloc_mask + << ") \n"; + retval = -1; + } else { + r->second.mask |= next_register.mask; + if (pre_alloc) + r->second.pre_alloc_mask |= next_register.mask; + retval = r->second.index; + } + } else { + if (pre_alloc) + next_register.pre_alloc_mask = mask; + m_register_map[index] = next_register; + retval = next_register.index; + } + + sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R" + << retval << "\n"; + + return retval; +} + +PValue ValuePool::literal(uint32_t value) +{ + const uint32_t float_1 = 0x3f800000; + const uint32_t float_05 = 0x3f000000; + + auto l = m_literals.find(value); + if (l != m_literals.end()) + return l->second; + + switch (value) { + case 0: + m_literals[0] = PValue(new InlineConstValue(ALU_SRC_0, 0)); + return m_literals[0]; + case 1: + m_literals[1] = PValue(new InlineConstValue(ALU_SRC_1_INT, 0)); + return m_literals[1]; + case float_1: + m_literals[float_1] = PValue(new InlineConstValue(ALU_SRC_1, 0)); + return m_literals[float_1]; + case float_05: + m_literals[float_05] = PValue(new InlineConstValue(ALU_SRC_0_5, 0)); + return m_literals[float_05]; + case 0xffffffff: + m_literals[0xffffffff] = PValue(new InlineConstValue(ALU_SRC_M_1_INT, 0)); + return m_literals[0xffffffff]; + default: + m_literals[value] = PValue(new LiteralValue(value)); + return m_literals[value]; + } +} + +} diff --git a/src/gallium/drivers/r600/sfn/sfn_valuepool.h b/src/gallium/drivers/r600/sfn/sfn_valuepool.h new file mode 100644 index 00000000000..4dcdc2c78d4 --- /dev/null +++ 
b/src/gallium/drivers/r600/sfn/sfn_valuepool.h @@ -0,0 +1,255 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +#ifndef SFN_VALUEPOOL_H +#define SFN_VALUEPOOL_H + +#include "sfn_value.h" +#include "sfn_value_gpr.h" + +#include +#include + +namespace r600 { + +using LiteralBuffer = std::map; + +/** \brief Map from a packed (sel, chan) key to the PValue stored under it + * The key is computed as (sel << 3) + chan. get_or_inject() creates and + * stores a new GPRValue when no entry exists for the requested key. + */ +class ValueMap { +public: + void insert(const PValue& v) { + auto idx = index_from(v->sel(), v->chan()); + m_map[idx] = v; + } + PValue get_or_inject(uint32_t index, uint32_t chan) { + auto idx = index_from(index, chan); + auto v = m_map.find(idx); + if (v == m_map.end()) { + insert(PValue(new GPRValue(index, chan))); + v = m_map.find(idx); + } + return v->second; + } + std::map::const_iterator begin() const {return m_map.begin();} + std::map::const_iterator end() const {return m_map.end();} + +private: + uint32_t index_from(uint32_t index, uint32_t chan) { + return (index << 3) + chan; + } + std::map m_map; +}; + +/** \brief Class to keep track of registers, uniforms, and literals + * This class holds the references to the uniforms and the literals + * and is responsible for allocating the registers. + */ +class ValuePool +{ +public: + + struct array_entry { + unsigned index; + unsigned length; + unsigned ncomponents; + + bool operator ()(const array_entry& a, const array_entry& b) const { + return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents); + } + }; + + using array_list = std::priority_queue, + array_entry>; + + ValuePool(); + + + GPRVector vec_from_nir(const nir_dest& dst, int num_components); + + PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled); + + PValue from_nir(const nir_src& v, unsigned component); + /** Get a register that is used as source register in an ALU instruction + * The PValue holds one component as specified. If the register refers to + * a GPR it must already have been allocated, uniforms and literals on + * the other hand might be pre-loaded.
+ */ + PValue from_nir(const nir_alu_src& v, unsigned component); + + /** Get a register that is used as source register in a texture instruction + * The PValue holds one component as specified. + */ + PValue from_nir(const nir_tex_src& v, unsigned component); + + /** Allocate a register that is used as destination register in an ALU + * instruction. The PValue holds one component as specified. + */ + PValue from_nir(const nir_alu_dest& v, unsigned component); + + /** Allocate a register that is used as destination register in any + * instruction. The PValue holds one component as specified. + */ + PValue from_nir(const nir_dest& v, unsigned component); + + /** Get the register index mapped from the NIR code to the r600 ir + * \param src NIR source to look up + * \returns r600 ir index + */ + int lookup_register_index(const nir_src& src) const; + + /** Get the register index mapped from the NIR code to the r600 ir + * \param dst NIR destination to look up + * \returns r600 ir index + */ + int lookup_register_index(const nir_dest& dst); + + /** Inject a register into a given ssa index position + * This is used to redirect loads from system values and vertex attributes + * that are already loaded into registers */ + bool inject_register(unsigned sel, unsigned swizzle, const PValue ®, bool map); + + /** Reserve space for a local register */ + void allocate_local_register(const nir_register& reg); + void allocate_local_register(const nir_register ®, array_list& arrays); + + void allocate_arrays(array_list& arrays); + + + void increment_reserved_registers() { + ++m_next_register_index; + } + + void set_reserved_registers(unsigned rr) { + m_next_register_index =rr; + } + + /** Allocate a register that is needed for lowering an instruction + * that requires complex calculations. + */ + int allocate_temp_register(); + + /** Reserve an undef register; currently it uses (0,7). + * \todo should be eliminated in the final pass + */ + bool create_undef(nir_ssa_undef_instr*
instr); + + bool set_literal_constant(nir_load_const_instr* instr); + + const nir_load_const_instr *get_literal_constant(int index); + + void add_uniform(unsigned index, const PValue &value); + + PValue uniform(unsigned index); + + /** Create a new register with the given index and store it in the + * lookup map + */ + PValue create_register_from_nir_src(const nir_src& sel, int comp); + + ValueMap get_temp_registers() const; + + PValue lookup_register(unsigned sel, unsigned swizzle, bool required); + + size_t register_count() const {return m_next_register_index;} + + PValue create_register(unsigned index, unsigned swizzle); + + unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa); + + PValue literal(uint32_t value); + + PValue get_temp_register(); + + GPRVector get_temp_vec4(); + +private: + + unsigned get_ssa_register_index(const nir_ssa_def& ssa) const; + + unsigned get_local_register_index(const nir_register& reg); + + unsigned get_local_register_index(const nir_register& reg) const; + + void allocate_ssa_register(const nir_ssa_def& ssa); + + void allocate_array(const nir_register& reg); + + + /** Allocate a register index with the given component mask. + * If one of the components has already been allocated the function + * will signal an error by returning -1, otherwise a register index is + * returned. + */ + int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc); + + /** Allocate a register index with the given component. + * If the component has already been allocated the function + * will signal an error by returning -1, otherwise a register index is + * returned. + */ + int allocate_component(unsigned index, unsigned comp, bool pre_alloc); + + /** Search for a new register with the given index in the + * lookup map.
+ * \param sel register sel value + * \param swizzle register component, can also be 4,5, and 7 + * \param required true: in debug mode assert when register doesn't exist + * false: return nullptr on failure + * NOTE(review): no declaration follows this comment; its parameter list + * matches lookup_register(sel, swizzle, required) declared in the public + * section - confirm and move it there. + */ + + std::set m_ssa_undef; + + LiteralBuffer m_literal_constants; + + std::map m_local_register_map; + std::map m_ssa_register_map; + + std::map m_uniforms; + + std::map m_registers; + + static PValue m_undef; + + struct VRec { + unsigned index; + unsigned mask; + unsigned pre_alloc_mask; + }; + std::map m_register_map; + + unsigned m_next_register_index; + + std::map m_arrays_map; + + std::map m_literals; + + int current_temp_reg_index; + int next_temp_reg_comp; +}; + +} + +#endif // SFN_VALUEPOOL_H -- 2.30.2