sb/sb_shader.cpp \
sb/sb_shader.h \
sb/sb_ssa_builder.cpp \
- sb/sb_valtable.cpp
+ sb/sb_valtable.cpp \
+ sfn/sfn_alu_defines.cpp \
+ sfn/sfn_alu_defines.h \
+ sfn/sfn_callstack.cpp \
+ sfn/sfn_callstack.h \
+ sfn/sfn_conditionaljumptracker.cpp \
+ sfn/sfn_conditionaljumptracker.h \
+ sfn/sfn_defines.h \
+ sfn/sfn_debug.cpp \
+ sfn/sfn_debug.h \
+ sfn/sfn_emitaluinstruction.cpp \
+ sfn/sfn_emitaluinstruction.h \
+ sfn/sfn_emitinstruction.cpp \
+ sfn/sfn_emitinstruction.h \
+ sfn/sfn_emittexinstruction.cpp \
+ sfn/sfn_emittexinstruction.h \
+ sfn/sfn_instruction_alu.cpp \
+ sfn/sfn_instruction_alu.h \
+ sfn/sfn_instruction_base.cpp \
+ sfn/sfn_instruction_base.h \
+ sfn/sfn_instruction_cf.cpp \
+ sfn/sfn_instruction_cf.h \
+ sfn/sfn_instruction_export.cpp \
+ sfn/sfn_instruction_export.h \
+ sfn/sfn_instruction_fetch.cpp \
+ sfn/sfn_instruction_fetch.h \
+ sfn/sfn_instruction_tex.cpp \
+ sfn/sfn_instruction_tex.h \
+ sfn/sfn_ir_to_assembly.cpp \
+ sfn/sfn_ir_to_assembly.h \
+ sfn/sfn_nir.cpp \
+ sfn/sfn_nir.h \
+ sfn/sfn_shader_base.cpp \
+ sfn/sfn_shader_base.h \
+ sfn/sfn_shader_fragment.cpp \
+ sfn/sfn_shader_fragment.h \
+ sfn/sfn_shader_geometry.cpp \
+ sfn/sfn_shader_geometry.h \
+ sfn/sfn_shader_vertex.cpp \
+ sfn/sfn_shader_vertex.h \
+ sfn/sfn_shaderio.cpp \
+ sfn/sfn_shaderio.h \
+ sfn/sfn_value.cpp \
+ sfn/sfn_value.h \
+ sfn/sfn_value_gpr.cpp \
+ sfn/sfn_value_gpr.h \
+ sfn/sfn_valuepool.cpp \
+ sfn/sfn_valuepool.h
R600_GENERATED_FILES = \
- egd_tables.h
\ No newline at end of file
+ egd_tables.h
'sb/sb_shader.h',
'sb/sb_ssa_builder.cpp',
'sb/sb_valtable.cpp',
-)
+ 'sfn/sfn_alu_defines.cpp',
+ 'sfn/sfn_alu_defines.h',
+ 'sfn/sfn_callstack.cpp',
+ 'sfn/sfn_callstack.h',
+ 'sfn/sfn_conditionaljumptracker.cpp',
+ 'sfn/sfn_conditionaljumptracker.h',
+ 'sfn/sfn_defines.h',
+ 'sfn/sfn_debug.cpp',
+ 'sfn/sfn_debug.h',
+ 'sfn/sfn_emitaluinstruction.cpp',
+ 'sfn/sfn_emitaluinstruction.h',
+ 'sfn/sfn_emitinstruction.cpp',
+ 'sfn/sfn_emitinstruction.h',
+ 'sfn/sfn_emittexinstruction.cpp',
+ 'sfn/sfn_emittexinstruction.h',
+ 'sfn/sfn_instruction_alu.cpp',
+ 'sfn/sfn_instruction_alu.h',
+ 'sfn/sfn_instruction_base.cpp',
+ 'sfn/sfn_instruction_base.h',
+ 'sfn/sfn_instruction_cf.cpp',
+ 'sfn/sfn_instruction_cf.h',
+ 'sfn/sfn_instruction_export.cpp',
+ 'sfn/sfn_instruction_export.h',
+ 'sfn/sfn_instruction_fetch.cpp',
+ 'sfn/sfn_instruction_fetch.h',
+ 'sfn/sfn_instruction_tex.cpp',
+ 'sfn/sfn_instruction_tex.h',
+ 'sfn/sfn_ir_to_assembly.cpp',
+ 'sfn/sfn_ir_to_assembly.h',
+ 'sfn/sfn_nir.cpp',
+ 'sfn/sfn_nir.h',
+ 'sfn/sfn_shader_base.cpp',
+ 'sfn/sfn_shader_base.h',
+ 'sfn/sfn_shader_fragment.cpp',
+ 'sfn/sfn_shader_fragment.h',
+ 'sfn/sfn_shader_geometry.cpp',
+ 'sfn/sfn_shader_geometry.h',
+ 'sfn/sfn_shader_vertex.cpp',
+ 'sfn/sfn_shader_vertex.h',
+ 'sfn/sfn_shaderio.cpp',
+ 'sfn/sfn_shaderio.h',
+ 'sfn/sfn_value.cpp',
+ 'sfn/sfn_value.h',
+ 'sfn/sfn_value_gpr.cpp',
+ 'sfn/sfn_value_gpr.h',
+ 'sfn/sfn_valuepool.cpp',
+ 'sfn/sfn_valuepool.h',
+ )
egd_tables_h = custom_target(
'egd_tables.h',
c_args : [c_vis_args, r600_c_args, '-Wstrict-overflow=0'],
cpp_args : [cpp_vis_args],
include_directories : [
- inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
+ inc_src, inc_common, inc_include, inc_compiler, inc_gallium, inc_gallium_aux, inc_amd_common,
inc_gallium_drivers,
],
- dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm],
+ dependencies: [dep_libdrm_radeon, dep_elf, dep_llvm, idep_nir, idep_nir_headers],
)
driver_r600 = declare_dependency(
compile_args : '-DGALLIUM_R600',
- link_with : [libr600, libradeonwinsys],
+ link_with : [libr600, libmesa_gallium, libradeonwinsys],
)
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_alu_defines.h"
+
+namespace r600 {
+
+const std::map<EAluOp, AluOp> alu_ops = {
+ {op0_nop ,AluOp(0, 0, AluOp::a,"NOP")},
+ {op0_group_barrier ,AluOp(0, 0, AluOp::a,"GROUP_BARRIER")},
+ {op0_group_seq_begin ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_BEGIN")},
+ {op0_group_seq_end ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_END")},
+ {op0_pred_set_clr ,AluOp(0, 1, AluOp::a,"PRED_SET_CLR")},
+ {op0_store_flags ,AluOp(0, 0, AluOp::v,"STORE_FLAGS")},
+ {op0_lds_1a ,AluOp(0, 0, AluOp::v,"LDS_1A")},
+ {op0_lds_1a1d ,AluOp(0, 0, AluOp::v,"LDS_1A1D")},
+ {op0_lds_2a ,AluOp(0, 0, AluOp::v,"LDS_2A")},
+
+ {op1_bcnt_int ,AluOp(1, 0, AluOp::v,"BCNT_INT")},
+ {op1_bcnt_accum_prev_int ,AluOp(1, 0, AluOp::v,"BCNT_ACCUM_PREV_INT")},
+ {op1_bfrev_int ,AluOp(1, 0, AluOp::a,"BFREV_INT")},
+ {op1_ceil ,AluOp(1, 1, AluOp::a,"CEIL")},
+ {op1_cos ,AluOp(1, 1, AluOp::t,"COS")},
+ {op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")},
+ {op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")},
+ {op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")},
+ {op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")},
+ {op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")},
+ {op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")},
+ {op1_flt16_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT16_TO_FLT32")},
+ {op1_flt32_to_flt16 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT16")},
+ {op1_flt32_to_flt64 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT64")},
+ {op1_flt64_to_flt32 ,AluOp(1, 1, AluOp::a,"FLT64_TO_FLT32")},
+ {op1_fract ,AluOp(1, 1, AluOp::a,"FRACT")},
+ {op1_fract_64 ,AluOp(1, 1, AluOp::v,"FRACT_64")},
+ {op1_frexp_64 ,AluOp(1, 1, AluOp::v,"FREXP_64")},
+ {op1_int_to_flt ,AluOp(1, 0, AluOp::t,"INT_TO_FLT")},
+ {op1_ldexp_64 ,AluOp(1, 1, AluOp::v,"LDEXP_64")},
+ {op1_interp_load_p0 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P0")},
+ {op1_interp_load_p10 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P10")},
+ {op1_interp_load_p20 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P20")},
+ {op1_load_store_flags ,AluOp(1, 0, AluOp::v,"LOAD_STORE_FLAGS")},
+ {op1_log_clamped ,AluOp(1, 1, AluOp::t,"LOG_CLAMPED")},
+ {op1_log_ieee ,AluOp(1, 1, AluOp::t,"LOG_IEEE")},
+ {op1_max4 ,AluOp(1, 1, AluOp::v,"MAX4")},
+ {op1_mbcnt_32hi_int ,AluOp(1, 0, AluOp::v,"MBCNT_32HI_INT")},
+ {op1_mbcnt_32lo_accum_prev_int ,AluOp(1, 0, AluOp::v,"MBCNT_32LO_ACCUM_PREV_INT")},
+ {op1_mov ,AluOp(1, 0, AluOp::a,"MOV")},
+ {op1_mova_int ,AluOp(1, 0, AluOp::v,"MOVA_INT")},
+ {op1_not_int ,AluOp(1, 0, AluOp::a,"NOT_INT")},
+ {op1_offset_to_flt ,AluOp(1, 0, AluOp::v,"OFFSET_TO_FLT")},
+ {op1_pred_set_inv ,AluOp(1, 1, AluOp::a,"PRED_SET_INV")},
+ {op1_pred_set_restore ,AluOp(1, 1, AluOp::a,"PRED_SET_RESTORE")},
+ {op1_set_cf_idx0 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX0")}, /* Reads from AR register? */
+ {op1_set_cf_idx1 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX1")}, /* Reads from AR register? */
+ {op1_recip_clamped ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED")},
+ {op1_recip_ff ,AluOp(1, 1, AluOp::t,"RECIP_FF")},
+ {op1_recip_ieee ,AluOp(1, 1, AluOp::t,"RECIP_IEEE")},
+ {op1_recipsqrt_clamped ,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED")},
+ {op1_recipsqrt_ff ,AluOp(1, 1, AluOp::t,"RECIPSQRT_FF")},
+ {op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")},
+ {op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")},
+ {op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")},
+ {op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")},
+ {op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")},
+ {op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")},
+ {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
+ {op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")},
+ {op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")},
+ {op1_sin ,AluOp(1, 1, AluOp::t,"SIN")},
+ {op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")},
+ {op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")},
+ {op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")},
+ {op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")},
+ {op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")},
+ {op1_ubyte3_flt ,AluOp(1, 1, AluOp::v,"UBYTE3_FLT")},
+ {op1_uint_to_flt ,AluOp(1, 0, AluOp::t,"UINT_TO_FLT")},
+ {op1_ffbh_uint ,AluOp(1, 0, AluOp::v,"FFBH_UINT")},
+ {op1_ffbl_int ,AluOp(1, 0, AluOp::v,"FFBL_INT")},
+ {op1_ffbh_int ,AluOp(1, 0, AluOp::v,"FFBH_INT")},
+ {op1_flt_to_uint4 ,AluOp(1, 1, AluOp::v,"FLT_TO_UINT4")},
+ {op1v_flt32_to_flt64 ,AluOp(1, 1, AluOp::a,"FLT32_TO_FLT64")},
+ {op1v_flt64_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT64_TO_FLT32")},
+
+ {op2_add ,AluOp(2, 1, AluOp::a,"ADD")},
+ {op2_bfm_int ,AluOp(2, 0, AluOp::v,"BFM_INT")},
+ {op2_mul ,AluOp(2, 1, AluOp::a,"MUL")},
+ {op2_mul_ieee ,AluOp(2, 1, AluOp::a,"MUL_IEEE")},
+ {op2_max ,AluOp(2, 1, AluOp::a,"MAX")},
+ {op2_min ,AluOp(2, 1, AluOp::a,"MIN")},
+ {op2_max_dx10 ,AluOp(2, 1, AluOp::a,"MAX_DX10")},
+ {op2_min_dx10 ,AluOp(2, 1, AluOp::a,"MIN_DX10")},
+ {op2_sete ,AluOp(2, 1, AluOp::a,"SETE")},
+ {op2_setgt ,AluOp(2, 1, AluOp::a,"SETGT")},
+ {op2_setge ,AluOp(2, 1, AluOp::a,"SETGE")},
+ {op2_setne ,AluOp(2, 1, AluOp::a,"SETNE")},
+ {op2_sete_dx10 ,AluOp(2, 1, AluOp::a,"SETE_DX10")},
+ {op2_setgt_dx10 ,AluOp(2, 1, AluOp::a,"SETGT_DX10")},
+ {op2_setge_dx10 ,AluOp(2, 1, AluOp::a,"SETGE_DX10")},
+ {op2_setne_dx10 ,AluOp(2, 1, AluOp::a,"SETNE_DX10")},
+ {op2_ashr_int ,AluOp(2, 0, AluOp::a,"ASHR_INT")},
+ {op2_lshr_int ,AluOp(2, 0, AluOp::a,"LSHR_INT")},
+ {op2_lshl_int ,AluOp(2, 0, AluOp::a,"LSHL_INT")},
+ {op2_mul_64 ,AluOp(2, 1, AluOp::a,"MUL_64")},
+ {op2_pred_setgt_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGT_UINT")},
+ {op2_pred_setge_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGE_UINT")},
+ {op2_pred_sete ,AluOp(2, 1, AluOp::a,"PRED_SETE")},
+ {op2_pred_setgt ,AluOp(2, 1, AluOp::a,"PRED_SETGT")},
+ {op2_pred_setge ,AluOp(2, 1, AluOp::a,"PRED_SETGE")},
+ {op2_pred_setne ,AluOp(2, 1, AluOp::a,"PRED_SETNE")},
+ {op2_pred_set_pop ,AluOp(2, 1, AluOp::a,"PRED_SET_POP")},
+ {op2_pred_sete_push ,AluOp(2, 1, AluOp::a,"PRED_SETE_PUSH")},
+ {op2_pred_setgt_push ,AluOp(2, 1, AluOp::a,"PRED_SETGT_PUSH")},
+ {op2_pred_setge_push ,AluOp(2, 1, AluOp::a,"PRED_SETGE_PUSH")},
+ {op2_pred_setne_push ,AluOp(2, 1, AluOp::a,"PRED_SETNE_PUSH")},
+ {op2_kille ,AluOp(2, 1, AluOp::a,"KILLE")},
+ {op2_killgt ,AluOp(2, 1, AluOp::a,"KILLGT")},
+ {op2_killge ,AluOp(2, 1, AluOp::a,"KILLGE")},
+ {op2_killne ,AluOp(2, 1, AluOp::a,"KILLNE")},
+ {op2_and_int ,AluOp(2, 0, AluOp::a,"AND_INT")},
+ {op2_or_int ,AluOp(2, 0, AluOp::a,"OR_INT")},
+ {op2_xor_int ,AluOp(2, 0, AluOp::a,"XOR_INT")},
+ {op2_add_int ,AluOp(2, 0, AluOp::a,"ADD_INT")},
+ {op2_sub_int ,AluOp(2, 0, AluOp::a,"SUB_INT")},
+ {op2_max_int ,AluOp(2, 0, AluOp::a,"MAX_INT")},
+ {op2_min_int ,AluOp(2, 0, AluOp::a,"MIN_INT")},
+ {op2_max_uint ,AluOp(2, 0, AluOp::a,"MAX_UINT")},
+ {op2_min_uint ,AluOp(2, 0, AluOp::a,"MIN_UINT")},
+ {op2_sete_int ,AluOp(2, 0, AluOp::a,"SETE_INT")},
+ {op2_setgt_int ,AluOp(2, 0, AluOp::a,"SETGT_INT")},
+ {op2_setge_int ,AluOp(2, 0, AluOp::a,"SETGE_INT")},
+ {op2_setne_int ,AluOp(2, 0, AluOp::a,"SETNE_INT")},
+ {op2_setgt_uint ,AluOp(2, 0, AluOp::a,"SETGT_UINT")},
+ {op2_setge_uint ,AluOp(2, 0, AluOp::a,"SETGE_UINT")},
+ {op2_killgt_uint ,AluOp(2, 0, AluOp::a,"KILLGT_UINT")},
+ {op2_killge_uint ,AluOp(2, 0, AluOp::a,"KILLGE_UINT")},
+ {op2_prede_int ,AluOp(2, 0, AluOp::a,"PREDE_INT")},
+ {op2_pred_setgt_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_INT")},
+ {op2_pred_setge_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_INT")},
+ {op2_pred_setne_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_INT")},
+ {op2_kille_int ,AluOp(2, 0, AluOp::a,"KILLE_INT")},
+ {op2_killgt_int ,AluOp(2, 0, AluOp::a,"KILLGT_INT")},
+ {op2_killge_int ,AluOp(2, 0, AluOp::a,"KILLGE_INT")},
+ {op2_killne_int ,AluOp(2, 0, AluOp::a,"KILLNE_INT")},
+ {op2_pred_sete_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETE_PUSH_INT")},
+ {op2_pred_setgt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_PUSH_INT")},
+ {op2_pred_setge_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_PUSH_INT")},
+ {op2_pred_setne_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_PUSH_INT")},
+ {op2_pred_setlt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLT_PUSH_INT")},
+ {op2_pred_setle_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLE_PUSH_INT")},
+ {op2_addc_uint ,AluOp(2, 0, AluOp::a,"ADDC_UINT")},
+ {op2_subb_uint ,AluOp(2, 0, AluOp::a,"SUBB_UINT")},
+ {op2_set_mode ,AluOp(2, 0, AluOp::a,"SET_MODE")},
+ {op2_set_lds_size ,AluOp(2, 0, AluOp::a,"SET_LDS_SIZE")},
+ {op2_mullo_int ,AluOp(2, 0, AluOp::t,"MULLO_INT")},
+ {op2_mulhi_int ,AluOp(2, 0, AluOp::t,"MULHI_INT")},
+ {op2_mullo_uint ,AluOp(2, 0, AluOp::t,"MULLO_UINT")},
+ {op2_mulhi_uint ,AluOp(2, 0, AluOp::t,"MULHI_UINT")},
+ {op2_dot_ieee ,AluOp(2, 1, AluOp::v,"DOT_IEEE")},
+ {op2_mulhi_uint24 ,AluOp(2, 0, AluOp::v,"MULHI_UINT24")},
+ {op2_mul_uint24 ,AluOp(2, 0, AluOp::v,"MUL_UINT24")},
+ {op2_sete_64 ,AluOp(2, 1, AluOp::v,"SETE_64")},
+ {op2_setne_64 ,AluOp(2, 1, AluOp::v,"SETNE_64")},
+ {op2_setgt_64 ,AluOp(2, 1, AluOp::v,"SETGT_64")},
+ {op2_setge_64 ,AluOp(2, 1, AluOp::v,"SETGE_64")},
+ {op2_min_64 ,AluOp(2, 1, AluOp::v,"MIN_64")},
+ {op2_max_64 ,AluOp(2, 1, AluOp::v,"MAX_64")},
+ {op2_dot4 ,AluOp(2, 1, AluOp::v,"DOT4")},
+ {op2_dot4_ieee ,AluOp(2, 1, AluOp::v,"DOT4_IEEE")},
+ {op2_cube ,AluOp(2, 1, AluOp::v,"CUBE")},
+ {op2_pred_setgt_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGT_64")},
+ {op2_pred_sete_64 ,AluOp(2, 1, AluOp::v,"PRED_SETE_64")},
+ {op2_pred_setge_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGE_64")},
+ {op2v_mul_64 ,AluOp(2, 1, AluOp::v,"MUL_64")},
+ {op2_add_64 ,AluOp(2, 1, AluOp::v,"ADD_64")},
+ {op2_sad_accum_prev_uint ,AluOp(2, 0, AluOp::v,"SAD_ACCUM_PREV_UINT")},
+ {op2_dot ,AluOp(2, 1, AluOp::v,"DOT")},
+ {op2_mul_prev ,AluOp(2, 1, AluOp::v,"MUL_PREV")},
+ {op2_mul_ieee_prev ,AluOp(2, 1, AluOp::v,"MUL_IEEE_PREV")},
+ {op2_add_prev ,AluOp(2, 1, AluOp::v,"ADD_PREV")},
+ {op2_muladd_prev ,AluOp(2, 1, AluOp::v,"MULADD_PREV")},
+ {op2_muladd_ieee_prev ,AluOp(2, 1, AluOp::v,"MULADD_IEEE_PREV")},
+ {op2_interp_xy ,AluOp(2, 1, AluOp::v,"INTERP_XY")},
+ {op2_interp_zw ,AluOp(2, 1, AluOp::v,"INTERP_ZW")},
+ {op2_interp_x ,AluOp(2, 1, AluOp::v,"INTERP_X")},
+ {op2_interp_z ,AluOp(2, 1, AluOp::v,"INTERP_Z")},
+
+ {op3_bfe_uint ,AluOp(3, 0, AluOp::v,"BFE_UINT")},
+ {op3_bfe_int ,AluOp(3, 0, AluOp::v,"BFE_INT")},
+ {op3_bfi_int ,AluOp(3, 0, AluOp::v,"BFI_INT")},
+ {op3_fma ,AluOp(3, 1, AluOp::v,"FMA")},
+ {op3_cndne_64 ,AluOp(3, 1, AluOp::v,"CNDNE_64")},
+ {op3_fma_64 ,AluOp(3, 1, AluOp::v,"FMA_64")},
+ {op3_lerp_uint ,AluOp(3, 0, AluOp::v,"LERP_UINT")},
+ {op3_bit_align_int ,AluOp(3, 0, AluOp::v,"BIT_ALIGN_INT")},
+ {op3_byte_align_int ,AluOp(3, 0, AluOp::v,"BYTE_ALIGN_INT")},
+ {op3_sad_accum_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_UINT")},
+ {op3_sad_accum_hi_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_HI_UINT")},
+ {op3_muladd_uint24 ,AluOp(3, 0, AluOp::v,"MULADD_UINT24")},
+ {op3_lds_idx_op ,AluOp(3, 0, AluOp::x,"LDS_IDX_OP")},
+ {op3_muladd ,AluOp(3, 1, AluOp::a,"MULADD")},
+ {op3_muladd_m2 ,AluOp(3, 1, AluOp::a,"MULADD_M2")},
+ {op3_muladd_m4 ,AluOp(3, 1, AluOp::a,"MULADD_M4")},
+ {op3_muladd_d2 ,AluOp(3, 1, AluOp::a,"MULADD_D2")},
+ {op3_muladd_ieee ,AluOp(3, 1, AluOp::a,"MULADD_IEEE")},
+ {op3_cnde ,AluOp(3, 1, AluOp::a,"CNDE")},
+ {op3_cndgt ,AluOp(3, 1, AluOp::a,"CNDGT")},
+ {op3_cndge ,AluOp(3, 1, AluOp::a,"CNDGE")},
+ {op3_cnde_int ,AluOp(3, 0, AluOp::a,"CNDE_INT")},
+ {op3_cndgt_int ,AluOp(3, 0, AluOp::a,"CNDGT_INT")},
+ {op3_cndge_int ,AluOp(3, 0, AluOp::a,"CNDGE_INT")},
+ {op3_mul_lit ,AluOp(3, 1, AluOp::t,"MUL_LIT")}
+};
+
+const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = {
+ {ALU_SRC_LDS_OQ_A, {false, "LDS_OQ_A"}},
+ {ALU_SRC_LDS_OQ_B, {false, "LDS_OQ_B"}},
+ {ALU_SRC_LDS_OQ_A_POP, {false, "LDS_OQ_A_POP"}},
+ {ALU_SRC_LDS_OQ_B_POP, {false, "LDS_OQ_B_POP"}},
+ {ALU_SRC_LDS_DIRECT_A, {false, "LDS_DIRECT_A"}},
+ {ALU_SRC_LDS_DIRECT_B, {false, "LDS_DIRECT_B"}},
+ {ALU_SRC_TIME_HI, {false, "TIME_HI"}},
+ {ALU_SRC_TIME_LO, {false, "TIME_LO"}},
+ {ALU_SRC_MASK_HI, {false, "MASK_HI"}},
+ {ALU_SRC_MASK_LO, {false, "MASK_LO"}},
+ {ALU_SRC_HW_WAVE_ID, {false, "HW_WAVE_ID"}},
+ {ALU_SRC_SIMD_ID, {false, "SIMD_ID"}},
+ {ALU_SRC_SE_ID, {false, "SE_ID"}},
+ {ALU_SRC_HW_THREADGRP_ID, {false, "HW_THREADGRP_ID"}},
+ {ALU_SRC_WAVE_ID_IN_GRP, {false, "WAVE_ID_IN_GRP"}},
+ {ALU_SRC_NUM_THREADGRP_WAVES, {false, "NUM_THREADGRP_WAVES"}},
+ {ALU_SRC_HW_ALU_ODD, {false, "HW_ALU_ODD"}},
+ {ALU_SRC_LOOP_IDX, {false, "LOOP_IDX"}},
+ {ALU_SRC_PARAM_BASE_ADDR, {false, "PARAM_BASE_ADDR"}},
+ {ALU_SRC_NEW_PRIM_MASK, {false, "NEW_PRIM_MASK"}},
+ {ALU_SRC_PRIM_MASK_HI, {false, "PRIM_MASK_HI"}},
+ {ALU_SRC_PRIM_MASK_LO, {false, "PRIM_MASK_LO"}},
+ {ALU_SRC_1_DBL_L, {false, "1.0L"}},
+ {ALU_SRC_1_DBL_M, {false, "1.0H"}},
+ {ALU_SRC_0_5_DBL_L, {false, "0.5L"}},
+ {ALU_SRC_0_5_DBL_M, {false, "0.5H"}},
+ {ALU_SRC_0, {false, "0"}},
+ {ALU_SRC_1, {false, "1.0"}},
+ {ALU_SRC_1_INT, {false, "1"}},
+ {ALU_SRC_M_1_INT, {false, "-1"}},
+ {ALU_SRC_0_5, {false, "0.5"}},
+ {ALU_SRC_LITERAL, {true, "ALU_SRC_LITERAL"}},
+ {ALU_SRC_PV, {true, "PV"}},
+ {ALU_SRC_PS, {false, "PS"}}
+};
+
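+/* Maps each LDS op to the number of sources it consumes and its mnemonic. */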
+const std::map<ESDOp, LDSOp> lds_ops = {
+ {DS_OP_ADD , {2, "DS_ADD"}},
+ {DS_OP_SUB , {2, "DS_SUB"}},
+ {DS_OP_RSUB , {2, "DS_RSUB"}},
+ {DS_OP_INC , {2, "DS_INC"}},
+ {DS_OP_DEC , {2, "DS_DEC"}},
+ {DS_OP_MIN_INT , {2, "DS_MIN_INT"}},
+ {DS_OP_MAX_INT , {2, "DS_MAX_INT"}},
+ {DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}},
+ {DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}},
+ {DS_OP_AND , {2, "DS_AND"}},
+ {DS_OP_OR , {2, "DS_OR"}},
+ {DS_OP_XOR , {2, "DS_XOR"}},
+ {DS_OP_MSKOR , {3, "DS_MSKOR"}},
+ {DS_OP_WRITE , {2, "DS_WRITE"}},
+ {DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}},
+ {DS_OP_WRITE2 , {3, "DS_WRITE2"}},
+ {DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}},
+ {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}},
+ {DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}},
+ {DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}},
+ {DS_OP_ADD_RET , {2, "DS_ADD_RET"}},
+ {DS_OP_SUB_RET , {2, "DS_SUB_RET"}},
+ {DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}},
+ {DS_OP_INC_RET , {2, "DS_INC_RET"}},
+ {DS_OP_DEC_RET , {2, "DS_DEC_RET"}},
+ {DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}},
+ {DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}},
+ {DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}},
+ {DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}},
+ {DS_OP_AND_RET , {2, "DS_AND_RET"}},
+ {DS_OP_OR_RET , {2, "DS_OR_RET"}},
+ {DS_OP_XOR_RET , {2, "DS_XOR_RET"}},
+ {DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}},
+ {DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}},
+ {DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}},
+ {DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}},
+ {DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}},
+ {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
+ {DS_OP_READ_RET , {1, "DS_READ_RET"}},
+ {DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}},
+ {DS_OP_READ2_RET , {2, "DS_READ2_RET"}},
+ {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}},
+ {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}},
+ {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
+ {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
+ {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
+ {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
+};
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef r600_sfn_alu_defines_h
+#define r600_sfn_alu_defines_h
+
+#include <map>
+#include <bitset>
+
+namespace r600 {
+
+/* ALU op2 instructions: opcode in bits [17:7]; the top three bits are always zero. */
+enum EAluOp {
+ op2_add = 0,
+ op2_mul = 1,
+ op2_mul_ieee = 2,
+ op2_max = 3,
+ op2_min = 4,
+ op2_max_dx10 = 5,
+ op2_min_dx10 = 6,
+ op2_sete = 8,
+ op2_setgt = 9,
+ op2_setge = 10,
+ op2_setne = 11,
+ op2_sete_dx10 = 12,
+ op2_setgt_dx10 = 13,
+ op2_setge_dx10 = 14,
+ op2_setne_dx10 = 15,
+ op1_fract = 16,
+ op1_trunc = 17,
+ op1_ceil = 18,
+ op1_rndne = 19,
+ op1_floor = 20,
+ op2_ashr_int = 21,
+ op2_lshr_int = 22,
+ op2_lshl_int = 23,
+ op1_mov = 25,
+ op0_nop = 26,
+ op2_mul_64 = 27,
+ op1_flt64_to_flt32 = 28,
+ op1v_flt32_to_flt64 = 29,
+ op2_pred_setgt_uint = 30,
+ op2_pred_setge_uint = 31,
+ op2_pred_sete = 32,
+ op2_pred_setgt = 33,
+ op2_pred_setge = 34,
+ op2_pred_setne = 35,
+ op1_pred_set_inv = 36,
+ op2_pred_set_pop = 37,
+ op0_pred_set_clr = 38,
+ op1_pred_set_restore = 39,
+ op2_pred_sete_push = 40,
+ op2_pred_setgt_push = 41,
+ op2_pred_setge_push = 42,
+ op2_pred_setne_push = 43,
+ op2_kille = 44,
+ op2_killgt = 45,
+ op2_killge = 46,
+ op2_killne = 47,
+ op2_and_int = 48,
+ op2_or_int = 49,
+ op2_xor_int = 50,
+ op1_not_int = 51,
+ op2_add_int = 52,
+ op2_sub_int = 53,
+ op2_max_int = 54,
+ op2_min_int = 55,
+ op2_max_uint = 56,
+ op2_min_uint = 57,
+ op2_sete_int = 58,
+ op2_setgt_int = 59,
+ op2_setge_int = 60,
+ op2_setne_int = 61,
+ op2_setgt_uint = 62,
+ op2_setge_uint = 63,
+ op2_killgt_uint = 64,
+ op2_killge_uint = 65,
+ op2_prede_int = 66,
+ op2_pred_setgt_int = 67,
+ op2_pred_setge_int = 68,
+ op2_pred_setne_int = 69,
+ op2_kille_int = 70,
+ op2_killgt_int = 71,
+ op2_killge_int = 72,
+ op2_killne_int = 73,
+ op2_pred_sete_push_int = 74,
+ op2_pred_setgt_push_int = 75,
+ op2_pred_setge_push_int = 76,
+ op2_pred_setne_push_int = 77,
+ op2_pred_setlt_push_int = 78,
+ op2_pred_setle_push_int = 79,
+ op1_flt_to_int = 80,
+ op1_bfrev_int = 81,
+ op2_addc_uint = 82,
+ op2_subb_uint = 83,
+ op0_group_barrier = 84,
+ op0_group_seq_begin = 85,
+ op0_group_seq_end = 86,
+ op2_set_mode = 87,
+ op1_set_cf_idx0 = 88,
+ op1_set_cf_idx1 = 89,
+ op2_set_lds_size = 90,
+ op1_exp_ieee = 129,
+ op1_log_clamped = 130,
+ op1_log_ieee = 131,
+ op1_recip_clamped = 132,
+ op1_recip_ff = 133,
+ op1_recip_ieee = 134,
+ op1_recipsqrt_clamped = 135,
+ op1_recipsqrt_ff = 136,
+ op1_recipsqrt_ieee1 = 137,
+ op1_sqrt_ieee = 138,
+ op1_sin = 141,
+ op1_cos = 142,
+ op2_mullo_int = 143,
+ op2_mulhi_int = 144,
+ op2_mullo_uint = 145,
+ op2_mulhi_uint = 146,
+ op1_recip_int = 147,
+ op1_recip_uint = 148,
+ op1_recip_64 = 149,
+ op1_recip_clamped_64 = 150,
+ op1_recipsqrt_64 = 151,
+ op1_recipsqrt_clamped_64 = 152,
+ op1_sqrt_64 = 153,
+ op1_flt_to_uint = 154,
+ op1_int_to_flt = 155,
+ op1_uint_to_flt = 156,
+ op2_bfm_int = 160,
+ op1_flt32_to_flt16 = 162,
+ op1_flt16_to_flt32 = 163,
+ op1_ubyte0_flt = 164,
+ op1_ubyte1_flt = 165,
+ op1_ubyte2_flt = 166,
+ op1_ubyte3_flt = 167,
+ op1_bcnt_int = 170,
+ op1_ffbh_uint = 171,
+ op1_ffbl_int = 172,
+ op1_ffbh_int = 173,
+ op1_flt_to_uint4 = 174,
+ op2_dot_ieee = 175,
+ op1_flt_to_int_rpi = 176,
+ op1_flt_to_int_floor = 177,
+ op2_mulhi_uint24 = 178,
+ op1_mbcnt_32hi_int = 179,
+ op1_offset_to_flt = 180,
+ op2_mul_uint24 = 181,
+ op1_bcnt_accum_prev_int = 182,
+ op1_mbcnt_32lo_accum_prev_int = 183,
+ op2_sete_64 = 184,
+ op2_setne_64 = 185,
+ op2_setgt_64 = 186,
+ op2_setge_64 = 187,
+ op2_min_64 = 188,
+ op2_max_64 = 189,
+ op2_dot4 = 190,
+ op2_dot4_ieee = 191,
+ op2_cube = 192,
+ op1_max4 = 193,
+ op1_frexp_64 = 196,
+ op1_ldexp_64 = 197,
+ op1_fract_64 = 198,
+ op2_pred_setgt_64 = 199,
+ op2_pred_sete_64 = 200,
+ op2_pred_setge_64 = 201,
+ op2v_mul_64 = 202,
+ op2_add_64 = 203,
+ op1_mova_int = 204,
+ op1v_flt64_to_flt32 = 205,
+ op1_flt32_to_flt64 = 206,
+ op2_sad_accum_prev_uint = 207,
+ op2_dot = 208,
+ op2_mul_prev = 209,
+ op2_mul_ieee_prev = 210,
+ op2_add_prev = 211,
+ op2_muladd_prev = 212,
+ op2_muladd_ieee_prev = 213,
+ op2_interp_xy = 214,
+ op2_interp_zw = 215,
+ op2_interp_x = 216,
+ op2_interp_z = 217,
+ op0_store_flags = 218,
+ op1_load_store_flags = 219,
+ op0_lds_1a = 220,
+ op0_lds_1a1d = 221,
+ op0_lds_2a = 223,
+ op1_interp_load_p0 = 224,
+ op1_interp_load_p10 = 225,
+ op1_interp_load_p20 = 226,
+ // op3 opcodes: all values shifted left by 6 bits
+ op3_bfe_uint = 4 << 6,
+ op3_bfe_int = 5 << 6,
+ op3_bfi_int = 6 << 6,
+ op3_fma = 7 << 6,
+ op3_cndne_64 = 9 << 6,
+ op3_fma_64 = 10 << 6,
+ op3_lerp_uint = 11 << 6,
+ op3_bit_align_int = 12 << 6,
+ op3_byte_align_int = 13 << 6,
+ op3_sad_accum_uint = 14 << 6,
+ op3_sad_accum_hi_uint = 15 << 6,
+ op3_muladd_uint24 = 16 << 6,
+ op3_lds_idx_op = 17 << 6,
+ op3_muladd = 20 << 6,
+ op3_muladd_m2 = 21 << 6,
+ op3_muladd_m4 = 22 << 6,
+ op3_muladd_d2 = 23 << 6,
+ op3_muladd_ieee = 24 << 6,
+ op3_cnde = 25 << 6,
+ op3_cndgt = 26 << 6,
+ op3_cndge = 27 << 6,
+ op3_cnde_int = 28 << 6,
+ op3_cndgt_int = 29 << 6,
+ op3_cndge_int = 30 << 6,
+ op3_mul_lit = 31 << 6
+};
+
+
+
+using AluOpFlags = std::bitset<32>;
+
+struct AluOp {
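+ /* Unit availability mask: x, y, z, and w are the four vector lanes and
+ * t is the trans unit; v stands for any vector lane, a for any unit. */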
+ static constexpr int x = 1;
+ static constexpr int y = 2;
+ static constexpr int z = 4;
+ static constexpr int w = 8;
+ static constexpr int v = 15;
+ static constexpr int t = 16;
+ static constexpr int a = 31;
+
+ AluOp(int ns, int f, int um, const char *n):
+ nsrc(ns), is_float(f), unit_mask(um), name(n)
+ {
+ }
+
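+ /* Return true if the op can be issued to one of the units in 'flags'. */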
+ bool can_channel(int flags) const {
+ return flags & unit_mask;
+ }
+
+ int nsrc: 4;
+ int is_float:1;
+ int unit_mask: 5;
+ const char *name;
+};
+
+extern const std::map<EAluOp, AluOp> alu_ops;
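+
+/* Illustrative lookup (a sketch, not used elsewhere in this patch):
+ *
+ * const AluOp& op = alu_ops.at(op2_add);
+ * if (op.can_channel(AluOp::t)) {
+ * // ADD may also be issued to the trans unit
+ * }
+ */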
+
+enum AluInlineConstants {
+ ALU_SRC_LDS_OQ_A = 219,
+ ALU_SRC_LDS_OQ_B = 220,
+ ALU_SRC_LDS_OQ_A_POP = 221,
+ ALU_SRC_LDS_OQ_B_POP = 222,
+ ALU_SRC_LDS_DIRECT_A = 223,
+ ALU_SRC_LDS_DIRECT_B = 224,
+ ALU_SRC_TIME_HI = 227,
+ ALU_SRC_TIME_LO = 228,
+ ALU_SRC_MASK_HI = 229,
+ ALU_SRC_MASK_LO = 230,
+ ALU_SRC_HW_WAVE_ID = 231,
+ ALU_SRC_SIMD_ID = 232,
+ ALU_SRC_SE_ID = 233,
+ ALU_SRC_HW_THREADGRP_ID = 234,
+ ALU_SRC_WAVE_ID_IN_GRP = 235,
+ ALU_SRC_NUM_THREADGRP_WAVES = 236,
+ ALU_SRC_HW_ALU_ODD = 237,
+ ALU_SRC_LOOP_IDX = 238,
+ ALU_SRC_PARAM_BASE_ADDR = 240,
+ ALU_SRC_NEW_PRIM_MASK = 241,
+ ALU_SRC_PRIM_MASK_HI = 242,
+ ALU_SRC_PRIM_MASK_LO = 243,
+ ALU_SRC_1_DBL_L = 244,
+ ALU_SRC_1_DBL_M = 245,
+ ALU_SRC_0_5_DBL_L = 246,
+ ALU_SRC_0_5_DBL_M = 247,
+ ALU_SRC_0 = 248,
+ ALU_SRC_1 = 249,
+ ALU_SRC_1_INT = 250,
+ ALU_SRC_M_1_INT = 251,
+ ALU_SRC_0_5 = 252,
+ ALU_SRC_LITERAL = 253,
+ ALU_SRC_PV = 254,
+ ALU_SRC_PS = 255,
+ ALU_SRC_PARAM_BASE = 0x1C0,
+ ALU_SRC_UNKNOWN
+};
+
+struct AluInlineConstantDescr {
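+ /* True if the source value is selected per channel (e.g. PV and literals) */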
+ bool use_chan;
+ const char *descr;
+};
+
+extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
+
+enum ESDOp {
+ DS_OP_ADD = 0,
+ DS_OP_SUB = 1,
+ DS_OP_RSUB = 2,
+ DS_OP_INC = 3,
+ DS_OP_DEC = 4,
+ DS_OP_MIN_INT = 5,
+ DS_OP_MAX_INT = 6,
+ DS_OP_MIN_UINT = 7,
+ DS_OP_MAX_UINT = 8,
+ DS_OP_AND = 9,
+ DS_OP_OR = 10,
+ DS_OP_XOR = 11,
+ DS_OP_MSKOR = 12,
+ DS_OP_WRITE = 13,
+ DS_OP_WRITE_REL = 14,
+ DS_OP_WRITE2 = 15,
+ DS_OP_CMP_STORE = 16,
+ DS_OP_CMP_STORE_SPF = 17,
+ DS_OP_BYTE_WRITE = 18,
+ DS_OP_SHORT_WRITE = 19,
+ DS_OP_ADD_RET = 32,
+ DS_OP_SUB_RET = 33,
+ DS_OP_RSUB_RET = 34,
+ DS_OP_INC_RET = 35,
+ DS_OP_DEC_RET = 36,
+ DS_OP_MIN_INT_RET = 37,
+ DS_OP_MAX_INT_RET = 38,
+ DS_OP_MIN_UINT_RET = 39,
+ DS_OP_MAX_UINT_RET = 40,
+ DS_OP_AND_RET = 41,
+ DS_OP_OR_RET = 42,
+ DS_OP_XOR_RET = 43,
+ DS_OP_MSKOR_RET = 44,
+ DS_OP_XCHG_RET = 45,
+ DS_OP_XCHG_REL_RET = 46,
+ DS_OP_XCHG2_RET = 47,
+ DS_OP_CMP_XCHG_RET = 48,
+ DS_OP_CMP_XCHG_SPF_RET = 49,
+ DS_OP_READ_RET = 50,
+ DS_OP_READ_REL_RET = 51,
+ DS_OP_READ2_RET = 52,
+ DS_OP_READWRITE_RET = 53,
+ DS_OP_BYTE_READ_RET = 54,
+ DS_OP_UBYTE_READ_RET = 55,
+ DS_OP_SHORT_READ_RET = 56,
+ DS_OP_USHORT_READ_RET = 57,
+ DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
+ DS_OP_INVALID = 64
+};
+
+struct LDSOp {
+ int nsrc;
+ const char *name;
+};
+
+extern const std::map<ESDOp, LDSOp> lds_ops;
+
+}
+
+#endif // r600_sfn_alu_defines_h
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_callstack.h"
+
+namespace r600 {
+
+CallStack::CallStack(r600_bytecode& bc):
+ m_bc(bc)
+{
+
+}
+
+CallStack::~CallStack()
+{
+}
+
+int CallStack::push(unsigned type)
+{
+ switch (type) {
+ case FC_PUSH_VPM:
+ ++m_bc.stack.push;
+ break;
+ case FC_PUSH_WQM:
+ ++m_bc.stack.push_wqm;
+ break;
+ case FC_LOOP:
+ ++m_bc.stack.loop;
+ break;
+ default:
+ assert(0);
+ }
+
+ return update_max_depth(type);
+}
+
+void CallStack::pop(unsigned type)
+{
+ switch(type) {
+ case FC_PUSH_VPM:
+ --m_bc.stack.push;
+ assert(m_bc.stack.push >= 0);
+ break;
+ case FC_PUSH_WQM:
+ --m_bc.stack.push_wqm;
+ assert(m_bc.stack.push_wqm >= 0);
+ break;
+ case FC_LOOP:
+ --m_bc.stack.loop;
+ assert(m_bc.stack.loop >= 0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+int CallStack::update_max_depth(unsigned type)
+{
+
+ r600_stack_info& stack = m_bc.stack;
+ int elements;
+ int entries;
+
+ int entry_size = stack.entry_size;
+
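+ /* Each loop and each WQM push costs a full sub-entry, plain branch pushes
+ * cost one element each; elements are rounded up to whole hardware stack
+ * entries below (four elements per entry on Evergreen). */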
+ elements = (stack.loop + stack.push_wqm) * entry_size;
+ elements += stack.push;
+
+ /* These next three lines are EVERGREEN specific and should
+ * be moved to a virtual function when other chipsets are to
+ * be supported */
+ assert(m_bc.chip_class == EVERGREEN);
+ if (type == FC_PUSH_VPM || stack.push > 0) {
+ elements += 1;
+ }
+
+ entry_size = 4;
+
+ entries = (elements + (entry_size - 1)) / entry_size;
+
+ if (entries > stack.max_entries)
+ stack.max_entries = entries;
+
+ return elements;
+}
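+
+/* Typical pairing, as a sketch: the bytecode emitter calls push() when it
+ * opens a conditionally executed clause and pop() when the matching POP is
+ * emitted:
+ *
+ * CallStack cs(bc);
+ * cs.push(FC_PUSH_VPM); // entering an 'if' branch
+ * // ... emit the branch instructions ...
+ * cs.pop(FC_PUSH_VPM);
+ */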
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_CALLSTACK_HH
+#define SFN_CALLSTACK_HH
+
+#include "gallium/drivers/r600/r600_asm.h"
+
+namespace r600 {
+
+class CallStack {
+public:
+ CallStack(r600_bytecode& bc);
+ ~CallStack();
+ int push(unsigned type);
+ void pop(unsigned type);
+ int update_max_depth(unsigned type);
+private:
+ r600_bytecode& m_bc;
+};
+
+}
+
+#endif // SFN_CALLSTACK_HH
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_conditionaljumptracker.h"
+#include "sfn_debug.h"
+
+#include <stack>
+#include <vector>
+#include <memory>
+#include <iostream>
+
+namespace r600 {
+
+using std::stack;
+using std::vector;
+using std::shared_ptr;
+
+struct StackFrame {
+
+ StackFrame(r600_bytecode_cf *s, JumpType t):
+ type(t),
+ start(s)
+ {}
+
+ virtual ~StackFrame();
+
+ JumpType type;
+ r600_bytecode_cf *start;
+ vector<r600_bytecode_cf *> mid;
+
+ virtual void fixup_mid(r600_bytecode_cf *cf) = 0;
+ virtual void fixup_pop(r600_bytecode_cf *final) = 0;
+};
+
+using PStackFrame = shared_ptr<StackFrame>;
+
+struct IfFrame : public StackFrame {
+ IfFrame(r600_bytecode_cf *s);
+ void fixup_mid(r600_bytecode_cf *cf) override;
+ void fixup_pop(r600_bytecode_cf *final) override;
+};
+
+struct LoopFrame : public StackFrame {
+ LoopFrame(r600_bytecode_cf *s);
+ void fixup_mid(r600_bytecode_cf *cf) override;
+ void fixup_pop(r600_bytecode_cf *final) override;
+};
+
+struct ConditionalJumpTrackerImpl {
+ ConditionalJumpTrackerImpl();
+ stack<PStackFrame> m_jump_stack;
+ stack<PStackFrame> m_loop_stack;
+ int m_current_loop_stack_pos;
+};
+
+ConditionalJumpTrackerImpl::ConditionalJumpTrackerImpl():
+ m_current_loop_stack_pos(0)
+{
+
+}
+
+ConditionalJumpTracker::~ConditionalJumpTracker()
+{
+ delete impl;
+}
+
+ConditionalJumpTracker::ConditionalJumpTracker()
+{
+ impl = new ConditionalJumpTrackerImpl();
+}
+
+void ConditionalJumpTracker::push(r600_bytecode_cf *start, JumpType type)
+{
+ PStackFrame f;
+ switch (type) {
+ case jt_if:
+ f.reset(new IfFrame(start));
+ break;
+ case jt_loop:
+ f.reset(new LoopFrame(start));
+ impl->m_loop_stack.push(f);
+ break;
+ }
+ impl->m_jump_stack.push(f);
+}
+
+bool ConditionalJumpTracker::pop(r600_bytecode_cf *final, JumpType type)
+{
+ if (impl->m_jump_stack.empty())
+ return false;
+
+ auto& frame = *impl->m_jump_stack.top();
+ if (frame.type != type)
+ return false;
+
+ frame.fixup_pop(final);
+ if (frame.type == jt_loop)
+ impl->m_loop_stack.pop();
+ impl->m_jump_stack.pop();
+ return true;
+}
+
+bool ConditionalJumpTracker::add_mid(r600_bytecode_cf *source, JumpType type)
+{
+ if (impl->m_jump_stack.empty()) {
+ sfn_log << "Jump stack empty\n";
+ return false;
+ }
+
+ PStackFrame pframe;
+ if (type == jt_loop) {
+ if (impl->m_loop_stack.empty()) {
+ sfn_log << "Loop jump stack empty\n";
+ return false;
+ }
+ pframe = impl->m_loop_stack.top();
+ } else {
+ pframe = impl->m_jump_stack.top();
+ }
+
+ pframe->mid.push_back(source);
+ pframe->fixup_mid(source);
+ return true;
+}
+
+IfFrame::IfFrame(r600_bytecode_cf *s):
+ StackFrame (s, jt_if)
+{
+}
+
+StackFrame::~StackFrame()
+{
+}
+
+void IfFrame::fixup_mid(r600_bytecode_cf *source)
+{
+ /* JUMP target is ELSE */
+ start->cf_addr = source->id;
+}
+
+void IfFrame::fixup_pop(r600_bytecode_cf *final)
+{
+ /* JUMP or ELSE target is one past last CF instruction */
+ unsigned offset = final->eg_alu_extended ? 4 : 2;
+ auto src = mid.empty() ? start : mid[0];
+ src->cf_addr = final->id + offset;
+ src->pop_count = 1;
+}
+
+LoopFrame::LoopFrame(r600_bytecode_cf *s):
+ StackFrame(s, jt_loop)
+{
+}
+
+void LoopFrame::fixup_mid(UNUSED r600_bytecode_cf *mid)
+{
+}
+
+void LoopFrame::fixup_pop(r600_bytecode_cf *final)
+{
+ /* The LOOP_END jump target is one past LOOP_START */
+ final->cf_addr = start->id + 2;
+
+ /* The LOOP_START jump target is one past LOOP_END */
+ start->cf_addr = final->id + 2;
+
+ /* BREAK and CONTINUE point at LOOP_END */
+ for (auto m : mid)
+ m->cf_addr = final->id;
+}
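+
+/* Sketch of the intended call sequence for an if/else construct; the
+ * r600_bytecode_cf pointers are placeholders for the emitted CF entries:
+ *
+ * tracker.push(jump_cf, jt_if); // JUMP opening the 'if'
+ * tracker.add_mid(else_cf, jt_if); // optional ELSE
+ * tracker.pop(pop_cf, jt_if); // POP; fixes up all target addresses
+ */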
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_CONDITIONALJUMPTRACKER_H
+#define SFN_CONDITIONALJUMPTRACKER_H
+
+#include "gallium/drivers/r600/r600_asm.h"
+
+namespace r600 {
+
+enum JumpType {
+ jt_loop,
+ jt_if
+};
+
+/**
+ Class to track the locations of conditional jumps and loops
+ and fix up their target addresses.
+*/
+
+
+class ConditionalJumpTracker
+{
+public:
+ ConditionalJumpTracker();
+ ~ConditionalJumpTracker();
+
+ /* Mark the start of a loop or an if/else */
+
+ void push(r600_bytecode_cf *start, JumpType type);
+
+ /* Mark the end of a loop or an if/else and fix up the jump sites */
+ bool pop(r600_bytecode_cf *final, JumpType type);
+
+ /* Add a mid-construct jump site to the current frame, i.e. continue
+ * and break inside loops, and else in if-then-else constructs.
+ */
+ bool add_mid(r600_bytecode_cf *source, JumpType type);
+
+private:
+ struct ConditionalJumpTrackerImpl * impl;
+};
+
+}
+
+#endif // SFN_CONDITIONALJUMPTRACKER_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_debug.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+class stderr_streambuf : public std::streambuf
+{
+public:
+ stderr_streambuf();
+protected:
+ int sync();
+ int overflow(int c);
+ std::streamsize xsputn ( const char *s, std::streamsize n );
+};
+
+stderr_streambuf::stderr_streambuf()
+{
+
+}
+
+int stderr_streambuf::sync()
+{
+ fflush(stderr);
+ return 0;
+}
+
+int stderr_streambuf::overflow(int c)
+{
+ fputc(c, stderr);
+ return 0;
+}
+
+static const struct debug_named_value sfn_debug_options[] = {
+ {"instr", SfnLog::instr, "Log all consumed nir instructions"},
+ {"ir", SfnLog::r600ir, "Log created R600 IR"},
+ {"cc", SfnLog::cc, "Log R600 IR to assembly code creation"},
+ {"noerr", SfnLog::err, "Don't log shader conversion errors"},
+ {"si", SfnLog::shader_info, "Log shader info (non-zero values)"},
+ {"ts", SfnLog::test_shader, "Log shaders in tests"},
+ {"reg", SfnLog::reg, "Log register allocation and lookup"},
+ {"io", SfnLog::io, "Log shader in and output"},
+ {"ass", SfnLog::assembly, "Log IR to assembly conversion"},
+ {"flow", SfnLog::flow, "Log Flow instructions"},
+ {"merge", SfnLog::merge, "Log register merge operations"},
+ {"nomerge", SfnLog::nomerge, "Skup egister merge step"},
+ {"tex", SfnLog::tex, "Log texture ops"},
+ {"trans", SfnLog::trans, "Log generic translation messages"},
+ DEBUG_NAMED_VALUE_END
+};
+
+SfnLog sfn_log;
+
+std::streamsize stderr_streambuf::xsputn ( const char *s, std::streamsize n )
+{
+ std::streamsize i = n;
+ while (i--)
+ fputc(*s++, stderr);
+ return n;
+}
+
+SfnLog::SfnLog():
+ m_active_log_flags(0),
+ m_log_mask(0),
+ m_output(new stderr_streambuf())
+{
+ m_log_mask = debug_get_flags_option("R600_NIR_DEBUG", sfn_debug_options, 0);
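+ /* "err" is enabled by default; the "noerr" option toggles it off again. */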
+ m_log_mask ^= err;
+}
+
+SfnLog& SfnLog::operator << (SfnLog::LogFlag const l)
+{
+ m_active_log_flags = l;
+ return *this;
+}
+
+SfnLog& SfnLog::operator << (UNUSED std::ostream & (*f)(std::ostream&))
+{
+ if (m_active_log_flags & m_log_mask)
+ m_output << f;
+ return *this;
+}
+
+SfnLog& SfnLog::operator << (nir_shader& sh)
+{
+ if (m_active_log_flags & m_log_mask)
+ nir_print_shader(&sh, stderr);
+ return *this;
+}
+
+SfnLog& SfnLog::operator << (nir_instr &instr)
+{
+ if (m_active_log_flags & m_log_mask)
+ nir_print_instr(&instr, stderr);
+ return *this;
+}
+
+SfnTrace::SfnTrace(SfnLog::LogFlag flag, const char *msg):
+ m_flag(flag),
+ m_msg(msg)
+{
+ sfn_log << m_flag << std::string(" ", 2 * m_indention++)
+ << "BEGIN: " << m_msg << "\n";
+}
+
+SfnTrace::~SfnTrace()
+{
+ sfn_log << m_flag << std::string(" ", 2 * m_indention--)
+ << "END: " << m_msg << "\n";
+}
+
+int SfnTrace::m_indention = 0;
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_STDERR_STREAMLOG_H
+#define SFN_STDERR_STREAMLOG_H
+
+
+#include <streambuf>
+#include <ostream>
+#include <fstream>
+#include "compiler/nir/nir.h"
+
+namespace r600 {
+/* Implements some logging for shader-from-nir (sfn). */
+
+class SfnLog {
+public:
+ enum LogFlag {
+ instr = 1 << 0,
+ r600ir = 1 << 1,
+ cc = 1 << 2,
+ err = 1 << 3,
+ shader_info = 1 << 4,
+ test_shader = 1 << 5,
+ reg = 1 << 6,
+ io = 1 << 7,
+ assembly = 1 << 8,
+ flow = 1 << 9,
+ merge = 1 << 10,
+ tex = 1 << 11,
+ trans = 1 << 12,
+ all = (1 << 13) - 1,
+ nomerge = 1 << 16,
+ };
+
+ SfnLog();
+
+ /** Special handling to set the log level "inline"
+ \param l the log flag of the messages that follow
+ */
+ SfnLog& operator << (LogFlag const l);
+
+ /* General output routine; output is only written if the message's log
+ * flags overlap with the currently active log mask.
+ \returns a reference to this object
+ */
+ template <class T>
+ SfnLog& operator << (const T& text)
+ {
+ if (m_active_log_flags & m_log_mask)
+ m_output << text;
+
+ return *this;
+ }
+
+ /* A funny construct to enable std::endl to work on this stream
+ idea of Dave Brondsema:
+ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8567
+ */
+ SfnLog& operator << (std::ostream & (*f)(std::ostream&));
+
+ SfnLog& operator << (nir_shader &sh);
+
+ SfnLog& operator << (nir_instr& instr);
+
+ int has_debug_flag(uint64_t flag) {
+ return (m_log_mask & flag) == flag;
+ }
+
+private:
+ uint64_t m_active_log_flags;
+ uint64_t m_log_mask;
+ std::ostream m_output;
+};
+
+class SfnTrace {
+public:
+ SfnTrace(SfnLog::LogFlag flag, const char *msg);
+ ~SfnTrace();
+private:
+ SfnLog::LogFlag m_flag;
+ const char *m_msg;
+ static int m_indention;
+};
+
+
+#ifndef NDEBUG
+#define SFN_TRACE_FUNC(LEVEL, MSG) SfnTrace __trace(LEVEL, MSG)
+#else
+#define SFN_TRACE_FUNC(LEVEL, MSG)
+#endif
+
+extern SfnLog sfn_log;
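+
+/* Usage sketch (assuming R600_NIR_DEBUG=instr is set in the environment;
+ * 'instr' here is a placeholder nir_instr pointer):
+ *
+ * sfn_log << SfnLog::instr << "emit: " << *instr << "\n";
+ * SFN_TRACE_FUNC(SfnLog::flow, "do_emit");
+ */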
+
+}
+#endif // SFN_STDERR_STREAMLOG_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_defines_h
+#define sfn_defines_h
+
+#include "../r600_isa.h"
+
+namespace r600 {
+
+
+enum EGWSOpCode {
+ cf_sema_v = 0,
+ cf_sema_p = 1,
+ cf_gws_barrier = 2,
+ cf_gws_init = 3,
+};
+
+/* CF ALU instructions [29:26], highest bit always set. */
+enum ECFAluOpCode {
+ cf_alu_undefined = 0,
+ cf_alu = CF_OP_ALU,
+ cf_alu_push_before = CF_OP_ALU_PUSH_BEFORE,
+ cf_alu_pop_after = CF_OP_ALU_POP_AFTER,
+ cf_alu_pop2_after = CF_OP_ALU_POP2_AFTER,
+ cf_alu_extended = CF_OP_ALU_EXT,
+ cf_alu_continue = CF_OP_ALU_CONTINUE,
+ cf_alu_break = CF_OP_ALU_BREAK,
+ cf_alu_else_after = CF_OP_ALU_ELSE_AFTER,
+};
+
+enum ECFAluOpCodeEG {
+ eg_cf_alu_undefined = 0,
+ eg_cf_alu = 8,
+ eg_cf_alu_push_before = 9,
+ eg_cf_alu_pop_after = 10,
+ eg_cf_alu_pop2_after = 11,
+ eg_cf_alu_extended = 12,
+ eg_cf_alu_continue = 13,
+ eg_cf_alu_break = 14,
+ eg_cf_alu_else_after = 15,
+};
+
+
+enum ECFOpCode {
+ cf_nop = CF_OP_NOP,
+ cf_tc = CF_OP_TEX,
+ cf_vc = CF_OP_VTX,
+ cf_gds = CF_OP_GDS,
+ cf_loop_start = CF_OP_LOOP_START,
+ cf_loop_end = CF_OP_LOOP_END,
+ cf_loop_start_dx10 = CF_OP_LOOP_START_DX10,
+ cf_loop_start_no_al = CF_OP_LOOP_START_NO_AL,
+ cf_loop_continue = CF_OP_LOOP_CONTINUE,
+ cf_loop_break = CF_OP_LOOP_BREAK,
+ cf_jump = CF_OP_JUMP,
+ cf_push = CF_OP_PUSH,
+ cf_else = CF_OP_ELSE,
+ cf_pop = CF_OP_POP,
+ /* 15 - 17 reserved */
+ cf_call = CF_OP_CALL,
+ cf_call_fs = CF_OP_CALL_FS,
+ cf_return = CF_OP_RET,
+ cf_emit_vertex = CF_OP_EMIT_VERTEX,
+ cf_emit_cut_vertex = CF_OP_EMIT_CUT_VERTEX,
+ cf_cut_vertex = CF_OP_CUT_VERTEX,
+ cf_kill = CF_OP_KILL,
+ /* 25 reserved */
+ cf_wait_ack = CF_OP_WAIT_ACK,
+ cf_tc_ack = CF_OP_TEX_ACK,
+ cf_vc_ack = CF_OP_VTX_ACK,
+ cf_jump_table = CF_OP_JUMPTABLE,
+ cf_global_wave_sync = CF_OP_WAVE_SYNC,
+ cf_halt = CF_OP_HALT,
+ /* gap 32-63*/
+ cf_mem_stream0_buf0 = CF_OP_MEM_STREAM0_BUF0,
+ cf_mem_stream0_buf1 = CF_OP_MEM_STREAM0_BUF1,
+ cf_mem_stream0_buf2 = CF_OP_MEM_STREAM0_BUF2,
+ cf_mem_stream0_buf3 = CF_OP_MEM_STREAM0_BUF3,
+
+ cf_mem_stream1_buf0 = CF_OP_MEM_STREAM1_BUF0,
+ cf_mem_stream1_buf1 = CF_OP_MEM_STREAM1_BUF1,
+ cf_mem_stream1_buf2 = CF_OP_MEM_STREAM1_BUF2,
+ cf_mem_stream1_buf3 = CF_OP_MEM_STREAM1_BUF3,
+
+ cf_mem_stream2_buf0 = CF_OP_MEM_STREAM2_BUF0,
+ cf_mem_stream2_buf1 = CF_OP_MEM_STREAM2_BUF1,
+ cf_mem_stream2_buf2 = CF_OP_MEM_STREAM2_BUF2,
+ cf_mem_stream2_buf3 = CF_OP_MEM_STREAM2_BUF3,
+
+ cf_mem_stream3_buf0 = CF_OP_MEM_STREAM3_BUF0,
+ cf_mem_stream3_buf1 = CF_OP_MEM_STREAM3_BUF1,
+ cf_mem_stream3_buf2 = CF_OP_MEM_STREAM3_BUF2,
+ cf_mem_stream3_buf3 = CF_OP_MEM_STREAM3_BUF3,
+
+ cf_mem_write_scratch = CF_OP_MEM_SCRATCH ,
+ /* reserved 81 */
+ cf_mem_ring = CF_OP_MEM_RING,
+ cf_export = CF_OP_EXPORT,
+ cf_export_done = CF_OP_EXPORT_DONE,
+ cf_mem_export = CF_OP_MEM_EXPORT,
+ cf_mem_rat = CF_OP_MEM_RAT,
+ cf_mem_rat_cacheless = CF_OP_MEM_RAT_NOCACHE,
+
+ cf_mem_ring1 = CF_OP_MEM_RING1,
+ cf_mem_ring2 = CF_OP_MEM_RING2,
+ cf_mem_ring3 = CF_OP_MEM_RING3,
+ cf_mem_export_combined = CF_OP_MEM_MEM_COMBINED,
+ cf_mem_rat_combined_cacheless = CF_OP_MEM_RAT_COMBINED_NOCACHE
+
+};
+
+enum ECFOpCodeEG {
+ eg_cf_nop = 0,
+ eg_cf_tc = 1,
+ eg_cf_vc = 2,
+ eg_cf_gds = 3,
+ eg_cf_loop_start = 4,
+ eg_cf_loop_end = 5,
+ eg_cf_loop_start_dx10 = 6,
+ eg_cf_loop_start_no_al = 7,
+ eg_cf_loop_continue = 8,
+ eg_cf_loop_break = 9,
+ eg_cf_jump = 10,
+ eg_cf_push = 11,
+ eg_cf_else = 13,
+ eg_cf_pop = 14,
+ /* 15 - 17 reserved */
+ eg_cf_call = 18,
+ eg_cf_call_fs,
+ eg_cf_return,
+ eg_cf_emit_vertex,
+ eg_cf_emit_cut_vertex,
+ eg_cf_cut_vertex,
+ eg_cf_kill,
+ /* 25 reserved */
+ eg_cf_wait_ack = 26,
+ eg_cf_tc_ack,
+ eg_cf_vc_ack,
+ eg_cf_jump_table,
+ eg_cf_global_wave_sync,
+ eg_cf_halt,
+ /* gap 32-63*/
+ eg_cf_mem_stream0_buf0 = 64,
+ eg_cf_mem_stream0_buf1,
+ eg_cf_mem_stream0_buf2,
+ eg_cf_mem_stream0_buf3,
+
+ eg_cf_mem_stream1_buf0,
+ eg_cf_mem_stream1_buf1,
+ eg_cf_mem_stream1_buf2,
+ eg_cf_mem_stream1_buf3,
+
+ eg_cf_mem_stream2_buf0,
+ eg_cf_mem_stream2_buf1,
+ eg_cf_mem_stream2_buf2,
+ eg_cf_mem_stream2_buf3,
+
+ eg_cf_mem_stream3_buf0,
+ eg_cf_mem_stream3_buf1,
+ eg_cf_mem_stream3_buf2,
+ eg_cf_mem_stream3_buf3,
+
+ eg_cf_mem_write_scratch,
+ /* reserved 81 */
+ eg_cf_mem_ring = 82,
+ eg_cf_export,
+ eg_cf_export_done,
+ eg_cf_mem_export,
+ eg_cf_mem_rat,
+ eg_cf_mem_rat_cacheless,
+
+ eg_cf_mem_ring1,
+ eg_cf_mem_ring2,
+ eg_cf_mem_ring3,
+ eg_cf_mem_export_combined,
+ eg_cf_mem_rat_combined_cacheless
+};
+
+
+enum EVFetchInstr {
+ vc_fetch = FETCH_OP_VFETCH,
+ vc_semantic = FETCH_OP_SEMFETCH,
+ vc_get_buf_resinfo = FETCH_OP_GET_BUFFER_RESINFO,
+ vc_read_scratch = FETCH_OP_READ_SCRATCH,
+ vc_unknown
+};
+
+enum EVFetchType {
+ vertex_data = 0,
+ instance_data = 1,
+ no_index_offset = 2
+};
+
+enum EVTXDataFormat {
+ fmt_invalid = 0,
+ fmt_8 = 1,
+ fmt_4_4 = 2,
+ fmt_3_3_2 = 3,
+ fmt_reserved_4 = 4,
+ fmt_16 = 5,
+ fmt_16_float = 6,
+ fmt_8_8 = 7,
+ fmt_5_6_5 = 8,
+ fmt_6_5_5 = 9,
+ fmt_1_5_5_5 = 10,
+ fmt_4_4_4_4 = 11,
+ fmt_5_5_5_1 = 12,
+ fmt_32 = 13,
+ fmt_32_float = 14,
+ fmt_16_16 = 15,
+ fmt_16_16_float = 16,
+ fmt_8_24 = 17,
+ fmt_8_24_float = 18,
+ fmt_24_8 = 19,
+ fmt_24_8_float = 20,
+ fmt_10_11_11 = 21,
+ fmt_10_11_11_float = 22,
+ fmt_11_11_10 = 23,
+ fmt_11_11_10_float = 24,
+ fmt_2_10_10_10 = 25,
+ fmt_8_8_8_8 = 26,
+ fmt_10_10_10_2 = 27,
+ fmt_x24_8_32_float = 28,
+ fmt_32_32 = 29,
+ fmt_32_32_float = 30,
+ fmt_16_16_16_16 = 31,
+ fmt_16_16_16_16_float = 32,
+ fmt_reserved_33 = 33,
+ fmt_32_32_32_32 = 34,
+ fmt_32_32_32_32_float = 35,
+ fmt_reserved_36 = 36,
+ fmt_1 = 37,
+ fmt_1_reversed = 38,
+ fmt_gb_gr = 39,
+ fmt_bg_rg = 40,
+ fmt_32_as_8 = 41,
+ fmt_32_as_8_8 = 42,
+ fmt_5_9_9_9_sharedexp = 43,
+ fmt_8_8_8 = 44,
+ fmt_16_16_16 = 45,
+ fmt_16_16_16_float = 46,
+ fmt_32_32_32 = 47,
+ fmt_32_32_32_float = 48,
+ fmt_bc1 = 49,
+ fmt_bc2 = 50,
+ fmt_bc3 = 51,
+ fmt_bc4 = 52,
+ fmt_bc5 = 53,
+ fmt_apc0 = 54,
+ fmt_apc1 = 55,
+ fmt_apc2 = 56,
+ fmt_apc3 = 57,
+ fmt_apc4 = 58,
+ fmt_apc5 = 59,
+ fmt_apc6 = 60,
+ fmt_apc7 = 61,
+ fmt_ctx1 = 62,
+ fmt_reserved_63 = 63
+};
+
+enum EVFetchNumFormat {
+ vtx_nf_norm = 0,
+ vtx_nf_int = 1,
+ vtx_nf_scaled = 2
+};
+
+enum EVFetchEndianSwap {
+ vtx_es_none = 0,
+ vtx_es_8in16 = 1,
+ vtx_es_8in32 = 2
+};
+
+enum EVFetchFlagShift {
+ vtx_fetch_whole_quad,
+ vtx_use_const_field,
+ vtx_format_comp_signed,
+ vtx_srf_mode,
+ vtx_buf_no_stride,
+ vtx_alt_const,
+ vtx_use_tc,
+ vtx_vpm,
+ vtx_unknwon
+};
+
+enum EBufferIndexMode {
+ bim_none,
+ bim_zero,
+ bim_one,
+ bim_invalid
+};
+
+}
+
+#endif // sfn_defines_h
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_emitaluinstruction.h"
+#include "sfn_debug.h"
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+namespace r600 {
+
+using std::vector;
+
+EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
+ EmitInstruction (processor)
+{
+
+}
+
+bool EmitAluInstruction::do_emit(nir_instr* ir)
+{
+ const nir_alu_instr& instr = *nir_instr_as_alu(ir);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *ir
+ << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
+ << "' (" << __func__ << ")\n";
+
+ split_constants(instr);
+
+ switch (instr.op) {
+ case nir_op_b2f32: return emit_alu_b2f(instr);
+ case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
+ case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
+   case nir_op_mov: return emit_alu_op1(instr, op1_mov);
+ case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
+ case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
+ case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
+ case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
+ case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
+ case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
+ case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
+ case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
+ case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
+ case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
+
+ case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
+ case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
+ case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
+ case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
+ case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
+ case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
+
+ case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
+ case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
+ case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
+
+ case nir_op_fsign: return emit_fsign(instr);
+ case nir_op_fdph: return emit_fdph(instr);
+
+ case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
+ case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
+ case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
+ case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
+ case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
+
+ case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
+ case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
+ case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
+ case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
+ case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
+ case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
+ case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
+ case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
+ case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
+ case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
+ case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
+ case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
+ case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
+ case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
+ case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
+ case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
+ case nir_op_iabs: return emit_alu_iabs(instr);
+ case nir_op_ineg: return emit_alu_ineg(instr);
+ case nir_op_idiv: return emit_alu_div_int(instr, true, false);
+ case nir_op_udiv: return emit_alu_div_int(instr, false, false);
+ case nir_op_umod: return emit_alu_div_int(instr, false, true);
+ case nir_op_isign: return emit_alu_isign(instr);
+
+ case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
+ case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
+ case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
+
+ case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
+
+ case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
+ case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10);
+ case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
+
+ case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
+ case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
+ case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
+ case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
+ case nir_op_fadd: return emit_alu_op2(instr, op2_add);
+ case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
+ case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
+ case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
+ case nir_op_fdot2: return emit_dot(instr, 2);
+ case nir_op_fdot3: return emit_dot(instr, 3);
+ case nir_op_fdot4: return emit_dot(instr, 4);
+
+ case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+
+ case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+
+ case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+
+ case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+
+
+ case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
+ case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_vec2: return emit_create_vec(instr, 2);
+ case nir_op_vec3: return emit_create_vec(instr, 3);
+ case nir_op_vec4: return emit_create_vec(instr, 4);
+
+ case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
+ case nir_op_ufind_msb: return emit_find_msb(instr, false);
+ case nir_op_ifind_msb: return emit_find_msb(instr, true);
+ case nir_op_b2i32: return emit_b2i32(instr);
+ case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
+ case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
+ case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
+ case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
+ case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
+ case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
+
+
+ /* These are in the ALU instruction list, but they should be texture instructions */
+ case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
+ case nir_op_fddx_coarse:
+ case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
+
+ case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
+ case nir_op_fddy_coarse:
+   case nir_op_fddy: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, false);
+
+ default:
+ return false;
+ }
+}
+
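+/* An ALU instruction group on r600 can, as far as this pass models it, only
+ * read constants from one kcache line at a time. When the sources of an
+ * instruction refer to more than one constant line (i.e. they have different
+ * sel values), all but the first are therefore fetched into a temporary
+ * register via load_uniform before the instruction is emitted. */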
+void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
+{
+ const nir_op_info *op_info = &nir_op_infos[instr.op];
+ if (op_info->num_inputs < 2)
+ return;
+
+ int nconst = 0;
+ std::array<PValue,4> c;
+ std::array<int,4> idx;
+ for (unsigned i = 0; i < op_info->num_inputs; ++i) {
+ PValue src = from_nir(instr.src[i], 0);
+ assert(src);
+ if (src->type() == Value::kconst) {
+ c[nconst] = src;
+
+ idx[nconst++] = i;
+ }
+ }
+ if (nconst < 2)
+ return;
+
+ unsigned sel = c[0]->sel();
+ sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
+
+ for (int i = 1; i < nconst; ++i) {
+ sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
+ if (c[i]->sel() != sel) {
+ load_uniform(instr.src[idx[i]]);
+ }
+ }
+}
+
+bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
+{
+ if (instr.src[0].negate || instr.src[0].abs) {
+ std::cerr << "source modifiers not supported with int ops\n";
+ return false;
+ }
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
+ const AluOpFlags& flags)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), write);
+
+ if (flags.test(alu_src0_abs) || instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+
+ if (instr.src[0].negate ^ flags.test(alu_src0_neg))
+ ir->set_flag(alu_src0_neg);
+
+ if (flags.test(alu_dst_clamp) || instr.dest.saturate)
+ ir->set_flag(alu_dst_clamp);
+
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
+{
+ // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
+ // then shift back
+
+ const float inv_2_pi = 0.15915494f;
+
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
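+   /* Range reduction sketch: with n = x * (1 / (2*pi)) the code below
+    * computes fract(n + 0.5) - 0.5, i.e. the angle in turns, wrapped to
+    * [-0.5, 0.5). This assumes the hardware SIN/COS ops take an argument
+    * that is already normalized to turns; no multiply by 2*pi follows. */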
+   PValue inv_two_pi = PValue(new LiteralValue(inv_2_pi, 0));
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+      ir = new AluInstruction(op3_muladd_ieee, v[i],
+                              {from_nir(instr.src[0], i), inv_two_pi, Value::zero_dot_5},
+                              {alu_write});
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (unsigned i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (unsigned i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
+ ir->set_flag(alu_src1_neg);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (unsigned i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+
+ ir = new AluInstruction(opcode, v[i], v[i], last_write);
+ emit_instruction(ir);
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
+ bool absolute)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), last_write);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
+{
+ AluInstruction *ir = nullptr;
+ std::array<PValue, 4> v;
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ v[i] = from_nir(instr.dest, i);
+ ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
+ if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op, v[i], v[i], {alu_write});
+ emit_instruction(ir);
+ if (op == op1_flt_to_uint)
+ make_last(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
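+/* FFBH returns the bit position counted from the MSB, or ~0u when no
+ * significant bit is found. nir's find_msb counts from the LSB, hence the
+ * 31 - x correction; the final CNDGE_INT lets the negative "not found"
+ * result pass through unchanged. */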
+bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
+{
+ int sel_tmp = allocate_temp_register();
+ int sel_tmp2 = allocate_temp_register();
+ GPRVector tmp(sel_tmp, {0,1,2,3});
+ GPRVector tmp2(sel_tmp2, {0,1,2,3});
+ AluInstruction *ir = nullptr;
+ EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+
+ ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+
+ ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
+ PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+
+ ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
+ tmp2.reg_i(i), tmp.reg_i(i), write);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+
+ ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), Value::one_i, write);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), write);
+ emit_instruction(ir);
+ }
+   make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
+{
+ emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
+ from_nir(instr.src[0], comp), last_write));
+ return true;
+}
+
+bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
+{
+ AluInstruction *ir = nullptr;
+ std::set<int> src_slot;
+ for(unsigned i = 0; i < nc; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ auto src = from_nir(instr.src[i], 0);
+ ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+
+      // FIXME: This is a rather crude workaround for the restriction that
+      // r600 can't read four different registers in the same component
+      // slot of one instruction group; we only check the register index here.
+ if (src->type() == Value::gpr)
+ src_slot.insert(src->sel());
+ if (src_slot.size() >= 3) {
+ src_slot.clear();
+ ir->set_flag(alu_last_instr);
+ }
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
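+/* DOT4 sums the products over all four slots of an instruction group and
+ * broadcasts the result, so for fdot2/fdot3 the remaining slots are fed
+ * zeros, and lanes outside the write mask get an empty destination. */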
+bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
+{
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < n ; ++i) {
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
+ from_nir(src0, i), from_nir(src1, i),
+ instr.dest.write_mask & (1 << i) ? write : empty);
+
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ for (int i = n; i < 4 ; ++i) {
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
+ Value::zero, Value::zero,
+ instr.dest.write_mask & (1 << i) ? write : empty);
+ emit_instruction(ir);
+ }
+
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
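+/* fdph(a, b) = dot(vec4(a.xyz, 1.0), b): the same DOT4 pattern as above,
+ * with the w slot of src0 replaced by the constant 1.0. */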
+bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
+{
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 3 ; ++i) {
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
+ from_nir(src0, i), from_nir(src1, i),
+ instr.dest.write_mask & (1 << i) ? write : empty);
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
+ from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ emit_instruction(ir);
+
+ ir->set_flag(alu_last_instr);
+ return true;
+
+}
+
+bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)) {
+ ir = new AluInstruction(op, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), Value::zero,
+ write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
+ from_nir(instr.src[0], i), Value::one_f, write);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
+{
+
+ AluInstruction *ir = nullptr;
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
+ EAluOp combine = all ? op2_and_int : op2_or_int;
+
+   /* For integers we cannot use the source modifiers, so this needs to be
+    * emulated; it should really be lowered in NIR. */
+ if (instr.src[0].negate == instr.src[1].negate &&
+ instr.src[0].abs == instr.src[1].abs) {
+
+ for (unsigned i = 0; i < nc ; ++i) {
+ ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
+ from_nir(instr.src[1], i), write);
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ } else {
+ std::cerr << "Negate in iequal/inequal not (yet) supported\n";
+ return false;
+ }
+
+ for (unsigned i = 0; i < nc/2 ; ++i) {
+ ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ if (nc > 2) {
+ ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
+ emit_instruction(ir);
+ }
+
+ return true;
+}
+
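+/* Reduce with MAX4: the per-component compares produce 0.0 or 1.0 and MAX4
+ * folds all four slots into one value. For "any" the unused slots are
+ * padded with 0.0; for "all" the MAX4 inputs are negated (computing
+ * -min(v)) and padded with -1.0. The final compare against (-)1.0 then
+ * yields the dx10-style boolean result. */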
+bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
+{
+ AluInstruction *ir = nullptr;
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
+ for (unsigned i = 0; i < nc ; ++i) {
+ ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
+ from_nir(instr.src[1],i), write);
+
+ if (instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate)
+ ir->set_flag(alu_src0_neg);
+
+ if (instr.src[1].abs)
+ ir->set_flag(alu_src1_abs);
+ if (instr.src[1].negate)
+ ir->set_flag(alu_src1_neg);
+
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ for (unsigned i = 0; i < nc ; ++i) {
+ ir = new AluInstruction(op1_max4, v[i], v[i], write);
+ if (all) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+
+ for (unsigned i = nc; i < 4 ; ++i) {
+ ir = new AluInstruction(op1_max4, v[i],
+ all ? Value::one_f : Value::zero, write);
+ if (all)
+ ir->set_flag(alu_src0_neg);
+
+ emit_instruction(ir);
+ }
+
+ ir->set_flag(alu_last_instr);
+
+ if (all)
+ op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
+ else
+ op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
+
+ ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
+ if (all)
+ ir->set_flag(alu_src1_neg);
+ emit_instruction(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
+{
+ AluInstruction *ir = nullptr;
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
+ for (unsigned i = 0; i < 2 ; ++i) {
+ ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
+ from_nir(instr.src[1],i), write);
+ if (instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate)
+ ir->set_flag(alu_src0_neg);
+
+ if (instr.src[1].abs)
+ ir->set_flag(alu_src1_abs);
+ if (instr.src[1].negate)
+ ir->set_flag(alu_src1_neg);
+
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
+ ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
+ emit_instruction(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
+{
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
+{
+
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ if (src0.negate || src1.negate ||
+ src0.abs || src1.abs) {
+ std::cerr << "R600: don't support modifiers with integer operations";
+ return false;
+ }
+ return emit_alu_op2(instr, opcode, opts);
+}
+
+bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
+{
+ const nir_alu_src *src0 = &instr.src[0];
+ const nir_alu_src *src1 = &instr.src[1];
+
+ if (ops & op2_opt_reverse)
+ std::swap(src0, src1);
+
+   bool src1_negate = ((ops & op2_opt_neg_src1) != 0) != src1->negate;
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ from_nir(*src0, i), from_nir(*src1, i), write);
+
+ if (src0->negate) ir->set_flag(alu_src0_neg);
+ if (src0->abs) ir->set_flag(alu_src0_abs);
+ if (src1_negate) ir->set_flag(alu_src1_neg);
+ if (src1->abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
+{
+ const nir_alu_src *src0 = &instr.src[0];
+ const nir_alu_src *src1 = &instr.src[1];
+
+ if (ops & op2_opt_reverse)
+ std::swap(src0, src1);
+
+ GPRVector::Values v0;
+ for (int i = 0; i < 4 ; ++i)
+ v0[i] = from_nir(*src0, i);
+
+ GPRVector::Values v1;
+ for (int i = 0; i < 4 ; ++i)
+ v1[i] = from_nir(*src1, i);
+
+ if (src0->abs || src0->negate) {
+ int src0_tmp = allocate_temp_register();
+ GPRVector::Values v0_temp;
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)) {
+ v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
+ ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
+ if (src0->abs) ir->set_flag(alu_src0_abs);
+ if (src0->negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ v0[i] = v0_temp[i];
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ if (src1->abs || src1->negate) {
+ int src1_tmp = allocate_temp_register();
+ GPRVector::Values v1_temp;
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)) {
+ v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
+ ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
+ if (src1->abs) ir->set_flag(alu_src0_abs);
+ if (src1->negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ v1[i] = v1_temp[i];
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+
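+/* isign(x): the first CNDGT_INT replaces positive values by 1, then
+ * tmp = 0 - help, and the second CNDGT_INT replaces negative inputs
+ * (i.e. tmp > 0) by -1, leaving zero unchanged. */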
+bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
+{
+ int sel_tmp = allocate_temp_register();
+ GPRVector tmp(sel_tmp, {0,1,2,3});
+
+ AluInstruction *ir = nullptr;
+ PValue help[4];
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ help[i] = from_nir(instr.dest, i);
+ auto s = from_nir(instr.src[0], i);
+ ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+
+ ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
+ PValue(new LiteralValue(-1,0)), help[i], write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
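+/* fsign(x): the first CNDGT replaces positive values by 1.0; the second
+ * pass negates both the condition and the 1.0 operand, so negative inputs
+ * become -1.0 while zero falls through unchanged. */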
+bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
+{
+ PValue help[4];
+ PValue src[4];
+ AluInstruction *ir = nullptr;
+
+ for (int i = 0; i < 4 ; ++i) {
+ help[i] = from_nir(instr.dest, i);
+ src[i] = from_nir(instr.src[0], i);
+ }
+
+ if (instr.src[0].abs) {
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
+ ir->set_flag(alu_src0_abs);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ if (instr.src[0].negate) {
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op1_mov, help[i], help[i], write);
+ ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ return true;
+ }
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
+ if (instr.src[0].negate) {
+ ir->set_flag(alu_src0_neg);
+ ir->set_flag(alu_src2_neg);
+ }
+ emit_instruction(ir);
+ }
+ }
+
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
+ ir->set_flag(alu_src0_neg);
+ ir->set_flag(alu_src1_neg);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
+ std::array<uint8_t, 3> reorder)
+{
+ const nir_alu_src *src[3];
+ src[0] = &instr.src[reorder[0]];
+ src[1] = &instr.src[reorder[1]];
+ src[2] = &instr.src[reorder[2]];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ from_nir(*src[0], i), from_nir(*src[1], i),
+ from_nir(*src[2], i), write);
+
+ if (src[0]->negate) ir->set_flag(alu_src0_neg);
+ if (src[1]->negate) ir->set_flag(alu_src1_neg);
+ if (src[2]->negate) ir->set_flag(alu_src2_neg);
+
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ ir->set_flag(alu_write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
+ from_nir(instr.src[0], i), write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ return true;
+}
+
+static const char swz[] = "xyzw01?_";
+
+bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
+{
+ int sel_tmp = allocate_temp_register();
+ GPRVector tmp(sel_tmp, {0,1,2,3});
+
+ std::array<PValue,4> src;
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ src[i] = from_nir(instr.src[0],i);
+ ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
+ src[i], tmp.reg_i(i), write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
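+/* Integer division is emulated per component: RECIP_UINT provides an
+ * approximate fixed-point reciprocal of the divisor, the following
+ * mullo/mulhi/sub/add sequence applies one error-correction step, and a
+ * final +/-1 adjustment fixes up quotient or remainder. For the signed
+ * variants the operands are made positive first; the result sign is then
+ * restored from src0 (for the remainder) or from rsign = src0 ^ src1 (for
+ * the quotient). This follows the classic TGSI-style lowering; the exact
+ * precision of RECIP_UINT is assumed here, not verified. */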
+bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
+{
+
+ int sel_tmp = allocate_temp_register();
+ int sel_tmp0 = allocate_temp_register();
+ int sel_tmp1 = allocate_temp_register();
+
+ PValue asrc1(new GPRValue(sel_tmp, 0));
+ PValue asrc2(new GPRValue(sel_tmp, 1));
+ PValue rsign(new GPRValue(sel_tmp, 2));
+ PValue err(new GPRValue(sel_tmp, 3));
+
+ GPRVector tmp0(sel_tmp0, {0,1,2,3});
+ GPRVector tmp1(sel_tmp1, {0,1,2,3});
+
+ std::array<PValue, 4> src0;
+ std::array<PValue, 4> src1;
+
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)) {
+ src0[i] = from_nir(instr.src[0], i);
+ src1[i] = from_nir(instr.src[1], i);
+ }
+ }
+
+
+ for (int i = 3; i >= 0 ; --i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ if (use_signed) {
+ emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
+ emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
+ emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
+
+
+ emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
+ emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
+ } else {
+ asrc1 = src0[i];
+ asrc2 = src1[i];
+ }
+
+ emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
+
+ emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
+
+ emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
+ emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
+
+ emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
+
+ emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
+
+ emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
+ emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
+
+ emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
+
+ emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
+ emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
+
+ emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
+
+
+ emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
+ emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
+
+ if (mod) {
+ emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
+ emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
+ } else {
+ emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
+ emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
+ }
+
+ emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
+
+ if (mod)
+ emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
+ else
+ emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
+
+ if (use_signed) {
+ emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
+ emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
+
+ if (mod)
+ emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
+ {alu_write, alu_last_instr});
+ else
+ emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
+ {alu_write, alu_last_instr});
+ } else {
+ emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
+ }
+ }
+ return true;
+}
+
+void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
+ GPRVector::Values& v, int ncomp)
+{
+
+ AluInstruction *alu = nullptr;
+ for (int i = 0; i < ncomp; ++i) {
+ alu = new AluInstruction(op1_mov, v[i], s[i], {alu_write});
+ if (src.abs)
+ alu->set_flag(alu_src0_abs);
+ if (src.negate)
+ alu->set_flag(alu_src0_neg);
+ emit_instruction(alu);
+ }
+ make_last(alu);
+}
+
+bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
+ bool fine)
+{
+
+ GPRVector::Values v;
+ GPRVector::Values s;
+ GPRVector::Values *source = &s;
+ std::array<int, 4> writemask = {0,1,2,3};
+
+ int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
+ instr.src[0].src.reg.reg->num_components;
+
+ for (int i = 0; i < 4; ++i) {
+ writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
+ v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
+ s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
+ }
+
+ if (instr.src[0].abs || instr.src[0].negate) {
+ split_alu_modifiers(instr.src[0], s, v, ncomp);
+ source = &v;
+ }
+
+  /* This is querying the derivatives of the output fb, so we would either need
+   * access to the neighboring pixels or to the framebuffer. Neither is currently
+   * implemented. */
+ GPRVector dst(v);
+ GPRVector src(*source);
+
+ auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
+ tex->set_dest_swizzle(writemask);
+
+ if (fine) {
+ std::cerr << "Sewt fine flag\n";
+ tex->set_flag(TexInstruction::grad_fine);
+ }
+
+ emit_instruction(tex);
+
+ return true;
+}
+
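+/* BFE itself only handles widths below 32; the SETGE/CNDE pair afterwards
+ * patches the result so that a width of 32 or more returns the whole
+ * source, matching the NIR semantics. */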
+bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
+{
+ int itmp = allocate_temp_register();
+ std::array<PValue, 4> tmp;
+ std::array<PValue, 4> dst;
+ std::array<PValue, 4> src0;
+ std::array<PValue, 4> shift;
+
+ PValue l32(new LiteralValue(32));
+ unsigned write_mask = instr.dest.write_mask;
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ dst[i] = from_nir(instr.dest, i);
+ src0[i] = from_nir(instr.src[0], i);
+ shift[i] = from_nir(instr.src[2], i);
+
+ ir = new AluInstruction(opcode, dst[i],
+ {src0[i], from_nir(instr.src[1], i), shift[i]},
+ {alu_write});
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ tmp[i] = PValue(new GPRValue(itmp, i));
+ ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
+ {alu_write});
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
+ {alu_write});
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
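+/* bitfield_insert: BFM builds the bit mask from (width, offset), LSHL
+ * shifts the insert value into place, and BFI combines mask, shifted value
+ * and base. As in the extract case, a width of 32 or more is special-cased,
+ * here to return the insert value unmodified. */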
+bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
+{
+ auto t0 = get_temp_vec4();
+ auto t1 = get_temp_vec4();
+ auto t2 = get_temp_vec4();
+
+ PValue l32(new LiteralValue(32));
+ unsigned write_mask = instr.dest.write_mask;
+ if (!write_mask) return true;
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
+ from_nir(instr.src[2], i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
+ from_nir(instr.src[2], i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
+ {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+ ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
+ {t0[i], from_nir(instr.dest, i),
+ from_nir(instr.src[1], i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
+{
+ emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
+ {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
+ {alu_write, alu_last_instr});
+
+   emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
+                    {from_nir(instr.dest, 0)}, {alu_write, alu_last_instr});
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
+{
+   emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
+                    {from_nir(instr.src[0], 0)}, {alu_write, alu_last_instr});
+ return true;
+}
+
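+/* pack_half_2x16_split: convert both floats to f16, shift the second one
+ * into the high half-word, and OR the two together. */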
+bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
+{
+ int it0 = allocate_temp_register();
+ PValue x(new GPRValue(it0, 0));
+ PValue y(new GPRValue(it0, 1));
+
+   emit_instruction(op1_flt32_to_flt16, x, {from_nir(instr.src[0], 0)}, {alu_write});
+   emit_instruction(op1_flt32_to_flt16, y, {from_nir(instr.src[1], 0)}, {alu_write, alu_last_instr});
+
+   emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))}, {alu_write, alu_last_instr});
+
+   emit_instruction(op2_or_int, from_nir(instr.dest, 0), {x, y}, {alu_write, alu_last_instr});
+
+ return true;
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_EMITALUINSTRUCTION_H
+#define SFN_EMITALUINSTRUCTION_H
+
+#include "sfn_emitinstruction.h"
+
+#include "sfn_alu_defines.h"
+#include "sfn_instruction_alu.h"
+#include "sfn_instruction_tex.h"
+
+namespace r600 {
+
+
+class EmitAluInstruction : public EmitInstruction
+{
+public:
+ EmitAluInstruction(ShaderFromNirProcessor& processor);
+
+private:
+
+ enum AluOp2Opts {
+ op2_opt_none = 0,
+ op2_opt_reverse = 1,
+ op2_opt_neg_src1 = 1 << 1
+ };
+
+ bool do_emit(nir_instr* instr) override;
+
+ void split_constants(const nir_alu_instr& instr);
+
+ bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0);
+ bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
+ bool emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
+
+ bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode);
+
+ bool emit_alu_inot(const nir_alu_instr& instr);
+ bool emit_alu_iabs(const nir_alu_instr& instr);
+ bool emit_alu_ineg(const nir_alu_instr& instr);
+ bool emit_alu_isign(const nir_alu_instr& instr);
+ bool emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod);
+ bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
+
+ bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
+ bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
+ bool emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode);
+
+ bool emit_alu_b2f(const nir_alu_instr& instr);
+ bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
+ bool emit_dot(const nir_alu_instr& instr, int n);
+ bool emit_fsign(const nir_alu_instr& instr);
+ bool emit_create_vec(const nir_alu_instr& instr, unsigned nc);
+ bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
+ bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc);
+
+ bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
+ bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all);
+
+ bool emit_fdph(const nir_alu_instr &instr);
+ bool emit_discard_if(const nir_intrinsic_instr *instr);
+
+ bool emit_find_msb(const nir_alu_instr& instr, bool sgn);
+ bool emit_b2i32(const nir_alu_instr& instr);
+ bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op);
+ bool emit_pack_64_2x32_split(const nir_alu_instr& instr);
+ bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
+
+ bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
+ bool emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode);
+ bool emit_bitfield_insert(const nir_alu_instr& instr);
+ bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
+ bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
+ bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
+
+private:
+ void make_last(AluInstruction *ir) const;
+ void split_alu_modifiers(const nir_alu_src &src, GPRVector::Values& s, GPRVector::Values& v, int ncomp);
+
+ using vreg = std::array<PValue, 4>;
+
+};
+
+inline void EmitAluInstruction::make_last(AluInstruction *ir) const
+{
+ if (ir)
+ ir->set_flag(alu_last_instr);
+}
+
+}
+
+#endif // SFN_EMITALUINSTRUCTION_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_emitinstruction.h"
+
+#include "sfn_shader_base.h"
+
+namespace r600 {
+
+EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor):
+ m_proc(processor)
+{
+
+}
+
+EmitInstruction::~EmitInstruction()
+{
+}
+
+bool EmitInstruction::emit(nir_instr* instr)
+{
+ return do_emit(instr);
+}
+
+PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
+{
+ return m_proc.from_nir(v, component, swizzled);
+}
+
+PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_src& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+void EmitInstruction::emit_instruction(Instruction *ir)
+{
+ return m_proc.emit_instruction(ir);
+}
+
+bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src0,
+ const std::set<AluModifiers>& m_flags)
+{
+   return m_proc.emit_instruction(opcode, dest, src0, m_flags);
+}
+
+const nir_variable *
+EmitInstruction::get_deref_location(const nir_src& v) const
+{
+ return m_proc.get_deref_location(v);
+}
+
+PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
+{
+ return m_proc.from_nir_with_fetch_constant(src, component);
+}
+
+GPRVector *EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
+ const GPRVector::Swizzle& swizzle)
+{
+ return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle);
+}
+
+void EmitInstruction::load_uniform(const nir_alu_src& src)
+{
+ m_proc.load_uniform(src);
+}
+
+int EmitInstruction::lookup_register_index(const nir_src& src) const
+{
+ return m_proc.lookup_register_index(src);
+}
+
+int EmitInstruction::allocate_temp_register()
+{
+ return m_proc.allocate_temp_register();
+}
+
+int EmitInstruction::lookup_register_index(const nir_dest& dst)
+{
+ return m_proc.lookup_register_index(dst);
+}
+
+const nir_load_const_instr*
+EmitInstruction::get_literal_register(const nir_src& src) const
+{
+ if (src.is_ssa)
+ return m_proc.get_literal_constant(src.ssa->index);
+ else
+ return nullptr;
+}
+
+PValue EmitInstruction::get_temp_register()
+{
+ return m_proc.get_temp_register();
+}
+
+GPRVector EmitInstruction::get_temp_vec4()
+{
+ return m_proc.get_temp_vec4();
+}
+
+PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle)
+{
+ return m_proc.create_register_from_nir_src(src, swizzle);
+}
+
+const std::set<AluModifiers> EmitInstruction::empty = {};
+const std::set<AluModifiers> EmitInstruction::write = {alu_write};
+const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
+const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr};
+
+}
+
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef EMITINSTRUCTION_H
+#define EMITINSTRUCTION_H
+
+#include "compiler/nir/nir.h"
+#include "sfn_value.h"
+#include "sfn_instruction_alu.h"
+
+namespace r600 {
+
+class ShaderFromNirProcessor;
+
+class EmitInstruction
+{
+public:
+ EmitInstruction(ShaderFromNirProcessor& processor);
+ virtual ~EmitInstruction();
+ bool emit(nir_instr* instr);
+
+ static const std::set<AluModifiers> empty;
+ static const std::set<AluModifiers> write;
+ static const std::set<AluModifiers> last_write;
+ static const std::set<AluModifiers> last;
+
+protected:
+ virtual bool do_emit(nir_instr* instr) = 0;
+
+ // forwards from ValuePool
+ PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
+ PValue from_nir(const nir_src& v, unsigned component);
+ PValue from_nir(const nir_alu_src& v, unsigned component);
+ PValue from_nir(const nir_tex_src& v, unsigned component);
+ PValue from_nir(const nir_alu_dest& v, unsigned component);
+ PValue from_nir(const nir_dest& v, unsigned component);
+
+ const nir_load_const_instr* get_literal_register(const nir_src& src) const;
+
+ int lookup_register_index(const nir_src& src) const;
+ int lookup_register_index(const nir_dest& dst);
+ PValue create_register_from_nir_src(const nir_src& src, unsigned comp);
+
+ int allocate_temp_register();
+
+ PValue get_temp_register();
+ GPRVector get_temp_vec4();
+
+ // forwards from ShaderFromNirProcessor
+ void emit_instruction(Instruction *ir);
+ bool emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src0,
+ const std::set<AluModifiers>& m_flags);
+
+ PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component);
+ GPRVector *vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
+ const GPRVector::Swizzle& swizzle);
+
+ void load_uniform(const nir_alu_src& src);
+ const nir_variable *get_deref_location(const nir_src& v) const;
+
+
+
+private:
+
+ ShaderFromNirProcessor& m_proc;
+};
+
+}
+
+
+
+#endif // EMITINSTRUCTION_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_emittexinstruction.h"
+#include "sfn_shader_base.h"
+#include "sfn_instruction_fetch.h"
+
+namespace r600 {
+
+EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor):
+ EmitInstruction (processor)
+{
+}
+
+bool EmitTexInstruction::do_emit(nir_instr* instr)
+{
+ nir_tex_instr* ir = nir_instr_as_tex(instr);
+
+ TexInputs src;
+ if (!get_inputs(*ir, src))
+ return false;
+
+ if (ir->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ switch (ir->op) {
+ case nir_texop_tex:
+ return emit_cube_tex(ir, src);
+ case nir_texop_txf:
+ return emit_cube_txf(ir, src);
+ case nir_texop_txb:
+ return emit_cube_txb(ir, src);
+ case nir_texop_txl:
+ return emit_cube_txl(ir, src);
+ case nir_texop_txs:
+ return emit_tex_txs(ir, src, {0,1,2,3});
+ case nir_texop_txd:
+ return emit_cube_txd(ir, src);
+ case nir_texop_lod:
+ return emit_cube_lod(ir, src);
+ case nir_texop_tg4:
+ return emit_cube_tg4(ir, src);
+ case nir_texop_query_levels:
+ return emit_tex_txs(ir, src, {3,7,7,7});
+ default:
+ return false;
+ }
+ } else if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ switch (ir->op) {
+ case nir_texop_txf:
+ return emit_buf_txf(ir, src);
+ case nir_texop_txs:
+ return emit_tex_txs(ir, src, {0,1,2,3});
+ default:
+ return false;
+ }
+ } else {
+ switch (ir->op) {
+ case nir_texop_tex:
+ return emit_tex_tex(ir, src);
+ case nir_texop_txf:
+ return emit_tex_txf(ir, src);
+ case nir_texop_txb:
+ return emit_tex_txb(ir, src);
+ case nir_texop_txl:
+ return emit_tex_txl(ir, src);
+ case nir_texop_txd:
+ return emit_tex_txd(ir, src);
+ case nir_texop_txs:
+ return emit_tex_txs(ir, src, {0,1,2,3});
+ case nir_texop_lod:
+ return emit_tex_lod(ir, src);
+ case nir_texop_tg4:
+ return emit_tex_tg4(ir, src);
+ case nir_texop_txf_ms:
+ return emit_tex_txf_ms(ir, src);
+ case nir_texop_query_levels:
+ return emit_tex_txs(ir, src, {3,7,7,7});
+ default:
+ return false;
+ }
+ }
+}
+
+bool EmitTexInstruction::emit_cube_txf(UNUSED nir_tex_instr* instr, UNUSED TexInputs &src)
+{
+ return false;
+}
+
+bool EmitTexInstruction::emit_cube_txd(nir_tex_instr* instr, TexInputs& tex_src)
+{
+
+ assert(instr->src[0].src.is_ssa);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample_g;
+
+ std::array<PValue, 4> v;
+ for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(instr->dest, i);
+
+ GPRVector cubed(v);
+ emit_cube_prep(tex_src.coord, cubed, instr->is_array);
+
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
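+   /* The sample instruction expects the values computed by emit_cube_prep
+    * with the x/y and z/w pairs swapped, hence the {1, 0, 3, 2} reordering
+    * below; the exact slot layout of the CUBE result is taken on trust
+    * from the TGSI path here. */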
+ const uint16_t lookup[4] = {1, 0, 3, 2};
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = v[i];
+ src_elms[i] = cubed.reg_i(lookup[i]);
+ }
+
+ GPRVector empty_dst(0, {7,7,7,7});
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c_g;
+ }
+
+
+ PValue half(new LiteralValue(0.5f));
+ for (int i = 0; i < 3; ++i) {
+ emit_instruction(new AluInstruction(op2_mul_ieee, tex_src.ddx.reg_i(i), {tex_src.ddx.reg_i(i), half},
+ {alu_last_instr, alu_write}));
+ }
+ for (int i = 0; i < 3; ++i) {
+ emit_instruction(new AluInstruction(op2_mul_ieee, tex_src.ddy.reg_i(i), {tex_src.ddy.reg_i(i), half},
+ {alu_last_instr, alu_write}));
+ }
+
+ auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect);
+
+ TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, tex_src.ddx,
+ sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+ irgh->set_dest_swizzle({7,7,7,7});
+
+ TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, tex_src.ddy,
+ sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+ irgv->set_dest_swizzle({7,7,7,7});
+
+ GPRVector dst(dst_elms);
+ GPRVector src(src_elms);
+ TexInstruction *ir = new TexInstruction(tex_op, dst, src, instr->sampler_index,
+ sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+
+ set_rect_coordinate_flags(instr, ir);
+ //set_offsets(ir, tex_src.offset);
+
+ emit_instruction(irgh);
+ emit_instruction(irgv);
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_cube_txl(nir_tex_instr* instr, TexInputs& tex_src)
+{
+ assert(instr->src[0].src.is_ssa);
+
+ if (instr->is_shadow)
+ return false;
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ std::array<PValue, 4> v;
+ for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(instr->dest, i);
+
+ GPRVector cubed(v);
+ emit_cube_prep(tex_src.coord, cubed, instr->is_array);
+
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
+ const uint16_t lookup[4] = {1, 0, 3, 2};
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = v[i];
+ src_elms[i] = cubed.reg_i(lookup[i]);
+ }
+
+ auto *ir = new AluInstruction(op1_mov, src_elms[3], tex_src.lod,
+ {alu_last_instr, alu_write});
+ emit_instruction(ir);
+
+ GPRVector src(src_elms);
+ GPRVector dst(dst_elms);
+
+ auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect);
+
+ auto tir = new TexInstruction(TexInstruction::sample_l, dst, src,
+                                 sampler.id, sampler.id + R600_MAX_CONST_BUFFERS,
+ tex_src.sampler_offset);
+
+ if (instr->is_array)
+ tir->set_flag(TexInstruction::z_unnormalized);
+
+ emit_instruction(tir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_cube_lod(nir_tex_instr* instr, TexInputs& src)
+{
+ auto tex_op = TexInstruction::get_tex_lod;
+
+ std::array<PValue, 4> v;
+ for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(instr->dest, i);
+
+ GPRVector cubed(v);
+ emit_cube_prep(src.coord, cubed, instr->is_array);
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect);
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, cubed, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS,
+ src.sampler_offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_cube_txb(nir_tex_instr* instr, TexInputs& tex_src)
+{
+ assert(instr->src[0].src.is_ssa);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ std::array<PValue, 4> v;
+ for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(instr->dest, i);
+
+ GPRVector cubed(v);
+ emit_cube_prep(tex_src.coord, cubed, instr->is_array);
+
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
+ const uint16_t lookup[4] = {1, 0, 3, 2};
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = v[i];
+ src_elms[i] = v[lookup[i]];
+ }
+
+ GPRVector src(src_elms);
+ GPRVector dst(dst_elms);
+
+ auto tex_op = TexInstruction::sample_lb;
+ if (!instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.bias,
+ {alu_last_instr, alu_write}));
+ } else {
+ emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c_lb;
+ }
+
+ auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto tir = new TexInstruction(tex_op, dst, src,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+ emit_instruction(tir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_cube_tex(nir_tex_instr* instr, TexInputs& tex_src)
+{
+ std::array<PValue, 4> v;
+ for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(instr->dest, i);
+
+ auto tex_op = TexInstruction::sample;
+ GPRVector cubed(v);
+ emit_cube_prep(tex_src.coord, cubed, instr->is_array);
+
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
+ const uint16_t lookup[4] = {1, 0, 3, 2};
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = v[i];
+ src_elms[i] = v[lookup[i]];
+ }
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c;
+ }
+
+ GPRVector dst(dst_elms);
+ GPRVector src(src_elms);
+
+ auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto tir = new TexInstruction(tex_op, dst, src,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+ if (instr->is_array)
+ tir->set_flag(TexInstruction::z_unnormalized);
+
+ emit_instruction(tir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_cube_prep(const GPRVector& coord, GPRVector& cubed, bool is_array)
+{
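+   /* This follows the classic r600 cube lowering: CUBE is issued once per
+    * channel with permuted coordinate pairs to select the major axis and
+    * produce the face coordinates plus the face index; the coordinates are
+    * then scaled by 1/|ma| and biased by 1.5 to land in the face-local
+    * range (a summary of the instruction sequence below). */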
+ AluInstruction *ir = nullptr;
+ const uint16_t src0_chan[4] = {2, 2, 0, 1};
+ const uint16_t src1_chan[4] = {1, 0, 2, 2};
+
+ for (int i = 0; i < 4; ++i) {
+ ir = new AluInstruction(op2_cube, cubed.reg_i(i), coord.reg_i(src0_chan[i]),
+ coord.reg_i(src1_chan[i]), {alu_write});
+
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ ir = new AluInstruction(op1_recip_ieee, cubed.reg_i(2), cubed.reg_i(2), {alu_write, alu_last_instr});
+ ir->set_flag(alu_src0_abs);
+ emit_instruction(ir);
+
+ PValue one_p_5(new LiteralValue(1.5f));
+ for (int i = 0; i < 2; ++i) {
+ ir = new AluInstruction(op3_muladd, cubed.reg_i(i), cubed.reg_i(i), cubed.reg_i(2),
+ one_p_5, {alu_write});
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ if (is_array) {
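+      /* For cube arrays the rounded, non-negative layer index is folded
+       * into the face id as index * 8 + face, which appears to be how the
+       * sampler addresses layered cube faces. */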
+ auto face = cubed.reg_i(3);
+ PValue array_index = get_temp_register();
+
+ ir = new AluInstruction(op1_rndne, array_index, coord.reg_i(3), {alu_write, alu_last_instr});
+ emit_instruction(ir);
+
+ ir = new AluInstruction(op2_max, array_index, {array_index, Value::zero}, {alu_write, alu_last_instr});
+ emit_instruction(ir);
+
+ ir = new AluInstruction(op3_muladd, face, {array_index, PValue (new LiteralValue(8.0f)), face},
+ {alu_write, alu_last_instr});
+ emit_instruction(ir);
+ }
+
+ return true;
+}
+
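+/* A texel fetch from a buffer resource becomes a plain vertex-style fetch;
+ * vtx_use_const_field tells the backend to take the format from the buffer
+ * resource constant (a reading of the flag name, not verified against the
+ * hardware docs). */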
+bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src)
+{
+ auto dst = make_dest(*instr);
+
+ auto ir = new FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0,
+ instr->texture_index + R600_MAX_CONST_BUFFERS,
+ PValue(), bim_none);
+ ir->set_flag(vtx_use_const_field);
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src)
+{
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample;
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect);
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c;
+ }
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, irt);
+
+ set_rect_coordinate_flags(instr, irt);
+ set_offsets(irt, src.offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample_g;
+ auto dst = make_dest(*instr);
+
+ GPRVector empty_dst(0,{7,7,7,7});
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c_g;
+ }
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ irgh->set_dest_swizzle({7,7,7,7});
+
+ TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy,
+ sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ irgv->set_dest_swizzle({7,7,7,7});
+
+ TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, ir);
+
+ set_rect_coordinate_flags(instr, ir);
+ set_offsets(ir, src.offset);
+
+ emit_instruction(irgh);
+ emit_instruction(irgv);
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto dst = make_dest(*instr);
+
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr}));
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect);
+
+   /* txf doesn't need rounding for the array index, but for 1D textures
+    * the hardware expects the array index in the z component, so move it
+    * there from y. */
+ if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
+ src.coord.set_reg_i(2, src.coord.reg_i(1));
+
+ auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ if (src.offset) {
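+      /* ld takes integer texel coordinates, so constant offsets can simply
+       * be folded in with integer adds. */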
+ assert(src.offset->is_ssa);
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
+ ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
+ {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ emit_instruction(tex_ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src)
+{
+ auto tex_op = TexInstruction::get_tex_lod;
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ irt->set_dest_swizzle({1,0,7,7});
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample_l;
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod,
+ {alu_last_instr, alu_write}));
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c_l;
+ }
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, irt);
+
+ set_rect_coordinate_flags(instr, irt);
+ set_offsets(irt, src.offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src)
+{
+ auto tex_op = TexInstruction::sample_lb;
+
+ std::array<uint8_t, 4> in_swizzle = {0,1,2,3};
+
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias,
+ {alu_last_instr, alu_write}));
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c_lb;
+ }
+
+ GPRVector tex_src(src.coord, in_swizzle);
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ if (instr->is_array)
+ handle_array_index(*instr, tex_src, irt);
+
+ set_rect_coordinate_flags(instr, irt);
+ set_offsets(irt, src.offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
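+/* txs and query_levels share the resinfo path; the dest swizzle selects
+ * which resinfo channels are returned, and query_levels passes {3,7,7,7}
+ * because the mip count is presumably reported in channel 3. */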
+bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src,
+ const std::array<int,4>& dest_swz)
+{
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? i : 7);
+ }
+
+ GPRVector dst(dst_elms);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
+ instr->sampler_index + R600_MAX_CONST_BUFFERS,
+ bim_none));
+ } else {
+ for (uint16_t i = 0; i < 4; ++i)
+ src_elms[i] = tex_src.lod;
+ GPRVector src(src_elms);
+
+ auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+ ir->set_dest_swizzle(dest_swz);
+ emit_instruction(ir);
+ }
+
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::gather4;
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::gather4_c;
+ }
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ bool literal_offset = false;
+ if (src.offset) {
+ literal_offset = src.offset->is_ssa && get_literal_register(*src.offset);
+      r600::sfn_log << SfnLog::tex << " have offsets, and they are "
+                    << (literal_offset ? "literal" : "varying") << "\n";
+
+ if (!literal_offset) {
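+         /* Varying offsets cannot be encoded in the sample instruction, so
+          * they are loaded into a register, sent ahead with a set_offsets
+          * op, and the gather switches to the *_o opcode variants. */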
+ GPRVector::Swizzle swizzle = {4,4,4,4};
+ for (unsigned i = 0; i < instr->coord_components; ++i)
+ swizzle[i] = i;
+
+ std::unique_ptr<GPRVector> ofs(vec_from_nir_with_fetch_constant(*src.offset,
+                                                                         (1 << instr->coord_components) - 1,
+ swizzle));
+ GPRVector dummy(0, {7,7,7,7});
+ tex_op = (tex_op == TexInstruction::gather4_c) ?
+ TexInstruction::gather4_c_o : TexInstruction::gather4_o;
+
+ auto set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy,
+ *ofs, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ set_ofs->set_dest_swizzle({7,7,7,7});
+ emit_instruction(set_ofs);
+ }
+ }
+
+   /* Hardware before Cayman needs the destination swizzle. */
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ irt->set_dest_swizzle({1,2,0,3});
+ irt->set_gather_comp(instr->component);
+
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, irt);
+
+ if (literal_offset) {
+ r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
+ set_offsets(irt, src.offset);
+ }
+
+ set_rect_coordinate_flags(instr, irt);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_cube_tg4(nir_tex_instr* instr, TexInputs& tex_src)
+{
+ std::array<PValue, 4> v;
+ for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(instr->dest, i);
+
+ auto tex_op = TexInstruction::gather4;
+ GPRVector cubed(v);
+ emit_cube_prep(tex_src.coord, cubed, instr->is_array);
+
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
+ const uint16_t lookup[4] = {1, 0, 3, 2};
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = v[i];
+ src_elms[i] = v[lookup[i]];
+ }
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src_elms[3], tex_src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::gather4_c;
+ }
+
+ GPRVector dst(dst_elms);
+ GPRVector src(src_elms);
+
+ auto sampler = get_samplerr_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto tir = new TexInstruction(tex_op, dst, src, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+
+ tir->set_gather_comp(instr->component);
+
+ tir->set_dest_swizzle({1, 2, 0, 3});
+
+ if (instr->is_array)
+ tir->set_flag(TexInstruction::z_unnormalized);
+
+ emit_instruction(tir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src)
+{
+ assert(instr->src[0].src.is_ssa);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto sampler = get_samplerr_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ int sample_id = allocate_temp_register();
+
+ GPRVector sample_id_dest(sample_id, {0,7,7,7});
+ PValue help(new GPRValue(sample_id, 1));
+
+   /* FIXME: Texture destination registers must be handled differently,
+    * because the swizzle identifies which source component has to be
+    * written at a certain position, and the target register is actually
+    * different. At this point we just add a helper register, but for later
+    * work (scheduling and optimization on the r600 IR level) this needs to
+    * be implemented differently. */
+
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3),
+ src.ms_index,
+ {alu_write, alu_last_instr}));
+
+ auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized);
+ tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized);
+ tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized);
+ tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized);
+ tex_sample_id_ir->set_inst_mode(1);
+
+ emit_instruction(tex_sample_id_ir);
+
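+   /* The LD with inst_mode 1 above reads the per-pixel sample map; each
+    * sample occupies a nibble, so shifting right by 4 * sample index and
+    * masking with 0xf recovers the physical sample slot (this matches the
+    * usual FMASK decode, inferred from the constants used here). */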
+ emit_instruction(new AluInstruction(op2_mullo_int, help,
+ {src.ms_index, PValue(new LiteralValue(4))},
+ {alu_write, alu_last_instr}));
+
+ emit_instruction(new AluInstruction(op2_lshr_int, src.coord.reg_i(3),
+ {sample_id_dest.reg_i(0), help},
+ {alu_write, alu_last_instr}));
+
+ emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3),
+ {src.coord.reg_i(3), PValue(new LiteralValue(15))},
+ {alu_write, alu_last_instr}));
+
+ auto dst = make_dest(*instr);
+
+   /* txf doesn't need rounding for the array index, but for 1D textures
+    * the hardware expects the array index in the z component, so move it
+    * there from y. */
+ if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
+ src.coord.set_reg_i(2, src.coord.reg_i(1));
+
+ auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ if (src.offset) {
+ assert(src.offset->is_ssa);
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
+ ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
+ {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ emit_instruction(tex_ir);
+ return true;
+}
+
+bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
+{
+ sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
+
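+   /* Gradients are supplied only for the coordinate proper, not for the
+    * array index, hence one component less for array textures. */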
+ unsigned grad_components = instr.coord_components;
+ if (instr.is_array)
+ --grad_components;
+
+ src.offset = nullptr;
+ bool retval = true;
+ for (unsigned i = 0; i < instr.num_srcs; ++i) {
+ switch (instr.src[i].src_type) {
+ case nir_tex_src_bias:
+ src.bias = from_nir(instr.src[i], 0);
+ break;
+
+ case nir_tex_src_coord: {
+ std::unique_ptr<GPRVector> coord(vec_from_nir_with_fetch_constant(instr.src[i].src,
+ (1 << instr.coord_components) - 1,
+ {0,1,2,3}));
+ src.coord = *coord;
+
+ } break;
+ case nir_tex_src_comparator:
+ src.comperator = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_ddx: {
+ sfn_log << SfnLog::tex << "Get DDX ";
+ std::unique_ptr<GPRVector> coord(vec_from_nir_with_fetch_constant(instr.src[i].src,
+ (1 << grad_components) - 1,
+ swizzle_from_mask(grad_components)));
+ src.ddx = *coord;
+ sfn_log << SfnLog::tex << src.ddx << "\n";
+ } break;
+ case nir_tex_src_ddy:{
+ sfn_log << SfnLog::tex << "Get DDY ";
+ std::unique_ptr<GPRVector> coord(vec_from_nir_with_fetch_constant(instr.src[i].src,
+ (1 << grad_components) - 1,
+ swizzle_from_mask(grad_components)));
+ src.ddy = *coord;
+ sfn_log << SfnLog::tex << src.ddy << "\n";
+ } break;
+ case nir_tex_src_lod:
+ src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0);
+ break;
+ case nir_tex_src_offset:
+ sfn_log << SfnLog::tex << " -- Find offset\n";
+ src.offset = &instr.src[i].src;
+ break;
+ case nir_tex_src_sampler_deref:
+ src.sampler_deref = get_deref_location(instr.src[i].src);
+ break;
+ case nir_tex_src_texture_deref:
+ src.texture_deref = get_deref_location(instr.src[i].src);
+ break;
+ case nir_tex_src_ms_index:
+ src.ms_index = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_texture_offset:
+ src.texture_offset = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_sampler_offset:
+ src.sampler_offset = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_plane:
+ case nir_tex_src_projector:
+ case nir_tex_src_min_lod:
+ case nir_tex_src_ms_mcs:
+ default:
+ sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n";
+ retval = false;
+ }
+ }
+ return retval;
+}
+
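+/* Channel index 7 serves as the "unused" element selector in this backend,
+ * so destination slots beyond the NIR result width are wired to it (an
+ * assumption based on its consistent use in the swizzles here). */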
+GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr)
+{
+ int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
+ instr.dest.reg.reg->num_components;
+ std::array<PValue,4> dst_elms;
+ for (uint16_t i = 0; i < 4; ++i)
+ dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7);
+ return GPRVector(dst_elms);
+}
+
+GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr,
+ const std::array<int, 4>& swizzle)
+{
+ int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
+ instr.dest.reg.reg->num_components;
+ std::array<PValue,4> dst_elms;
+ for (uint16_t i = 0; i < 4; ++i) {
+ int k = swizzle[i];
+ dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? k : 7);
+ }
+ return GPRVector(dst_elms);
+}
+
+void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr,
+ TexInstruction* ir) const
+{
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ ir->set_flag(TexInstruction::x_unnormalized);
+ ir->set_flag(TexInstruction::y_unnormalized);
+ }
+}
+
+void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset)
+{
+ if (!offset)
+ return;
+
+ assert(offset->is_ssa);
+ auto literal = get_literal_register(*offset);
+ assert(literal);
+
+   for (unsigned i = 0; i < offset->ssa->num_components; ++i) {
+ ir->set_offset(i, literal->value[i].i32);
+ }
+}
+
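+/* The hardware expects the array layer as a rounded value in the z channel
+ * and treats it as unnormalized, hence the rndne and the flag below. */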
+void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir)
+{
+ int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2;
+ emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx),
+ {alu_last_instr, alu_write}));
+ ir->set_flag(TexInstruction::z_unnormalized);
+}
+
+EmitTexInstruction::SamplerId
+EmitTexInstruction::get_samplerr_id(int sampler_id, const nir_variable *deref)
+{
+ EmitTexInstruction::SamplerId result = {sampler_id, false};
+
+ if (deref) {
+ assert(glsl_type_is_sampler(deref->type));
+ result.id = deref->data.binding;
+ }
+ return result;
+}
+
+EmitTexInstruction::TexInputs::TexInputs():
+ sampler_deref(nullptr),
+ texture_deref(nullptr),
+ offset(nullptr)
+{
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_EMITTEXINSTRUCTION_H
+#define SFN_EMITTEXINSTRUCTION_H
+
+#include "sfn_emitinstruction.h"
+#include "sfn_instruction_tex.h"
+
+namespace r600 {
+
+class EmitTexInstruction : public EmitInstruction
+{
+public:
+ EmitTexInstruction(ShaderFromNirProcessor& processor);
+
+private:
+ struct TexInputs {
+ TexInputs();
+ const nir_variable *sampler_deref;
+ const nir_variable *texture_deref;
+ GPRVector coord;
+ PValue bias;
+ PValue comperator;
+ PValue lod;
+ GPRVector ddx;
+ GPRVector ddy;
+ nir_src *offset;
+ PValue gather_comp;
+ PValue ms_index;
+ PValue sampler_offset;
+ PValue texture_offset;
+ };
+
+ bool emit_cube_tex(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_txf(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_txb(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_txl(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_txd(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_lod(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_tg4(nir_tex_instr* instr, TexInputs& src);
+ bool emit_cube_prep(const GPRVector& coord, GPRVector& cubed, bool is_array);
+
+ bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src);
+
+ bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src);
+ bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src,
+ const std::array<int, 4> &dest_swz);
+ bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src);
+ bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src);
+
+ bool get_inputs(const nir_tex_instr& instr, TexInputs &src);
+
+ void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const;
+
+ bool do_emit(nir_instr* instr) override;
+
+ GPRVector make_dest(nir_tex_instr& instr);
+ GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle);
+
+ void set_offsets(TexInstruction* ir, nir_src *offset);
+ void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir);
+
+ struct SamplerId {
+ int id;
+ bool indirect;
+ };
+
+ SamplerId get_samplerr_id(int sampler_id, const nir_variable *deref);
+
+};
+
+}
+
+#endif // SFN_EMITTEXINSTRUCTION_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_alu.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+const AluModifiers AluInstruction::src_abs_flags[2] =
+ {alu_src0_abs, alu_src1_abs};
+const AluModifiers AluInstruction::src_neg_flags[3] =
+ {alu_src0_neg, alu_src1_neg, alu_src2_neg};
+const AluModifiers AluInstruction::src_rel_flags[3] =
+ {alu_src0_rel, alu_src1_rel, alu_src2_rel};
+
+AluInstruction::AluInstruction(EAluOp opcode):
+ Instruction (Instruction::alu),
+ m_opcode(opcode),
+ m_src(alu_ops.at(opcode).nsrc),
+ m_bank_swizzle(alu_vec_unknown),
+ m_cf_type(cf_alu)
+{
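+   /* Three-source opcodes need the op3 encoding flag. */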
+ if (alu_ops.at(opcode).nsrc == 3)
+ m_flags.set(alu_op3);
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src,
+ const std::set<AluModifiers>& flags):
+ Instruction (Instruction::alu),
+ m_opcode(opcode),
+ m_dest(dest),
+ m_bank_swizzle(alu_vec_unknown),
+ m_cf_type(cf_alu)
+{
+ m_src.swap(src);
+ for (auto f : flags)
+ m_flags.set(f);
+
+ if (alu_ops.at(opcode).nsrc == 3)
+ m_flags.set(alu_op3);
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
+ const std::set<AluModifiers>& flags):
+ AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags)
+{
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
+ PValue src0, PValue src1,
+                               const std::set<AluModifiers> &flags):
+   AluInstruction(opcode, dest, {src0, src1}, flags)
+{
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
+ PValue src1, PValue src2,
+ const std::set<AluModifiers> &flags):
+ AluInstruction(opcode, dest, {src0, src1, src2}, flags)
+{
+}
+
+bool AluInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == alu);
+ const auto& oth = static_cast<const AluInstruction&>(lhs);
+
+ if (m_opcode != oth.m_opcode) {
+ return false;
+ }
+
+ if (*m_dest != *oth.m_dest)
+ return false;
+
+ if (m_src.size() != oth.m_src.size())
+ return false;
+
+ for (unsigned i = 0; i < m_src.size(); ++i)
+ if (*m_src[i] != *oth.m_src[i]) {
+ return false;
+ }
+ return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type);
+}
+
+void AluInstruction::set_flag(AluModifiers flag)
+{
+ m_flags.set(flag);
+}
+
+void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz)
+{
+ m_bank_swizzle = bswz;
+}
+
+unsigned AluInstruction::n_sources() const
+{
+ return m_src.size();
+}
+
+void AluInstruction::do_print(std::ostream& os) const
+{
+ os << "ALU " << alu_ops.at(m_opcode).name;
+ if (m_flags.test(alu_dst_clamp))
+ os << "_CLAMP";
+ os << ' ' << *m_dest << " : " ;
+
+ for (unsigned i = 0; i < m_src.size(); ++i) {
+ int pflags = 0;
+ if (i)
+ os << ' ';
+ if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg;
+ if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel;
+ if (i < 2)
+ if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs;
+ m_src[i]->print(os, Value::PrintFlags(0, pflags));
+ }
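+   /* Flag legend: W = writes dest, L = last in ALU group, E = updates the
+    * exec mask, P = updates the predicate. */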
+ os << " {";
+ os << (m_flags.test(alu_write) ? 'W' : ' ');
+ os << (m_flags.test(alu_last_instr) ? 'L' : ' ');
+ os << (m_flags.test(alu_update_exec) ? 'E' : ' ');
+ os << (m_flags.test(alu_update_pred) ? 'P' : ' ');
+ os << "}";
+
+ os << " BS:" << m_bank_swizzle;
+ os << " CF:" << m_cf_type;
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_r600_instruction_alu_h
+#define sfn_r600_instruction_alu_h
+
+#include "sfn_instruction_base.h"
+#include "sfn_alu_defines.h"
+
+namespace r600 {
+
+enum AluModifiers {
+ alu_src0_neg,
+ alu_src0_abs,
+ alu_src0_rel,
+ alu_src1_neg,
+ alu_src1_abs,
+ alu_src1_rel,
+ alu_src2_neg,
+ alu_src2_rel,
+ alu_dst_clamp,
+ alu_dst_rel,
+ alu_last_instr,
+ alu_update_exec,
+ alu_update_pred,
+ alu_write,
+ alu_op3
+};
+
+enum AluDstModifiers {
+ omod_off = 0,
+ omod_mul2 = 1,
+ omod_mul4 = 2,
+ omod_divl2 = 3
+};
+
+enum AluPredSel {
+ pred_off = 0,
+ pred_zero = 2,
+ pred_one = 3
+};
+
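+/* Vector and scalar (trans) bank swizzles share the same encoding space,
+ * hence the paired enumerator names with identical values. */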
+enum AluBankSwizzle {
+ alu_vec_012 = 0,
+ sq_alu_scl_201 = 0,
+ alu_vec_021 = 1,
+ sq_alu_scl_122 = 1,
+ alu_vec_120 = 2,
+ sq_alu_scl_212 = 2,
+ alu_vec_102 = 3,
+ sq_alu_scl_221 = 3,
+ alu_vec_201 = 4,
+ alu_vec_210 = 5,
+ alu_vec_unknown = 6
+};
+
+class AluInstruction : public Instruction {
+public:
+
+ static const AluModifiers src_abs_flags[2];
+ static const AluModifiers src_neg_flags[3];
+ static const AluModifiers src_rel_flags[3];
+
+ AluInstruction(EAluOp opcode);
+   AluInstruction(EAluOp opcode, PValue dest,
+                  std::vector<PValue> src,
+                  const std::set<AluModifiers>& flags);
+
+   AluInstruction(EAluOp opcode, PValue dest, PValue src0,
+                  const std::set<AluModifiers>& flags);
+
+   AluInstruction(EAluOp opcode, PValue dest,
+                  PValue src0, PValue src1,
+                  const std::set<AluModifiers>& flags);
+
+   AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1,
+                  PValue src2,
+                  const std::set<AluModifiers>& flags);
+
+ void set_flag(AluModifiers flag);
+ unsigned n_sources() const;
+
+ PValue dest() {return m_dest;}
+ EAluOp opcode() const {return m_opcode;}
+ const Value *dest() const {return m_dest.get();}
+ Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
+ PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];}
+ bool is_last() const {return m_flags.test(alu_last_instr);}
+ bool write() const {return m_flags.test(alu_write);}
+ bool flag(AluModifiers f) const {return m_flags.test(f);}
+ void set_bank_swizzle(AluBankSwizzle swz);
+ int bank_swizzle() const {return m_bank_swizzle;}
+ ECFAluOpCode cf_type() const {return m_cf_type;}
+ void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
+
+private:
+
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ EAluOp m_opcode;
+ PValue m_dest;
+ std::vector<PValue> m_src;
+ AluOpFlags m_flags;
+ AluDstModifiers m_omod;
+ AluPredSel m_pred_sel;
+ AluBankSwizzle m_bank_swizzle;
+ ECFAluOpCode m_cf_type;
+};
+
+}
+
+#endif
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <algorithm>
+#include <cassert>
+
+#include "sfn_instruction_base.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+Instruction::Instruction(instr_type t):
+ m_type(t)
+{
+}
+
+Instruction::~Instruction()
+{
+}
+
+void Instruction::print(std::ostream& os) const
+{
+ os << "OP:";
+ do_print(os);
+}
+
+bool operator == (const Instruction& lhs, const Instruction& rhs)
+{
+ if (rhs.m_type != lhs.m_type)
+ return false;
+
+ return lhs.is_equal_to(rhs);
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_r600_instr_h
+#define sfn_r600_instr_h
+
+#include "sfn_value_gpr.h"
+#include "sfn_defines.h"
+
+#include "gallium/drivers/r600/r600_isa.h"
+#include <iostream>
+#include <memory>
+#include <vector>
+#include <set>
+
+namespace r600 {
+
+using OutputRegisterMap = std::map<unsigned, const GPRVector *>;
+
+class Instruction {
+public:
+ enum instr_type {
+ alu,
+ exprt,
+ tex,
+ vtx,
+ wait_ack,
+ cond_if,
+ cond_else,
+ cond_endif,
+ loop_begin,
+ loop_end,
+ loop_break,
+ loop_continue,
+ phi,
+ streamout,
+ ring,
+ emit_vtx,
+ mem_wr_scratch,
+ gds,
+ rat,
+ unknown
+ };
+
+ typedef std::shared_ptr<Instruction> Pointer;
+
+ friend bool operator == (const Instruction& lhs, const Instruction& rhs);
+
+ Instruction(instr_type t);
+
+ virtual ~Instruction();
+
+ instr_type type() const { return m_type;}
+
+ void print(std::ostream& os) const;
+
+private:
+ virtual bool is_equal_to(const Instruction& lhs) const = 0;
+
+ instr_type m_type;
+
+ virtual void do_print(std::ostream& os) const = 0;
+
+};
+
+using PInstruction = Instruction::Pointer;
+
+inline std::ostream& operator << (std::ostream& os, const Instruction& instr)
+{
+ instr.print(os);
+ return os;
+}
+
+bool operator == (const Instruction& lhs, const Instruction& rhs);
+
+}
+
+#endif
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_cf.h"
+
+namespace r600 {
+
+CFInstruction::CFInstruction(instr_type type):
+   Instruction(type)
+{
+}
+
+IfElseInstruction::IfElseInstruction(instr_type type):
+ CFInstruction (type)
+{
+}
+
+IfInstruction::IfInstruction(AluInstruction *pred):
+ IfElseInstruction(cond_if),
+ m_pred(pred)
+{
+}
+
+bool IfInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == cond_if);
+ const IfInstruction& l = dynamic_cast<const IfInstruction&>(lhs);
+ return *l.m_pred == *m_pred;
+}
+
+void IfInstruction::do_print(std::ostream& os) const
+{
+ os << "PRED = " << *m_pred << "\n";
+ os << "IF (PRED)";
+}
+
+ElseInstruction::ElseInstruction(IfInstruction *jump_src):
+ IfElseInstruction(cond_else),
+ m_jump_src(jump_src)
+{
+}
+
+bool ElseInstruction::is_equal_to(const Instruction& lhs) const
+{
+ if (lhs.type() != cond_else)
+ return false;
+ auto& l = static_cast<const ElseInstruction&>(lhs);
+ return (*m_jump_src == *l.m_jump_src);
+}
+
+void ElseInstruction::do_print(std::ostream& os) const
+{
+ os << "ELSE";
+}
+
+IfElseEndInstruction::IfElseEndInstruction():
+ IfElseInstruction(cond_endif)
+{
+}
+
+bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const
+{
+ if (lhs.type() != cond_endif)
+ return false;
+ return true;
+}
+
+void IfElseEndInstruction::do_print(std::ostream& os) const
+{
+ os << "ENDIF";
+}
+
+LoopBeginInstruction::LoopBeginInstruction():
+ CFInstruction(loop_begin)
+{
+}
+
+bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == loop_begin);
+ return true;
+}
+
+void LoopBeginInstruction::do_print(std::ostream& os) const
+{
+ os << "BGNLOOP";
+}
+
+LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start):
+ CFInstruction (loop_end),
+ m_start(start)
+{
+}
+
+bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == loop_end);
+ const auto& other = static_cast<const LoopEndInstruction&>(lhs);
+ return *m_start == *other.m_start;
+}
+
+void LoopEndInstruction::do_print(std::ostream& os) const
+{
+ os << "ENDLOOP";
+}
+
+LoopBreakInstruction::LoopBreakInstruction():
+ CFInstruction (loop_break)
+{
+}
+
+bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const
+{
+ return true;
+}
+
+void LoopBreakInstruction::do_print(std::ostream& os) const
+{
+ os << "BREAK";
+}
+
+LoopContInstruction::LoopContInstruction():
+ CFInstruction (loop_continue)
+{
+}
+
+bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const
+{
+ return true;
+}
+void LoopContInstruction::do_print(std::ostream& os) const
+{
+ os << "CONTINUE";
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_IFELSEINSTRUCTION_H
+#define SFN_IFELSEINSTRUCTION_H
+
+#include "sfn_instruction_alu.h"
+
+namespace r600 {
+
+class CFInstruction : public Instruction {
+protected:
+ CFInstruction(instr_type type);
+};
+
+class IfElseInstruction : public CFInstruction {
+public:
+ IfElseInstruction(instr_type type);
+};
+
+class IfInstruction : public IfElseInstruction {
+public:
+ IfInstruction(AluInstruction *pred);
+ const AluInstruction& pred() const {return *m_pred;}
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ std::shared_ptr<AluInstruction> m_pred;
+};
+
+class ElseInstruction : public IfElseInstruction {
+public:
+ ElseInstruction(IfInstruction *jump_src);
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ IfElseInstruction *m_jump_src;
+};
+
+class IfElseEndInstruction : public IfElseInstruction {
+public:
+ IfElseEndInstruction();
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+class LoopBeginInstruction: public CFInstruction {
+public:
+ LoopBeginInstruction();
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+class LoopEndInstruction: public CFInstruction {
+public:
+ LoopEndInstruction(LoopBeginInstruction *start);
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ LoopBeginInstruction *m_start;
+};
+
+class LoopBreakInstruction: public CFInstruction {
+public:
+ LoopBreakInstruction();
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+class LoopContInstruction: public CFInstruction {
+public:
+ LoopContInstruction();
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+}
+
+#endif // SFN_IFELSEINSTRUCTION_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_instruction_export.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
+ Instruction(t),
+ m_value(value)
+{
+}
+
+ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
+ WriteoutInstruction(Instruction::exprt, value),
+ m_type(type),
+ m_loc(loc),
+ m_is_last(false)
+{
+}
+
+bool ExportInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == exprt);
+ const auto& oth = static_cast<const ExportInstruction&>(lhs);
+
+ return (gpr() == oth.gpr()) &&
+ (m_type == oth.m_type) &&
+ (m_loc == oth.m_loc) &&
+ (m_is_last == oth.m_is_last);
+}
+
+void ExportInstruction::do_print(std::ostream& os) const
+{
+ os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
+ switch (m_type) {
+ case et_pixel: os << "PIXEL "; break;
+ case et_pos: os << "POS "; break;
+ case et_param: os << "PARAM "; break;
+ }
+ os << m_loc << " " << gpr();
+}
+
+void ExportInstruction::update_output_map(OutputRegisterMap& map) const
+{
+ map[m_loc] = gpr_ptr();
+}
+
+void ExportInstruction::set_last()
+{
+ m_is_last = true;
+}
+
+StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
+ int array_base, int comp_mask, int out_buffer,
+ int stream):
+ WriteoutInstruction(Instruction::streamout, value),
+ m_element_size(num_components == 3 ? 3 : num_components - 1),
+ m_burst_count(1),
+ m_array_base(array_base),
+ m_array_size(0xfff),
+ m_writemask(comp_mask),
+ m_output_buffer(out_buffer),
+ m_stream(stream)
+{
+}
+
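+/* The stream-out CF opcodes are laid out with the four buffer variants of
+ * one stream adjacent, so consecutive streams are four opcodes apart. */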
+unsigned StreamOutIntruction::op() const
+{
+ int op = 0;
+ switch (m_output_buffer) {
+ case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
+ case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
+ case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
+ case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
+ }
+ return 4 * m_stream + op;
+}
+
+bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == streamout);
+ const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
+
+ return gpr() == oth.gpr() &&
+ m_element_size == oth.m_element_size &&
+ m_burst_count == oth.m_burst_count &&
+ m_array_base == oth.m_array_base &&
+ m_array_size == oth.m_array_size &&
+ m_writemask == oth.m_writemask &&
+ m_output_buffer == oth.m_output_buffer &&
+ m_stream == oth.m_stream;
+}
+
+void StreamOutIntruction::do_print(std::ostream& os) const
+{
+ os << "WRITE STREAM(" << m_stream << ") " << gpr()
+ << " ES:" << m_element_size
+ << " BC:" << m_burst_count
+ << " BUF:" << m_output_buffer
+ << " ARRAY:" << m_array_base;
+ if (m_array_size != 0xfff)
+ os << "+" << m_array_size;
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_EXPORTINSTRUCTION_H
+#define SFN_EXPORTINSTRUCTION_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class WriteoutInstruction: public Instruction {
+public:
+ const GPRVector& gpr() const {return m_value;}
+ const GPRVector *gpr_ptr() const {return &m_value;}
+protected:
+ WriteoutInstruction(instr_type t, const GPRVector& value);
+
+ GPRVector m_value;
+};
+
+class ExportInstruction : public WriteoutInstruction {
+public:
+ enum ExportType {
+ et_pixel,
+ et_pos,
+ et_param
+ };
+
+ ExportInstruction(unsigned loc, const GPRVector& value, ExportType type);
+ void set_last();
+
+ ExportType export_type() const {return m_type;}
+
+ unsigned location() const {return m_loc;}
+ bool is_last_export() const {return m_is_last;}
+
+ void update_output_map(OutputRegisterMap& map) const;
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ ExportType m_type;
+ unsigned m_loc;
+ bool m_is_last;
+};
+
+class StreamOutIntruction: public WriteoutInstruction {
+public:
+ StreamOutIntruction(const GPRVector& value, int num_components,
+ int array_base, int comp_mask, int out_buffer,
+ int stream);
+ int element_size() const { return m_element_size;}
+ int burst_count() const { return m_burst_count;}
+ int array_base() const { return m_array_base;}
+ int array_size() const { return m_array_size;}
+ int comp_mask() const { return m_writemask;}
+ unsigned op() const;
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ int m_element_size;
+ int m_burst_count;
+ int m_array_base;
+ int m_array_size;
+ int m_writemask;
+ int m_output_buffer;
+ int m_stream;
+};
+
+enum EMemWriteType {
+ mem_write = 0,
+ mem_write_ind = 1,
+ mem_write_ack = 2,
+ mem_write_ind_ack = 3,
+};
+
+}
+
+#endif // SFN_EXPORTINSTRUCTION_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_fetch.h"
+
+#include "gallium/drivers/r600/r600_pipe.h"
+
+namespace r600 {
+
+/* TODO: refactor this to add static create methods for the specific tasks */
+FetchInstruction::FetchInstruction(EVFetchInstr op,
+ EVFetchType type,
+ GPRVector dst,
+ PValue src, int offset,
+ int buffer_id, PValue buffer_offset,
+ EBufferIndexMode cp_rel,
+ bool use_const_field):
+ Instruction(vtx),
+ m_vc_opcode(op),
+ m_fetch_type(type),
+ m_endian_swap(vtx_es_none),
+ m_src(src),
+ m_dst(dst),
+ m_offset(offset),
+ m_is_mega_fetch(1),
+ m_mega_fetch_count(16),
+ m_buffer_id(buffer_id),
+ m_semantic_id(0),
+ m_buffer_index_mode(cp_rel),
+ m_flags(0),
+ m_uncached(false),
+ m_indexed(false),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(0),
+ m_buffer_offset(buffer_offset),
+ m_dest_swizzle({0,1,2,3})
+{
+ if (use_const_field) {
+ m_flags.set(vtx_use_const_field);
+ m_data_format = fmt_invalid;
+ m_num_format = vtx_nf_norm;
+ } else {
+ m_flags.set(vtx_format_comp_signed);
+ m_data_format = fmt_32_32_32_32_float;
+ m_num_format = vtx_nf_scaled;
+ }
+
+}
+
+/* Resource query */
+FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode,
+ EVFetchType fetch_type,
+ EVTXDataFormat data_format,
+ EVFetchNumFormat num_format,
+ EVFetchEndianSwap endian_swap,
+ const PValue src,
+ const GPRVector dst,
+ uint32_t offset,
+ bool is_mega_fetch,
+ uint32_t mega_fetch_count,
+ uint32_t buffer_id,
+ uint32_t semantic_id,
+
+ EBufferIndexMode buffer_index_mode,
+ bool uncached,
+ bool indexed,
+ int array_base,
+ int array_size,
+ int elm_size,
+ PValue buffer_offset,
+ const std::array<int, 4>& dest_swizzle):
+ Instruction(vtx),
+ m_vc_opcode(vc_opcode),
+ m_fetch_type(fetch_type),
+ m_data_format(data_format),
+ m_num_format(num_format),
+ m_endian_swap(endian_swap),
+ m_src(src),
+ m_dst(dst),
+ m_offset(offset),
+ m_is_mega_fetch(is_mega_fetch),
+ m_mega_fetch_count(mega_fetch_count),
+ m_buffer_id(buffer_id),
+ m_semantic_id(semantic_id),
+ m_buffer_index_mode(buffer_index_mode),
+ m_uncached(uncached),
+ m_indexed(indexed),
+ m_array_base(array_base),
+ m_array_size(array_size),
+ m_elm_size(elm_size),
+ m_buffer_offset(buffer_offset),
+ m_dest_swizzle(dest_swizzle)
+{
+}
+
+FetchInstruction::FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id, PValue buffer_offset,
+ EVTXDataFormat format,
+ EVFetchNumFormat num_format):
+ Instruction(vtx),
+ m_vc_opcode(vc_fetch),
+ m_fetch_type(no_index_offset),
+ m_data_format(format),
+ m_num_format(num_format),
+ m_endian_swap(vtx_es_none),
+ m_src(src),
+ m_dst(dst),
+ m_offset(0),
+ m_is_mega_fetch(0),
+ m_mega_fetch_count(0),
+ m_buffer_id(buffer_id),
+ m_semantic_id(0),
+ m_buffer_index_mode(bim_none),
+ m_flags(0),
+ m_uncached(false),
+ m_indexed(false),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(1),
+ m_buffer_offset(buffer_offset),
+ m_dest_swizzle({0,1,2,3})
+{
+ m_flags.set(vtx_format_comp_signed);
+}
+
+
+/* Resource query */
+FetchInstruction::FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id,
+ EBufferIndexMode cp_rel):
+ Instruction(vtx),
+ m_vc_opcode(vc_get_buf_resinfo),
+ m_fetch_type(no_index_offset),
+ m_data_format(fmt_32_32_32_32),
+ m_num_format(vtx_nf_norm),
+ m_endian_swap(vtx_es_none),
+ m_src(src),
+ m_dst(dst),
+ m_offset(0),
+ m_is_mega_fetch(0),
+ m_mega_fetch_count(16),
+ m_buffer_id(buffer_id),
+ m_semantic_id(0),
+ m_buffer_index_mode(cp_rel),
+ m_flags(0),
+ m_uncached(false),
+ m_indexed(false),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(0),
+ m_dest_swizzle({0,1,2,3})
+{
+ m_flags.set(vtx_format_comp_signed);
+}
+
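+/* Read from scratch space: a literal source address is folded into
+ * array_base, otherwise the source register is used for indexed
+ * addressing. */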
+FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size):
+ Instruction(vtx),
+ m_vc_opcode(vc_read_scratch),
+ m_fetch_type(vertex_data),
+ m_data_format(fmt_32_32_32_32),
+ m_num_format(vtx_nf_int),
+ m_endian_swap(vtx_es_none),
+ m_dst(dst),
+ m_offset(0),
+ m_is_mega_fetch(0),
+ m_mega_fetch_count(16),
+ m_buffer_id(0),
+ m_semantic_id(0),
+ m_buffer_index_mode(bim_none),
+ m_flags(0),
+ m_uncached(true),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(3),
+ m_dest_swizzle({0,1,2,3})
+{
+ if (src->type() == Value::literal) {
+ const auto& lv = dynamic_cast<const LiteralValue&>(*src);
+ m_array_base = lv.value();
+ m_indexed = false;
+ m_src.reset(new GPRValue(0,0));
+ m_array_size = 0;
+ } else {
+ m_array_base = 0;
+ m_src = src;
+ m_indexed = true;
+ m_array_size = scratch_size - 1;
+ }
+}
+
+
+bool FetchInstruction::is_equal_to(const Instruction& lhs) const
+{
+ auto& l = static_cast<const FetchInstruction&>(lhs);
+ if (m_src) {
+ if (!l.m_src)
+ return false;
+ if (*m_src != *l.m_src)
+ return false;
+ } else {
+ if (l.m_src)
+ return false;
+ }
+
+ return m_vc_opcode == l.m_vc_opcode &&
+ m_fetch_type == l.m_fetch_type &&
+ m_data_format == l.m_data_format &&
+ m_num_format == l.m_num_format &&
+ m_endian_swap == l.m_endian_swap &&
+ m_dst == l.m_dst &&
+ m_offset == l.m_offset &&
+ m_buffer_id == l.m_buffer_id &&
+ m_semantic_id == l.m_semantic_id &&
+ m_buffer_index_mode == l.m_buffer_index_mode &&
+ m_flags == l.m_flags &&
+ m_indexed == l.m_indexed &&
+ m_uncached == l.m_uncached;
+}
+
+void FetchInstruction::set_format(EVTXDataFormat fmt)
+{
+ m_data_format = fmt;
+}
+
+
+void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz)
+{
+ m_dest_swizzle = swz;
+}
+
+void FetchInstruction::prelude_append(Instruction *instr)
+{
+ assert(instr);
+ m_prelude.push_back(PInstruction(instr));
+}
+
+const std::vector<PInstruction>& FetchInstruction::prelude() const
+{
+ return m_prelude;
+}
+
+static const char *fmt_descr[64] = {
+ "INVALID",
+ "8",
+ "4_4",
+ "3_3_2",
+ "RESERVED_4",
+ "16",
+ "16F",
+ "8_8",
+ "5_6_5",
+ "6_5_5",
+ "1_5_5_5",
+ "4_4_4_4",
+ "5_5_5_1",
+ "32",
+ "32F",
+ "16_16",
+ "16_16F",
+ "8_24",
+ "8_24F",
+ "24_8",
+ "24_8F",
+ "10_11_11",
+ "10_11_11F",
+ "11_11_10",
+ "11_11_10F",
+ "2_10_10_10",
+ "8_8_8_8",
+ "10_10_10_2",
+ "X24_8_32F",
+ "32_32",
+ "32_32F",
+ "16_16_16_16",
+ "16_16_16_16F",
+ "RESERVED_33",
+ "32_32_32_32",
+ "32_32_32_32F",
+ "RESERVED_36",
+ "1",
+ "1_REVERSED",
+ "GB_GR",
+ "BG_RG",
+ "32_AS_8",
+ "32_AS_8_8",
+ "5_9_9_9_SHAREDEXP",
+ "8_8_8",
+ "16_16_16",
+ "16_16_16F",
+ "32_32_32",
+ "32_32_32F",
+ "BC1",
+ "BC2",
+ "BC3",
+ "BC4",
+ "BC5",
+ "APC0",
+ "APC1",
+ "APC2",
+ "APC3",
+ "APC4",
+ "APC5",
+ "APC6",
+ "APC7",
+ "CTX1",
+ "RESERVED_63"
+};
+
+
+void FetchInstruction::do_print(std::ostream& os) const
+{
+ static const std::string num_format_char[] = {"norm", "int", "scaled"};
+ static const std::string endian_swap_code[] = {
+ "noswap", "8in16", "8in32"
+ };
+ static const char buffer_index_mode_char[] = "_01E";
+ static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero",
+ "nostride", "AC"};
+ switch (m_vc_opcode) {
+ case vc_fetch:
+ os << "Fetch " << m_dst;
+ break;
+ case vc_semantic:
+ os << "Fetch Semantic ID:" << m_semantic_id;
+ break;
+ case vc_get_buf_resinfo:
+ os << "Fetch BufResinfo:" << m_dst;
+ break;
+ case vc_read_scratch:
+ os << "MEM_READ_SCRATCH:" << m_dst;
+ break;
+ default:
+ os << "Fetch ERROR";
+ return;
+ }
+
+ os << ", " << *m_src;
+
+ if (m_offset)
+ os << "+" << m_offset;
+
+ os << " BUFID:" << m_buffer_id
+ << " FMT:(" << fmt_descr[m_data_format]
+ << " " << num_format_char[m_num_format]
+ << " " << endian_swap_code[m_endian_swap]
+ << ")";
+ if (m_buffer_index_mode > 0)
+ os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode];
+
+
+ if (m_is_mega_fetch)
+ os << " MFC:" << m_mega_fetch_count;
+ else
+ os << " mfc*:" << m_mega_fetch_count;
+
+ if (m_flags.any()) {
+ os << " Flags:";
+ for( int i = 0; i < vtx_unknwon; ++i) {
+ if (m_flags.test(i))
+ os << ' ' << flag_string[i];
+ }
+ }
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_INSTRUCTION_FETCH_H
+#define SFN_INSTRUCTION_FETCH_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class FetchInstruction : public Instruction {
+public:
+
+ FetchInstruction(EVFetchInstr vc_opcode,
+ EVFetchType fetch_type,
+ EVTXDataFormat data_format,
+ EVFetchNumFormat num_format,
+ EVFetchEndianSwap endian_swap,
+ const PValue src,
+ const GPRVector dst,
+ uint32_t offset,
+ bool is_mega_fetch,
+ uint32_t mega_fetch_count,
+ uint32_t buffer_id,
+ uint32_t semantic_id,
+
+ EBufferIndexMode buffer_index_mode,
+ bool uncached,
+ bool indexed,
+ int array_base,
+ int array_size,
+ int elm_size,
+ PValue buffer_offset,
+ const std::array<int, 4>& dest_swizzle);
+
+ FetchInstruction(EVFetchInstr op,
+ EVFetchType type,
+ GPRVector dst,
+ PValue src, int offset,
+ int buffer_id, PValue buffer_offset,
+ EBufferIndexMode cp_rel,
+ bool use_const_field = false);
+
+ FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id,
+ PValue buffer_offset,
+ EVTXDataFormat format,
+ EVFetchNumFormat num_format);
+
+ FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id,
+ EBufferIndexMode cp_rel);
+
+ FetchInstruction(GPRVector dst, PValue src, int scratch_size);
+
+ EVFetchInstr vc_opcode() const { return m_vc_opcode;}
+ EVFetchType fetch_type() const { return m_fetch_type;}
+
+ EVTXDataFormat data_format() const { return m_data_format;}
+ EVFetchNumFormat num_format() const { return m_num_format;}
+ EVFetchEndianSwap endian_swap() const { return m_endian_swap;}
+
+ const Value& src() const { return *m_src;}
+ const GPRVector& dst() const { return m_dst;}
+ uint32_t offset() const { return m_offset;}
+
+ bool is_mega_fetch() const { return m_is_mega_fetch;}
+ uint32_t mega_fetch_count() const { return m_mega_fetch_count;}
+
+ uint32_t buffer_id() const { return m_buffer_id;}
+ uint32_t semantic_id() const { return m_semantic_id;}
+ EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;}
+
+ bool is_signed() const { return m_flags.test(vtx_format_comp_signed);}
+ bool use_const_fields() const { return m_flags.test(vtx_use_const_field);}
+
+ bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);}
+
+ void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);}
+
+ bool uncached() const {return m_uncached; }
+ bool indexed() const {return m_indexed; }
+ int array_base()const {return m_array_base; }
+ int array_size() const {return m_array_size; }
+ int elm_size() const {return m_elm_size; }
+
+ void set_buffer_offset(PValue buffer_offset) {
+ m_buffer_offset = buffer_offset;
+ }
+ PValue buffer_offset() const { return m_buffer_offset; }
+
+ void set_dest_swizzle(const std::array<int,4>& swz);
+ void set_format(EVTXDataFormat fmt);
+
+ int swz(int idx) const { return m_dest_swizzle[idx];}
+
+ bool use_tc() const {return m_flags.test(vtx_use_tc);}
+
+ bool use_vpm() const {return m_flags.test(vtx_vpm);}
+
+ void prelude_append(Instruction *instr);
+
+ const std::vector<PInstruction>& prelude() const;
+
+ bool has_prelude() const {return !m_prelude.empty();}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ EVFetchInstr m_vc_opcode;
+ EVFetchType m_fetch_type;
+
+ EVTXDataFormat m_data_format;
+ EVFetchNumFormat m_num_format;
+ EVFetchEndianSwap m_endian_swap;
+
+ PValue m_src;
+ GPRVector m_dst;
+ uint32_t m_offset;
+
+ bool m_is_mega_fetch;
+ uint32_t m_mega_fetch_count;
+
+ uint32_t m_buffer_id;
+ uint32_t m_semantic_id;
+
+ EBufferIndexMode m_buffer_index_mode;
+ std::bitset<16> m_flags;
+ bool m_uncached;
+ bool m_indexed;
+ int m_array_base;
+ int m_array_size;
+ int m_elm_size;
+ PValue m_buffer_offset;
+ std::array<int, 4> m_dest_swizzle;
+ std::vector<PInstruction> m_prelude;
+};
+
+}
+
+#endif // SFN_INSTRUCTION_FETCH_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_tex.h"
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+namespace r600 {
+
+TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
+ unsigned sid, unsigned rid, PValue sampler_offset):
+ Instruction(tex),
+ m_opcode(op),
+ m_dst(dest),
+ m_src(src),
+ m_sampler_id(sid),
+ m_resource_id(rid),
+ m_flags(0),
+ m_inst_mode(0),
+ m_dest_swizzle{0,1,2,3},
+ m_sampler_offset(sampler_offset)
+
+{
+ memset(m_offset, 0, sizeof (m_offset));
+}
+
+void TexInstruction::set_gather_comp(int cmp)
+{
+ m_inst_mode = cmp;
+}
+
+void TexInstruction::set_offset(unsigned index, int32_t val)
+{
+ assert(index < 3);
+ m_offset[index] = val;
+}
+
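+/* The offsets are encoded as 5-bit values in half-texel steps, hence the
+ * shift by one and the 0x1f mask. */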
+int TexInstruction::get_offset(unsigned index) const
+{
+ assert(index < 3);
+ return (m_offset[index] << 1) & 0x1f;
+}
+
+bool TexInstruction::is_equal_to(const Instruction& rhs) const
+{
+ assert(rhs.type() == tex);
+ const auto& r = static_cast<const TexInstruction&>(rhs);
+ return (m_opcode == r.m_opcode &&
+ m_dst == r.m_dst &&
+ m_src == r.m_src &&
+ m_sampler_id == r.m_sampler_id &&
+ m_resource_id == r.m_resource_id);
+}
+
+void TexInstruction::do_print(std::ostream& os) const
+{
+ const char *map_swz = "xyzw01?_";
+ os << opname(m_opcode) << " R" << m_dst.sel() << ".";
+ for (int i = 0; i < 4; ++i)
+ os << map_swz[m_dest_swizzle[i]];
+
+ os << " " << m_src
+ << " RESID:" << m_resource_id << " SAMPLER:"
+ << m_sampler_id;
+}
+
+const char *TexInstruction::opname(Opcode op)
+{
+ switch (op) {
+ case ld: return "LD";
+ case get_resinfo: return "GET_TEXTURE_RESINFO";
+ case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
+ case get_tex_lod: return "GET_LOD";
+ case get_gradient_h: return "GET_GRADIENTS_H";
+ case get_gradient_v: return "GET_GRADIENTS_V";
+ case set_offsets: return "SET_TEXTURE_OFFSETS";
+ case keep_gradients: return "KEEP_GRADIENTS";
+ case set_gradient_h: return "SET_GRADIENTS_H";
+ case set_gradient_v: return "SET_GRADIENTS_V";
+ case sample: return "SAMPLE";
+ case sample_l: return "SAMPLE_L";
+ case sample_lb: return "SAMPLE_LB";
+ case sample_lz: return "SAMPLE_LZ";
+ case sample_g: return "SAMPLE_G";
+ case sample_g_lb: return "SAMPLE_G_L";
+ case gather4: return "GATHER4";
+ case gather4_o: return "GATHER4_O";
+ case sample_c: return "SAMPLE_C";
+ case sample_c_l: return "SAMPLE_C_L";
+ case sample_c_lb: return "SAMPLE_C_LB";
+ case sample_c_lz: return "SAMPLE_C_LZ";
+ case sample_c_g: return "SAMPLE_C_G";
+ case sample_c_g_lb: return "SAMPLE_C_G_L";
+ case gather4_c: return "GATHER4_C";
+ case gather4_c_o: return "OP_GATHER4_C_O";
+ }
+ return "ERROR";
+}
+
+
+
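+/* Shift the texture coordinates by half a texel; for normalized
+ * coordinates the offset has to be scaled by the reciprocal of the
+ * texture size. */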
+static bool lower_coord_shift_normalized(nir_builder& b, nir_tex_instr *tex)
+{
+ b.cursor = nir_before_instr(&tex->instr);
+
+ nir_ssa_def *size = nir_i2f32(&b, nir_get_texture_size(&b, tex));
+ nir_ssa_def *scale = nir_frcp(&b, size);
+
+ int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ nir_ssa_def *corr = nir_fadd(&b,
+ nir_fmul(&b, nir_imm_float(&b, -0.5f), scale),
+ tex->src[coord_index].src.ssa);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
+ nir_src_for_ssa(corr));
+ return true;
+}
+
+static bool lower_coord_shift_unnormalized(nir_builder& b, nir_tex_instr *tex)
+{
+ b.cursor = nir_before_instr(&tex->instr);
+ int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ nir_ssa_def *corr = nir_fadd(&b, tex->src[coord_index].src.ssa,
+ nir_imm_float(&b, -0.5f));
+ nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
+ nir_src_for_ssa(corr));
+ return true;
+}
+
+static bool
+r600_nir_lower_int_tg4_impl(nir_function_impl *impl, const std::vector<bool>& lower)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ bool progress = false;
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->op == nir_texop_tg4 &&
+ tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
+ if (lower[tex->sampler_index]) {
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
+ lower_coord_shift_normalized(b, tex);
+ else
+ lower_coord_shift_unnormalized(b, tex);
+ progress = true;
+ }
+ }
+ }
+ }
+ }
+ return progress;
+}
+
+/*
+ * This lowering pass works around a bug in r600 when doing TG4 from
+ * integral valued samplers.
+ *
+ * Gather4 should follow the same rules as bilinear filtering, but the
+ * hardware incorrectly forces nearest filtering if the texture format is
+ * integer. The only effect this has on Gather4, which always returns 4
+ * texels as if for bilinear filtering, is that the final coordinates are
+ * off by 0.5 of the texel size.
+ */
+
+bool r600_nir_lower_int_tg4(nir_shader *shader)
+{
+ bool progress = false;
+ bool need_lowering = false;
+
+ std::vector<bool> lower_sampler(shader->uniforms.length(), false);
+ auto is = lower_sampler.begin();
+
+ nir_foreach_variable(var, &shader->uniforms) {
+ if (var->type->is_sampler()) {
+ if (glsl_base_type_is_integer(var->type->sampled_type)) {
+ need_lowering = *is = true;
+ }
+ ++is;
+ }
+ }
+
+ if (need_lowering) {
+ nir_foreach_function(function, shader) {
+ if (function->impl && r600_nir_lower_int_tg4_impl(function->impl, lower_sampler))
+ progress = true;
+ }
+ }
+
+ return progress;
+}
+
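+/* Lower TXL/TXB to TXD: evaluate the LOD (applying bias and min_lod when
+ * present) and convert it into explicit gradients scaled by
+ * 2^lod / texture_size. */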
+static
+bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
+{
+ assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
+ assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
+ assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
+
+ b->cursor = nir_before_instr(&tex->instr);
+
+ int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+ int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
+ int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
+ assert (lod_idx >= 0 || bias_idx >= 0);
+
+ nir_ssa_def *size = nir_get_texture_size(b, tex);
+ nir_ssa_def *lod = (lod_idx >= 0) ?
+ nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
+ nir_get_texture_lod(b, tex);
+
+ if (bias_idx >= 0)
+ lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
+
+ if (min_lod_idx >= 0)
+ lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
+
+ /* TODO: should the sampler's max LOD also be applied here? */
+
+ nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
+ nir_ssa_def *scale = NULL;
+
+ if (tex->is_array) {
+ int cmp_mask = (1 << (size->num_components - 1)) - 1;
+ scale = nir_frcp(b, nir_channels(b, size,
+ (nir_component_mask_t)cmp_mask));
+ } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
+ scale = nir_frcp(b, nir_channels(b, size, 1));
+ scale = nir_swizzle(b, scale, swizzle, 3);
+ }
+
+ nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
+
+ if (lod_idx >= 0)
+ nir_tex_instr_remove_src(tex, lod_idx);
+ if (bias_idx >= 0)
+ nir_tex_instr_remove_src(tex, bias_idx);
+ if (min_lod_idx >= 0)
+ nir_tex_instr_remove_src(tex, min_lod_idx);
+ nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
+ nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));
+
+ tex->op = nir_texop_txd;
+ return true;
+}
+
+
+static bool
+r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ bool progress = false;
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ if (tex->is_shadow &&
+ (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
+ (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
+ progress |= lower_txl_txf_array_or_cube(&b, tex);
+ }
+ }
+ }
+ return progress;
+}
+
+bool
+r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
+{
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
+ progress = true;
+ }
+ return progress;
+}
+
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef INSTRUCTION_TEX_H
+#define INSTRUCTION_TEX_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class TexInstruction : public Instruction {
+public:
+ enum Opcode {
+ ld = FETCH_OP_LD,
+ get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
+ get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES,
+ get_tex_lod = FETCH_OP_GET_LOD,
+ get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
+ get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
+ set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
+ keep_gradients = FETCH_OP_KEEP_GRADIENTS,
+ set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
+ set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
+ sample = FETCH_OP_SAMPLE,
+ sample_l = FETCH_OP_SAMPLE_L,
+ sample_lb = FETCH_OP_SAMPLE_LB,
+ sample_lz = FETCH_OP_SAMPLE_LZ,
+ sample_g = FETCH_OP_SAMPLE_G,
+ sample_g_lb = FETCH_OP_SAMPLE_G_L,
+ gather4 = FETCH_OP_GATHER4,
+ gather4_o = FETCH_OP_GATHER4_O,
+
+ sample_c = FETCH_OP_SAMPLE_C,
+ sample_c_l = FETCH_OP_SAMPLE_C_L,
+ sample_c_lb = FETCH_OP_SAMPLE_C_LB,
+ sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
+ sample_c_g = FETCH_OP_SAMPLE_C_G,
+ sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
+ gather4_c = FETCH_OP_GATHER4_C,
+ gather4_c_o = FETCH_OP_GATHER4_C_O,
+
+ };
+
+ enum Flags {
+ x_unnormalized,
+ y_unnormalized,
+ z_unnormalized,
+ w_unnormalized,
+ grad_fine
+ };
+
+ TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid,
+ unsigned rid, PValue sampler_offset);
+
+ const GPRVector& src() const {return m_src;}
+ const GPRVector& dst() const {return m_dst;}
+ unsigned opcode() const {return m_opcode;}
+ unsigned sampler_id() const {return m_sampler_id;}
+ unsigned resource_id() const {return m_resource_id;}
+
+ void set_offset(unsigned index, int32_t val);
+ int get_offset(unsigned index) const;
+
+ void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
+
+ int inst_mode() const { return m_inst_mode;}
+
+ void set_flag(Flags flag) {
+ m_flags.set(flag);
+ }
+
+ PValue sampler_offset() const {
+ return m_sampler_offset;
+ }
+
+ bool has_flag(Flags flag) const {
+ return m_flags.test(flag);
+ }
+
+ int dest_swizzle(int i) const {
+ assert(i < 4);
+ return m_dest_swizzle[i];
+ }
+
+ void set_dest_swizzle(const std::array<int,4>& swz) {
+ m_dest_swizzle = swz;
+ }
+
+ void set_gather_comp(int cmp);
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ static const char *opname(Opcode code);
+
+ Opcode m_opcode;
+ GPRVector m_dst;
+ GPRVector m_src;
+ unsigned m_sampler_id;
+ unsigned m_resource_id;
+ std::bitset<8> m_flags;
+ int m_offset[3];
+ int m_inst_mode;
+ std::array<int,4> m_dest_swizzle;
+ PValue m_sampler_offset;
+};
+
+bool r600_nir_lower_int_tg4(nir_shader *nir);
+bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
+
+}
+
+#endif // INSTRUCTION_TEX_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_ir_to_assembly.h"
+#include "sfn_conditionaljumptracker.h"
+#include "sfn_callstack.h"
+#include "sfn_instruction_fetch.h"
+
+#include "../r600_shader.h"
+#include "../r600_sq.h"
+
+namespace r600 {
+
+using std::vector;
+
+struct AssemblyFromShaderLegacyImpl {
+
+ AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
+ bool emit(const Instruction::Pointer i);
+ void reset_addr_register() {m_last_addr.reset();}
+
+private:
+ bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
+ bool emit_export(const ExportInstruction & exi);
+ bool emit_streamout(const StreamOutIntruction& instr);
+ bool emit_tex(const TexInstruction & tex_instr);
+ bool emit_vtx(const FetchInstruction& fetch_instr);
+ bool emit_if_start(const IfInstruction & if_instr);
+ bool emit_else(const ElseInstruction & else_instr);
+ bool emit_endif(const IfElseEndInstruction & endif_instr);
+
+ bool emit_loop_begin(const LoopBeginInstruction& instr);
+ bool emit_loop_end(const LoopEndInstruction& instr);
+ bool emit_loop_break(const LoopBreakInstruction& instr);
+ bool emit_loop_continue(const LoopContInstruction& instr);
+
+ bool emit_load_addr(PValue addr);
+ bool emit_fs_pixel_export(const ExportInstruction & exi);
+ bool emit_vs_pos_export(const ExportInstruction & exi);
+ bool emit_vs_param_export(const ExportInstruction & exi);
+ bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
+ bool copy_src(r600_bytecode_alu_src& src, const Value& s);
+
+ ConditionalJumpTracker m_jump_tracker;
+ CallStack m_callstack;
+
+public:
+ r600_bytecode *m_bc;
+ r600_shader *m_shader;
+ r600_shader_key *m_key;
+ r600_bytecode_output m_output;
+ unsigned m_max_color_exports;
+ bool has_pos_output;
+ bool has_param_output;
+ PValue m_last_addr;
+ int m_loop_nesting;
+ int m_nliterals_in_group;
+};
+
+
+AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
+ r600_shader_key *key)
+{
+ impl = new AssemblyFromShaderLegacyImpl(sh, key);
+}
+
+AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
+{
+ delete impl;
+}
+
+bool AssemblyFromShaderLegacy::do_lower(const std::vector<Instruction::Pointer>& ir)
+{
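+ /* Vertex shaders with inputs start by calling the fetch shader that
+ * loads the vertex attributes. */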
+ if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
+ impl->m_shader->ninput > 0)
+ r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
+
+
+ std::vector<Instruction::Pointer> exports;
+
+ for (const auto& i : ir) {
+ if (!impl->emit(i))
+ return false;
+ if (i->type() != Instruction::alu)
+ impl->reset_addr_register();
+ }
+ /*
+ for (const auto& i : exports) {
+ if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
+ return false;
+ }*/
+
+
+ const struct cf_op_info *last = nullptr;
+ if (impl->m_bc->cf_last)
+ last = r600_isa_cf(impl->m_bc->cf_last->op);
+
+ /* alu clause instructions don't have EOP bit, so add NOP */
+ if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
+ || impl->m_bc->cf_last->op == CF_OP_POP)
+ r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
+
+ /* A CALL_FS as last instruction can't carry the EOP bit (this results
+ * in a hang), so replace it by a NOP */
+ else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
+ impl->m_bc->cf_last->op = CF_OP_NOP;
+
+ impl->m_bc->cf_last->end_of_program = 1;
+
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
+{
+ sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
+ switch (i->type()) {
+ case Instruction::alu:
+ return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
+ case Instruction::exprt:
+ return emit_export(static_cast<const ExportInstruction&>(*i));
+ case Instruction::tex:
+ return emit_tex(static_cast<const TexInstruction&>(*i));
+ case Instruction::vtx:
+ return emit_vtx(static_cast<const FetchInstruction&>(*i));
+ case Instruction::cond_if:
+ return emit_if_start(static_cast<const IfInstruction&>(*i));
+ case Instruction::cond_else:
+ return emit_else(static_cast<const ElseInstruction&>(*i));
+ case Instruction::cond_endif:
+ return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
+ case Instruction::loop_begin:
+ return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
+ case Instruction::loop_end:
+ return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
+ case Instruction::loop_break:
+ return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
+ case Instruction::loop_continue:
+ return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
+ case Instruction::streamout:
+ return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
+ default:
+ return false;
+ }
+}
+
+AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
+ r600_shader_key *key):
+ m_callstack(sh->bc),
+ m_bc(&sh->bc),
+ m_shader(sh),
+ m_key(key),
+ has_pos_output(false),
+ has_param_output(false),
+ m_loop_nesting(0),
+ m_nliterals_in_group(0)
+{
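+ /* Emit at least one color export even if no color buffer is bound. */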
+ m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
+}
+
+extern const std::map<EAluOp, int> opcode_map;
+
+bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
+{
+ m_bc->ar_reg = addr->sel();
+ m_bc->ar_chan = addr->chan();
+ m_bc->ar_loaded = 0;
+ m_last_addr = addr;
+
+ sfn_log << SfnLog::assembly << " Prepare to load " << *addr << " into the address register\n";
+
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
+{
+
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(alu));
+ PValue addr_in_use;
+
+ if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
+ std::cerr << "Opcode not handled for " << ai <<"\n";
+ return false;
+ }
+
+ for (unsigned i = 0; i < ai.n_sources(); ++i) {
+ auto& s = ai.src(i);
+ if (s.type() == Value::literal)
+ ++m_nliterals_in_group;
+ }
+
+ /* This instruction group would exceed the limit of literals, so
+ * force a new instruction group by adding a NOP as last
+ * instruction. This will no longer be needed with a real
+ * scheduler */
+ if (m_nliterals_in_group > 4) {
+ sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n";
+ alu.op = op0_nop;
+ alu.last = 1;
+ int retval = r600_bytecode_add_alu(m_bc, &alu);
+ if (retval)
+ return false;
+ memset(&alu, 0, sizeof(alu));
+ m_nliterals_in_group = 0;
+ }
+
+ alu.op = opcode_map.at(ai.opcode());
+
+ /* Missing test whether ai actually has a dest */
+ auto dst = ai.dest();
+
+ if (dst) {
+ if (!copy_dst(alu.dst, *dst))
+ return false;
+
+ alu.dst.write = ai.flag(alu_write);
+ alu.dst.clamp = ai.flag(alu_dst_clamp);
+
+ if (dst->type() == Value::gpr_array_value) {
+ auto& v = static_cast<const GPRArrayValue&>(*dst);
+ PValue addr = v.indirect();
+ if (addr) {
+ if (!m_last_addr || *addr != *m_last_addr) {
+ emit_load_addr(addr);
+ addr_in_use = addr;
+ }
+ alu.dst.rel = addr ? 1 : 0;
+ }
+ }
+ }
+
+ alu.is_op3 = ai.n_sources() == 3;
+
+ for (unsigned i = 0; i < ai.n_sources(); ++i) {
+ auto& s = ai.src(i);
+
+ if (!copy_src(alu.src[i], s))
+ return false;
+ alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
+
+ if (s.type() == Value::gpr_array_value) {
+ auto& v = static_cast<const GPRArrayValue&>(s);
+ PValue addr = v.indirect();
+ if (addr) {
+ assert(!addr_in_use || (*addr_in_use == *addr));
+ if (!m_last_addr || *addr != *m_last_addr) {
+ emit_load_addr(addr);
+ addr_in_use = addr;
+ }
+ alu.src[i].rel = addr ? 1 : 0;
+ }
+ }
+ if (!alu.is_op3)
+ alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
+ }
+
+ if (ai.bank_swizzle() != alu_vec_unknown)
+ alu.bank_swizzle_force = ai.bank_swizzle();
+
+ alu.last = ai.flag(alu_last_instr);
+ alu.update_pred = ai.flag(alu_update_pred);
+ alu.execute_mask = ai.flag(alu_update_exec);
+
+ /* If the destination register is equal to the last loaded address register
+ * then clear the latter one, because the values will no longer be identical */
+ if (m_last_addr)
+ sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";
+
+ if (dst)
+ sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
+
+ if (dst && m_last_addr)
+ if (*dst == *m_last_addr) {
+ sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n";
+ m_last_addr.reset();
+ }
+
+ if (cf_op == cf_alu_undefined)
+ cf_op = ai.cf_type();
+
+ unsigned type = 0;
+ switch (cf_op) {
+ case cf_alu: type = CF_OP_ALU; break;
+ case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
+ case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
+ case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
+ case cf_alu_break: type = CF_OP_ALU_BREAK; break;
+ case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
+ case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
+ case cf_alu_extended: type = CF_OP_ALU_EXT; break;
+ default:
+ assert(0 && "cf_alu_undefined should have been replaced");
+ }
+
+ if (alu.last)
+ m_nliterals_in_group = 0;
+
+ bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
+
+ if (ai.opcode() == op1_mova_int)
+ m_bc->ar_loaded = 0;
+
+ if (ai.opcode() == op1_set_cf_idx0)
+ m_bc->index_loaded[0] = 1;
+
+ if (ai.opcode() == op1_set_cf_idx1)
+ m_bc->index_loaded[1] = 1;
+
+
+ m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
+ ai.opcode() == op2_killne_int ||
+ ai.opcode() == op1_set_cf_idx0 ||
+ ai.opcode() == op1_set_cf_idx1);
+ return retval;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
+{
+ r600_bytecode_output output;
+ memset(&output, 0, sizeof(output));
+ assert(exi.gpr().type() == Value::gpr_vector);
+ const auto& gpr = exi.gpr();
+ output.gpr = gpr.sel();
+ output.elem_size = 3;
+ output.swizzle_x = gpr.chan_i(0);
+ output.swizzle_y = gpr.chan_i(1);
+ output.swizzle_z = gpr.chan_i(2);
+ output.swizzle_w = gpr.chan_i(3);
+ output.burst_count = 1;
+ output.array_base = 60 + exi.location();
+ output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
+ output.type = exi.export_type();
+
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("Error adding pixel export at location %d\n", exi.location());
+ return false;
+ }
+
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
+{
+ r600_bytecode_output output;
+ assert(exi.gpr().type() == Value::gpr_vector);
+ const auto& gpr = exi.gpr();
+
+ memset(&output, 0, sizeof(output));
+ output.gpr = gpr.sel();
+ output.elem_size = 3;
+ output.swizzle_x = gpr.chan_i(0);
+ output.swizzle_y = gpr.chan_i(1);
+ output.swizzle_z = gpr.chan_i(2);
+ output.swizzle_w = gpr.chan_i(3);
+ output.burst_count = 1;
+ output.array_base = exi.location();
+ output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
+ output.type = exi.export_type();
+
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("Error adding pixel export at location %d\n", exi.location());
+ return false;
+ }
+
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
+{
+ if (exi.location() >= m_max_color_exports && exi.location() < 60) {
+ R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
+ exi.location(), m_max_color_exports);
+ return true;
+ }
+
+ assert(exi.gpr().type() == Value::gpr_vector);
+ const auto& gpr = exi.gpr();
+
+ r600_bytecode_output output;
+ memset(&output, 0, sizeof(output));
+
+ output.gpr = gpr.sel();
+ output.elem_size = 3;
+ output.swizzle_x = gpr.chan_i(0);
+ output.swizzle_y = gpr.chan_i(1);
+ output.swizzle_z = gpr.chan_i(2);
+ output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3);
+ output.burst_count = 1;
+ output.array_base = exi.location();
+ output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
+ output.type = exi.export_type();
+
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("Error adding pixel export at location %d\n", exi.location());
+ return false;
+ }
+
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
+{
+ switch (exi.export_type()) {
+ case ExportInstruction::et_pixel:
+ return emit_fs_pixel_export(exi);
+ case ExportInstruction::et_pos:
+ return emit_vs_pos_export(exi);
+ case ExportInstruction::et_param:
+ return emit_vs_param_export(exi);
+ default:
+ R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
+ return false;
+ }
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
+{
+ assert(m_bc->chip_class == EVERGREEN);
+
+ bool needs_workaround = false;
+ int elems = m_callstack.push(FC_PUSH_VPM);
+
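+ /* On chips other than Hemlock, Cypress, and Juniper the implicit push
+ * of ALU_PUSH_BEFORE apparently misbehaves when the callstack usage
+ * lands on an entry boundary, so emit an explicit PUSH and a plain ALU
+ * clause instead. */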
+ if (m_bc->family != CHIP_HEMLOCK &&
+ m_bc->family != CHIP_CYPRESS &&
+ m_bc->family != CHIP_JUNIPER) {
+ unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
+ unsigned dmod2 = (elems) % m_bc->stack.entry_size;
+
+ if (elems && (!dmod1 || !dmod2))
+ needs_workaround = true;
+ }
+
+ auto& pred = if_instr.pred();
+ auto op = cf_alu_push_before;
+
+ if (needs_workaround) {
+ r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
+ m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
+ op = cf_alu;
+ }
+ emit_alu(pred, op);
+
+ r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
+
+ m_jump_tracker.push(m_bc->cf_last, jt_if);
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
+ m_bc->cf_last->pop_count = 1;
+ return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
+{
+ m_callstack.pop(FC_PUSH_VPM);
+
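+ /* Prefer folding the pop into the preceding ALU clause by rewriting it
+ * to ALU_POP_AFTER or ALU_POP2_AFTER; otherwise fall back to an
+ * explicit POP instruction. */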
+ unsigned force_pop = m_bc->force_add_cf;
+ if (!force_pop) {
+ int alu_pop = 3;
+ if (m_bc->cf_last) {
+ if (m_bc->cf_last->op == CF_OP_ALU)
+ alu_pop = 0;
+ else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
+ alu_pop = 1;
+ }
+ alu_pop += 1;
+ if (alu_pop == 1) {
+ m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
+ m_bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
+ m_bc->force_add_cf = 1;
+ } else {
+ force_pop = 1;
+ }
+ }
+
+ if (force_pop) {
+ r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
+ m_bc->cf_last->pop_count = 1;
+ m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
+ }
+
+ return m_jump_tracker.pop(m_bc->cf_last, jt_if);
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
+ m_jump_tracker.push(m_bc->cf_last, jt_loop);
+ m_callstack.push(FC_LOOP);
+ ++m_loop_nesting;
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
+ m_callstack.pop(FC_LOOP);
+ assert(m_loop_nesting);
+ --m_loop_nesting;
+ return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
+ return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
+ return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
+{
+ struct r600_bytecode_output output;
+ memset(&output, 0, sizeof(struct r600_bytecode_output));
+
+ output.gpr = so_instr.gpr().sel();
+ output.elem_size = so_instr.element_size();
+ output.array_base = so_instr.array_base();
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
+ output.burst_count = so_instr.burst_count();
+ output.array_size = so_instr.array_size();
+ output.comp_mask = so_instr.comp_mask();
+ output.op = so_instr.op();
+
+ assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("shader_from_nir: Error creating stream output instruction\n");
+ return false;
+ }
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
+{
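+ /* An indirect sampler offset must be loaded into CF index register 1
+ * by a MOVA_INT followed by SET_CF_IDX1. Inside loops the register is
+ * always reloaded because its contents are not tracked across
+ * iterations. */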
+ auto addr = tex_instr.sampler_offset();
+ if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
+ || m_bc->index_reg[1] != addr->sel())) {
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(alu));
+ alu.op = opcode_map.at(op1_mova_int);
+ alu.dst.chan = 0;
+ alu.src[0].sel = addr->sel();
+ alu.src[0].chan = addr->chan();
+ alu.last = 1;
+ int r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return false;
+
+ m_bc->ar_loaded = 0;
+
+ alu.op = opcode_map.at(op1_set_cf_idx1);
+ alu.dst.chan = 0;
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return false;
+
+ m_bc->index_reg[1] = addr->sel();
+ m_bc->index_loaded[1] = true;
+ }
+
+ r600_bytecode_tex tex;
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.op = tex_instr.opcode();
+ tex.sampler_id = tex_instr.sampler_id();
+ tex.sampler_index_mode = 0;
+ tex.resource_id = tex_instr.resource_id();
+ tex.resource_index_mode = 0;
+ tex.src_gpr = tex_instr.src().sel();
+ tex.dst_gpr = tex_instr.dst().sel();
+ tex.dst_sel_x = tex_instr.dest_swizzle(0);
+ tex.dst_sel_y = tex_instr.dest_swizzle(1);
+ tex.dst_sel_z = tex_instr.dest_swizzle(2);
+ tex.dst_sel_w = tex_instr.dest_swizzle(3);
+ tex.src_sel_x = tex_instr.src().chan_i(0);
+ tex.src_sel_y = tex_instr.src().chan_i(1);
+ tex.src_sel_z = tex_instr.src().chan_i(2);
+ tex.src_sel_w = tex_instr.src().chan_i(3);
+ tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
+ tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
+ tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
+ tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
+ tex.offset_x = tex_instr.get_offset(0);
+ tex.offset_y = tex_instr.get_offset(1);
+ tex.offset_z = tex_instr.get_offset(2);
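+ /* Index mode 2 selects the CF_IDX1 register loaded above when a
+ * sampler offset is present. */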
+ tex.resource_index_mode = (!!addr) ? 2 : 0;
+ tex.sampler_index_mode = tex.resource_index_mode;
+
+ if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
+ tex_instr.opcode() == TexInstruction::get_gradient_v)
+ tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
+ else
+ tex.inst_mod = tex_instr.inst_mode();
+ if (r600_bytecode_add_tex(m_bc, &tex)) {
+ R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
+ return false;
+ }
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
+{
+ int buffer_offset = 0;
+ auto addr = fetch_instr.buffer_offset();
+ auto index_mode = fetch_instr.buffer_index_mode();
+
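+ /* A literal buffer offset is folded into the buffer id, while an
+ * indirect offset must be loaded into CF index register 0 via MOVA_INT
+ * and SET_CF_IDX0. */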
+ if (addr) {
+ if (addr->type() == Value::literal) {
+ const auto& boffs = dynamic_cast<const LiteralValue&>(*addr);
+ buffer_offset = boffs.value();
+ } else {
+ index_mode = bim_zero;
+ if ((!m_bc->index_loaded[0] || m_loop_nesting || m_bc->index_reg[0] != addr->sel())) {
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(alu));
+ alu.op = opcode_map.at(op1_mova_int);
+ alu.dst.chan = 0;
+ alu.src[0].sel = addr->sel();
+ alu.src[0].chan = addr->chan();
+ alu.last = 1;
+ int r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return false;
+
+ m_bc->ar_loaded = 0;
+
+ alu.op = opcode_map.at(op1_set_cf_idx0);
+ alu.dst.chan = 0;
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return false;
+
+ m_bc->index_reg[0] = addr->sel();
+ m_bc->index_loaded[0] = true;
+ }
+ }
+ }
+
+ if (fetch_instr.has_prelude()) {
+ for(auto &i : fetch_instr.prelude()) {
+ if (!emit(i))
+ return false;
+ }
+ }
+
+ struct r600_bytecode_vtx vtx;
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.op = fetch_instr.vc_opcode();
+ vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
+ vtx.fetch_type = fetch_instr.fetch_type();
+ vtx.src_gpr = fetch_instr.src().sel();
+ vtx.src_sel_x = fetch_instr.src().chan();
+ vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
+ vtx.dst_gpr = fetch_instr.dst().sel();
+ vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */
+ vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */
+ vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */
+ vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */
+ vtx.use_const_fields = fetch_instr.use_const_fields();
+ vtx.data_format = fetch_instr.data_format();
+ vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */
+ vtx.endian = fetch_instr.endian_swap();
+ vtx.buffer_index_mode = index_mode;
+ vtx.offset = fetch_instr.offset();
+ vtx.indexed = fetch_instr.indexed();
+ vtx.uncached = fetch_instr.uncached();
+ vtx.elem_size = fetch_instr.elm_size();
+ vtx.array_base = fetch_instr.array_base();
+ vtx.array_size = fetch_instr.array_size();
+ vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();
+
+ if (fetch_instr.use_tc()) {
+ if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
+ R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
+ return false;
+ }
+
+ } else {
+ if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
+ R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
+ return false;
+ }
+ }
+
+ m_bc->cf_last->vpm = fetch_instr.use_vpm();
+ m_bc->cf_last->barrier = 1;
+
+ return true;
+}
+
+extern const std::map<ESDOp, int> ds_opcode_map;
+
+bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
+ const Value& d)
+{
+ assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
+
+ if (d.sel() > 124) {
+ R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
+ return false;
+ }
+
+ dst.sel = d.sel();
+ dst.chan = d.chan();
+
+ if (m_bc->index_reg[1] == dst.sel)
+ m_bc->index_loaded[1] = false;
+
+ if (m_bc->index_reg[0] == dst.sel)
+ m_bc->index_loaded[0] = false;
+
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
+{
+
+ if (s.type() == Value::gpr && s.sel() > 124) {
+ R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
+ return false;
+ }
+
+ if (s.type() == Value::lds_direct) {
+ R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
+ return false;
+ }
+
+ if (s.type() == Value::kconst && s.sel() < 512) {
+ R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
+ return false;
+ }
+
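+ /* Map the common literals 0, 1, 1.0f, and 0.5f to hardware inline
+ * constants so that they don't occupy a slot in the literal group. */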
+ if (s.type() == Value::literal) {
+ auto& v = static_cast<const LiteralValue&>(s);
+ if (v.value() == 0) {
+ src.sel = ALU_SRC_0;
+ src.chan = 0;
+ return true;
+ }
+ if (v.value() == 1) {
+ src.sel = ALU_SRC_1_INT;
+ src.chan = 0;
+ return true;
+ }
+ if (v.value_float() == 1.0f) {
+ src.sel = ALU_SRC_1;
+ src.chan = 0;
+ return true;
+ }
+ if (v.value_float() == 0.5f) {
+ src.sel = ALU_SRC_0_5;
+ src.chan = 0;
+ return true;
+ }
+ src.value = v.value();
+ }
+
+ src.sel = s.sel();
+ src.chan = s.chan();
+ if (s.type() == Value::kconst) {
+ const UniformValue& cv = static_cast<const UniformValue&>(s);
+ src.kc_bank = cv.kcache_bank();
+ }
+
+ return true;
+}
+
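+/* Translation table from the IR ALU opcodes to the r600 ISA opcodes; the
+ * commented-out entries are not (yet) emitted by the NIR backend. */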
+const std::map<EAluOp, int> opcode_map = {
+
+ {op2_add, ALU_OP2_ADD},
+ {op2_mul, ALU_OP2_MUL},
+ {op2_mul_ieee, ALU_OP2_MUL_IEEE},
+ {op2_max, ALU_OP2_MAX},
+ {op2_min, ALU_OP2_MIN},
+ {op2_max_dx10, ALU_OP2_MAX_DX10},
+ {op2_min_dx10, ALU_OP2_MIN_DX10},
+ {op2_sete, ALU_OP2_SETE},
+ {op2_setgt, ALU_OP2_SETGT},
+ {op2_setge, ALU_OP2_SETGE},
+ {op2_setne, ALU_OP2_SETNE},
+ {op2_sete_dx10, ALU_OP2_SETE_DX10},
+ {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
+ {op2_setge_dx10, ALU_OP2_SETGE_DX10},
+ {op2_setne_dx10, ALU_OP2_SETNE_DX10},
+ {op1_fract, ALU_OP1_FRACT},
+ {op1_trunc, ALU_OP1_TRUNC},
+ {op1_ceil, ALU_OP1_CEIL},
+ {op1_rndne, ALU_OP1_RNDNE},
+ {op1_floor, ALU_OP1_FLOOR},
+ {op2_ashr_int, ALU_OP2_ASHR_INT},
+ {op2_lshr_int, ALU_OP2_LSHR_INT},
+ {op2_lshl_int, ALU_OP2_LSHL_INT},
+ {op1_mov, ALU_OP1_MOV},
+ {op0_nop, ALU_OP0_NOP},
+ {op2_mul_64, ALU_OP2_MUL_64},
+ {op1_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
+ {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
+ {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
+ {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
+ {op2_pred_sete, ALU_OP2_PRED_SETE},
+ {op2_pred_setgt, ALU_OP2_PRED_SETGT},
+ {op2_pred_setge, ALU_OP2_PRED_SETGE},
+ {op2_pred_setne, ALU_OP2_PRED_SETNE},
+ //{op2_pred_set_inv, ALU_OP2_PRED_SET},
+ //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
+ //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
+ {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
+ {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
+ {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
+ {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
+ {op2_kille, ALU_OP2_KILLE},
+ {op2_killgt, ALU_OP2_KILLGT},
+ {op2_killge, ALU_OP2_KILLGE},
+ {op2_killne, ALU_OP2_KILLNE},
+ {op2_and_int, ALU_OP2_AND_INT},
+ {op2_or_int, ALU_OP2_OR_INT},
+ {op2_xor_int, ALU_OP2_XOR_INT},
+ {op1_not_int, ALU_OP1_NOT_INT},
+ {op2_add_int, ALU_OP2_ADD_INT},
+ {op2_sub_int, ALU_OP2_SUB_INT},
+ {op2_max_int, ALU_OP2_MAX_INT},
+ {op2_min_int, ALU_OP2_MIN_INT},
+ {op2_max_uint, ALU_OP2_MAX_UINT},
+ {op2_min_uint, ALU_OP2_MIN_UINT},
+ {op2_sete_int, ALU_OP2_SETE_INT},
+ {op2_setgt_int, ALU_OP2_SETGT_INT},
+ {op2_setge_int, ALU_OP2_SETGE_INT},
+ {op2_setne_int, ALU_OP2_SETNE_INT},
+ {op2_setgt_uint, ALU_OP2_SETGT_UINT},
+ {op2_setge_uint, ALU_OP2_SETGE_UINT},
+ {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
+ {op2_killge_uint, ALU_OP2_KILLGE_UINT},
+ //p2_prede_int, ALU_OP2_PREDE_INT},
+ {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
+ {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
+ {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
+ {op2_kille_int, ALU_OP2_KILLE_INT},
+ {op2_killgt_int, ALU_OP2_KILLGT_INT},
+ {op2_killge_int, ALU_OP2_KILLGE_INT},
+ {op2_killne_int, ALU_OP2_KILLNE_INT},
+ {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
+ {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
+ {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
+ {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
+ {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
+ {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
+ {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
+ {op1_bfrev_int, ALU_OP1_BFREV_INT},
+ {op2_addc_uint, ALU_OP2_ADDC_UINT},
+ {op2_subb_uint, ALU_OP2_SUBB_UINT},
+ {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
+ {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
+ {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
+ {op2_set_mode, ALU_OP2_SET_MODE},
+ {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
+ {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
+ {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
+ {op1_exp_ieee, ALU_OP1_EXP_IEEE},
+ {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
+ {op1_log_ieee, ALU_OP1_LOG_IEEE},
+ {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
+ {op1_recip_ff, ALU_OP1_RECIP_FF},
+ {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
+ {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
+ {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
+ {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
+ {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
+ {op1_sin, ALU_OP1_SIN},
+ {op1_cos, ALU_OP1_COS},
+ {op2_mullo_int, ALU_OP2_MULLO_INT},
+ {op2_mulhi_int, ALU_OP2_MULHI_INT},
+ {op2_mullo_uint, ALU_OP2_MULLO_UINT},
+ {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
+ {op1_recip_int, ALU_OP1_RECIP_INT},
+ {op1_recip_uint, ALU_OP1_RECIP_UINT},
+ {op1_recip_64, ALU_OP2_RECIP_64},
+ {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
+ {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
+ {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
+ {op1_sqrt_64, ALU_OP2_SQRT_64},
+ {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
+ {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
+ {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
+ {op2_bfm_int, ALU_OP2_BFM_INT},
+ {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
+ {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
+ {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
+ {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
+ {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
+ {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
+ {op1_bcnt_int, ALU_OP1_BCNT_INT},
+ {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
+ {op1_ffbl_int, ALU_OP1_FFBL_INT},
+ {op1_ffbh_int, ALU_OP1_FFBH_INT},
+ {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
+ {op2_dot_ieee, ALU_OP2_DOT_IEEE},
+ {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
+ {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
+ {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
+ {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
+ {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
+ {op2_mul_uint24, ALU_OP2_MUL_UINT24},
+ {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
+ {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
+ {op2_sete_64, ALU_OP2_SETE_64},
+ {op2_setne_64, ALU_OP2_SETNE_64},
+ {op2_setgt_64, ALU_OP2_SETGT_64},
+ {op2_setge_64, ALU_OP2_SETGE_64},
+ {op2_min_64, ALU_OP2_MIN_64},
+ {op2_max_64, ALU_OP2_MAX_64},
+ {op2_dot4, ALU_OP2_DOT4},
+ {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
+ {op2_cube, ALU_OP2_CUBE},
+ {op1_max4, ALU_OP1_MAX4},
+ {op1_frexp_64, ALU_OP1_FREXP_64},
+ {op1_ldexp_64, ALU_OP2_LDEXP_64},
+ {op1_fract_64, ALU_OP1_FRACT_64},
+ {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
+ {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
+ {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
+ {op2_add_64, ALU_OP2_ADD_64},
+ {op1_mova_int, ALU_OP1_MOVA_INT},
+ {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
+ {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
+ {op2_dot, ALU_OP2_DOT},
+ //p2_mul_prev, ALU_OP2_MUL_PREV},
+ //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
+ //p2_add_prev, ALU_OP2_ADD_PREV},
+ {op2_muladd_prev, ALU_OP2_MULADD_PREV},
+ {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
+ {op2_interp_xy, ALU_OP2_INTERP_XY},
+ {op2_interp_zw, ALU_OP2_INTERP_ZW},
+ {op2_interp_x, ALU_OP2_INTERP_X},
+ {op2_interp_z, ALU_OP2_INTERP_Z},
+ {op0_store_flags, ALU_OP1_STORE_FLAGS},
+ {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
+ {op0_lds_1a, ALU_OP2_LDS_1A},
+ {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
+ {op0_lds_2a, ALU_OP2_LDS_2A},
+ {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
+ {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
+ {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
+ /* op3 opcodes: all left shift 6 */
+ {op3_bfe_uint, ALU_OP3_BFE_UINT},
+ {op3_bfe_int, ALU_OP3_BFE_INT},
+ {op3_bfi_int, ALU_OP3_BFI_INT},
+ {op3_fma, ALU_OP3_FMA},
+ {op3_cndne_64, ALU_OP3_CNDNE_64},
+ {op3_fma_64, ALU_OP3_FMA_64},
+ {op3_lerp_uint, ALU_OP3_LERP_UINT},
+ {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
+ {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
+ {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
+ {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
+ {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
+ {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
+ {op3_muladd, ALU_OP3_MULADD},
+ {op3_muladd_m2, ALU_OP3_MULADD_M2},
+ {op3_muladd_m4, ALU_OP3_MULADD_M4},
+ {op3_muladd_d2, ALU_OP3_MULADD_D2},
+ {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
+ {op3_cnde, ALU_OP3_CNDE},
+ {op3_cndgt, ALU_OP3_CNDGT},
+ {op3_cndge, ALU_OP3_CNDGE},
+ {op3_cnde_int, ALU_OP3_CNDE_INT},
+ {op3_cndgt_int, ALU_OP3_CNDGT_INT},
+ {op3_cndge_int, ALU_OP3_CNDGE_INT},
+ {op3_mul_lit, ALU_OP3_MUL_LIT},
+};
+
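+/* Map the IR's LDS/GDS data-share opcodes to the corresponding hardware
+ * GDS fetch opcodes; DS_OP_INVALID maps to zero. */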
+const std::map<ESDOp, int> ds_opcode_map = {
+ {DS_OP_ADD, FETCH_OP_GDS_ADD},
+ {DS_OP_SUB, FETCH_OP_GDS_SUB},
+ {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
+ {DS_OP_INC, FETCH_OP_GDS_INC},
+ {DS_OP_DEC, FETCH_OP_GDS_DEC},
+ {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
+ {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
+ {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
+ {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
+ {DS_OP_AND, FETCH_OP_GDS_AND},
+ {DS_OP_OR, FETCH_OP_GDS_OR},
+ {DS_OP_XOR, FETCH_OP_GDS_XOR},
+ {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
+ {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
+ {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
+ {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
+ {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
+ {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
+ {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
+ {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
+ {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
+ {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
+ {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
+ {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
+ {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
+ {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
+ {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
+ {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
+ {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
+ {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
+ {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
+ {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
+ {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
+ {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
+ {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
+ {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
+ {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
+ {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
+ {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
+ {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
+ {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
+ {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
+ {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
+ {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
+ {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
+ {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
+ {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
+ {DS_OP_INVALID, 0},
+};
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_nir.h"
+
+struct r600_shader;
+union r600_shader_key;
+
+namespace r600 {
+
+class AssemblyFromShaderLegacy : public AssemblyFromShader {
+public:
+ AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key);
+ ~AssemblyFromShaderLegacy() override;
+private:
+ bool do_lower(const std::vector<Instruction::Pointer>& ir) override ;
+
+ struct AssemblyFromShaderLegacyImpl *impl;
+};
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_nir.h"
+#include "nir_builder.h"
+
+#include "../r600_pipe.h"
+#include "../r600_shader.h"
+
+#include "sfn_instruction_tex.h"
+
+#include "sfn_shader_vertex.h"
+#include "sfn_shader_fragment.h"
+#include "sfn_ir_to_assembly.h"
+
+#include <vector>
+
+namespace r600 {
+
+using std::vector;
+
+ShaderFromNir::ShaderFromNir():sh(nullptr),
+ m_current_if_id(0),
+ m_current_loop_id(0)
+{
+}
+
+bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
+ r600_pipe_shader_selector *sel, r600_shader_key& key,
+ struct r600_shader* gs_shader)
+{
+ sh = shader;
+ assert(sh);
+
+ switch (shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ if (key.vs.as_es) {
+ sfn_log << SfnLog::trans << "VS; next type GS not yet supported\n";
+ } else if (key.vs.as_ls) {
+ sfn_log << "VS: next type TCS and TES not yet supported\n";
+ return false;
+ } else {
+ sfn_log << SfnLog::trans << "Start VS for FS\n";
+ impl.reset(new VertexShaderFromNirForFS(pipe_shader, *sel, key));
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ sfn_log << SfnLog::trans << "Start FS\n";
+ impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key));
+ break;
+ default:
+ return false;
+ }
+
+ sfn_log << SfnLog::trans << "Process declarations\n";
+ if (!process_declaration())
+ return false;
+
+ // at this point all functions should be inlined
+ const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
+
+ sfn_log << SfnLog::trans << "Scan shader\n";
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block) {
+ if (!impl->scan_instruction(instr)) {
+ fprintf(stderr, "Unhandled sysvalue access ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\n");
+ return false;
+ }
+ }
+ }
+
+ sfn_log << SfnLog::trans << "Reserve registers\n";
+ if (!impl->allocate_reserved_registers()) {
+ return false;
+ }
+
+ ValuePool::array_list arrays;
+ sfn_log << SfnLog::trans << "Allocate local registers\n";
+ foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
+ impl->allocate_local_register(*reg, arrays);
+ }
+
+ sfn_log << SfnLog::trans << "Emit shader start\n";
+ impl->allocate_arrays(arrays);
+
+ impl->emit_shader_start();
+
+ sfn_log << SfnLog::trans << "Process shader \n";
+ foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
+ if (!process_cf_node(node))
+ return false;
+ }
+
+ // Add optimizations here
+ sfn_log << SfnLog::trans << "Finalize\n";
+ impl->finalize();
+
+ sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
+ return true;
+}
+
+Shader ShaderFromNir::shader() const
+{
+ return Shader{impl->m_output, impl->get_temp_registers()};
+}
+
+
+bool ShaderFromNir::process_cf_node(nir_cf_node *node)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "CF");
+ switch (node->type) {
+ case nir_cf_node_block:
+ return process_block(nir_cf_node_as_block(node));
+ case nir_cf_node_if:
+ return process_if(nir_cf_node_as_if(node));
+ case nir_cf_node_loop:
+ return process_loop(nir_cf_node_as_loop(node));
+ default:
+ return false;
+ }
+}
+
+bool ShaderFromNir::process_if(nir_if *if_stmt)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "IF");
+
+ if (!impl->emit_if_start(m_current_if_id, if_stmt))
+ return false;
+
+ int if_id = m_current_if_id++;
+ m_if_stack.push(if_id);
+
+ foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
+ if (!process_cf_node(n)) return false;
+
+   if (!if_stmt->else_list.is_empty()) {
+ if (!impl->emit_else_start(if_id))
+ return false;
+
+ foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
+ if (!process_cf_node(n)) return false;
+ }
+
+ if (!impl->emit_ifelse_end(if_id))
+ return false;
+
+ m_if_stack.pop();
+ return true;
+}
+
+bool ShaderFromNir::process_loop(nir_loop *node)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
+ int loop_id = m_current_loop_id++;
+
+ if (!impl->emit_loop_start(loop_id))
+ return false;
+
+ foreach_list_typed(nir_cf_node, n, node, &node->body)
+ if (!process_cf_node(n)) return false;
+
+ if (!impl->emit_loop_end(loop_id))
+ return false;
+
+ return true;
+}
+
+bool ShaderFromNir::process_block(nir_block *block)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
+ nir_foreach_instr(instr, block) {
+ int r = emit_instruction(instr);
+ if (!r) {
+ sfn_log << SfnLog::err << "R600: Unsupported instruction: "
+ << *instr << "\n";
+ return false;
+ }
+ }
+ return true;
+}
+
+
+ShaderFromNir::~ShaderFromNir()
+{
+}
+
+pipe_shader_type ShaderFromNir::processor_type() const
+{
+ return impl->m_processor_type;
+}
+
+
+bool ShaderFromNir::emit_instruction(nir_instr *instr)
+{
+ assert(impl);
+
+ sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return impl->emit_alu_instruction(instr);
+ case nir_instr_type_deref:
+ return impl->emit_deref_instruction(nir_instr_as_deref(instr));
+ case nir_instr_type_intrinsic:
+ return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
+ case nir_instr_type_load_const:
+ return impl->set_literal_constant(nir_instr_as_load_const(instr));
+ case nir_instr_type_tex:
+ return impl->emit_tex_instruction(instr);
+ case nir_instr_type_jump:
+ return impl->emit_jump_instruction(nir_instr_as_jump(instr));
+   case nir_instr_type_ssa_undef:
+      return impl->create_undef(nir_instr_as_ssa_undef(instr));
+   default:
+      fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
+      nir_print_instr(instr, stderr);
+      fprintf(stderr, "'\n");
+      return false;
+ }
+}
+
+bool ShaderFromNir::process_declaration()
+{
+ // scan declarations
+ nir_foreach_variable(variable, &sh->inputs) {
+ if (!impl->process_inputs(variable)) {
+ fprintf(stderr, "R600: error parsing input varible %s\n", variable->name);
+ return false;
+ }
+ }
+
+ // scan declarations
+ nir_foreach_variable(variable, &sh->outputs) {
+ if (!impl->process_outputs(variable)) {
+ fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name);
+ return false;
+ }
+ }
+
+ // scan declarations
+ nir_foreach_variable(variable, &sh->uniforms) {
+ if (!impl->process_uniforms(variable)) {
+ fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+const std::vector<Instruction::Pointer>& ShaderFromNir::shader_ir() const
+{
+ assert(impl);
+ return impl->m_output;
+}
+
+
+AssemblyFromShader::~AssemblyFromShader()
+{
+}
+
+bool AssemblyFromShader::lower(const std::vector<Instruction::Pointer>& ir)
+{
+ return do_lower(ir);
+}
+
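+/* Lower the vector pack/unpack_half_2x16 ops to their per-component
+ * *_split variants, which the backend can handle directly. */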
+static nir_ssa_def *
+r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
+{
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ switch (alu->op) {
+ case nir_op_unpack_half_2x16: {
+ nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
+ return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
+ nir_unpack_half_2x16_split_y(b, packed));
+
+ }
+ case nir_op_pack_half_2x16: {
+ nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
+ return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
+ nir_channel(b, src_vec2, 1));
+ }
+ default:
+ return nullptr;
+ }
+}
+
+bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
+{
+ return instr->type == nir_instr_type_alu;
+}
+
+bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
+{
+ return nir_shader_lower_instructions(shader,
+ r600_nir_lower_pack_unpack_2x16_filter,
+ r600_nir_lower_pack_unpack_2x16_impl,
+ nullptr);
+}
+
+}
+
+using r600::r600_nir_lower_int_tg4;
+using r600::r600_nir_lower_pack_unpack_2x16;
+
+int
+r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
+{
+ return glsl_count_vec4_slots(type, false, is_bindless);
+}
+
+void
+r600_get_natural_size_align_bytes(const struct glsl_type *type,
+ unsigned *size, unsigned *align)
+{
+ if (type->base_type != GLSL_TYPE_ARRAY) {
+ *align = 1;
+ *size = 1;
+ } else {
+ unsigned elem_size, elem_align;
+ glsl_get_natural_size_align_bytes(type->fields.array,
+ &elem_size, &elem_align);
+ *align = 1;
+ *size = type->length;
+ }
+}
+
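+/* Run one round of the generic NIR cleanup passes; returns true if any
+ * pass made progress so the caller can iterate to a fixed point. */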
+static bool
+optimize_once(nir_shader *shader)
+{
+ bool progress = false;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ NIR_PASS(progress, shader, nir_opt_algebraic);
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
+ NIR_PASS(progress, shader, nir_opt_vectorize);
+
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+
+ if (nir_opt_trivial_continues(shader)) {
+ progress = true;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ }
+
+ NIR_PASS(progress, shader, nir_opt_if, false);
+ NIR_PASS(progress, shader, nir_opt_dead_cf);
+ NIR_PASS(progress, shader, nir_opt_cse);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);
+
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ NIR_PASS(progress, shader, nir_opt_undef);
+
+ NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_shader_in);
+ NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_shader_out);
+ return progress;
+}
+
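+/* Check whether any ALU instruction carries a saturate modifier; the
+ * optimization loop below is skipped in that case. */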
+bool has_saturate(const nir_function *func)
+{
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type == nir_instr_type_alu) {
+ auto alu = nir_instr_as_alu(instr);
+ if (alu->dest.saturate)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+int r600_shader_from_nir(struct r600_context *rctx,
+ struct r600_pipe_shader *pipeshader,
+ r600_shader_key *key)
+{
+ char filename[4000];
+ struct r600_pipe_shader_selector *sel = pipeshader->selector;
+
+ r600::ShaderFromNir convert;
+
+ if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
+ fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
+ nir_print_shader(sel->nir, stderr);
+ fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
+ }
+
+ NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
+ NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
+ NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+
+ static const struct nir_lower_tex_options lower_tex_options = {
+ .lower_txp = ~0u,
+ };
+ NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
+
+ NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
+
+ NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4);
+ NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
+
+ NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
+ nir_lower_io_lower_64bit_to_32);
+
+ if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
+ sel->nir->info.stage == MESA_SHADER_TESS_EVAL)
+ NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
+ nir_lower_io_lower_64bit_to_32);
+
+ if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL)
+ NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size,
+ nir_lower_io_lower_64bit_to_32);
+
+ const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
+ bool optimize = func->impl->registers.length() == 0 && !has_saturate(func);
+
+
+   /* It seems the output of this optimization is cached somewhere, and
+    * when there are registers we can no longer copy propagate, so skip
+    * the optimization in that case. (There is probably a better way.)
+    */
+   while (optimize && optimize_once(sel->nir));
+
+ NIR_PASS_V(sel->nir, nir_lower_locals_to_regs);
+ //NIR_PASS_V(sel->nir, nir_opt_algebraic);
+ //NIR_PASS_V(sel->nir, nir_copy_prop);
+ NIR_PASS_V(sel->nir, nir_lower_to_source_mods, nir_lower_float_source_mods);
+ NIR_PASS_V(sel->nir, nir_convert_from_ssa, true);
+ NIR_PASS_V(sel->nir, nir_opt_dce);
+
+ if ((rctx->screen->b.debug_flags & DBG_NIR) &&
+ (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
+ fprintf(stderr, "-- NIR --------------------------------------------------------\n");
+ struct nir_function *func = (struct nir_function *)exec_list_get_head(&sel->nir->functions);
+ nir_index_ssa_defs(func->impl);
+ nir_print_shader(sel->nir, stderr);
+ fprintf(stderr, "-- END --------------------------------------------------------\n");
+ }
+
+ memset(&pipeshader->shader, 0, sizeof(r600_shader));
+
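+   /* Example: clip_distance_array_size = 2 and cull_distance_array_size = 1
+    * yield clip_dist_write = 0b011, cull_dist_write = 0b100 and
+    * cc_dist_mask = 0b111. */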
+ if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ sel->nir->info.stage == MESA_SHADER_VERTEX ||
+ sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
+ pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1);
+ pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1)
+ << sel->nir->info.clip_distance_array_size;
+ pipeshader->shader.cc_dist_mask = (1 << (sel->nir->info.cull_distance_array_size +
+ sel->nir->info.clip_distance_array_size)) - 1;
+ }
+
+   // For debugging we print out the complete failed shader and use
+   // exceptions instead of asserts.
+ bool r;
+ try {
+ struct r600_shader* gs_shader = nullptr;
+ if (rctx->gs_shader)
+ gs_shader = &rctx->gs_shader->current->shader;
+ r = convert.lower(sel->nir, pipeshader, sel, *key, gs_shader);
+
+ } catch (std::logic_error& x) {
+ r = false;
+ }
+ if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
+ static int shnr = 0;
+
+ snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++);
+
+ if (access(filename, F_OK) == -1) {
+ FILE *f = fopen(filename, "w");
+
+ if (f) {
+ fprintf(f, "const char *shader_blob_%s = {\nR\"(", sel->nir->info.name);
+ nir_print_shader(sel->nir, f);
+ fprintf(f, ")\";\n");
+ fclose(f);
+ }
+ }
+ if (!r)
+ return -2;
+ }
+
+ auto shader = convert.shader();
+
+ r600_screen *rscreen = rctx->screen;
+ r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
+ rscreen->has_compressed_msaa_texturing);
+
+ r600::sfn_log << r600::SfnLog::shader_info
+ << "pipeshader->shader.processor_type = "
+ << pipeshader->shader.processor_type << "\n";
+
+ pipeshader->shader.bc.type = pipeshader->shader.processor_type;
+ pipeshader->shader.bc.isa = rctx->isa;
+
+ r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
+ if (!afs.lower(shader.m_ir)) {
+ R600_ERR("%s: Lowering to assembly failed\n", __func__);
+ return -1;
+ }
+
+ if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
+ r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
+ generate_gs_copy_shader(rctx, pipeshader, &sel->so);
+ assert(pipeshader->gs_copy_shader);
+ } else {
+ r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
+ }
+
+ return 0;
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_NIR_H
+#define SFN_NIR_H
+
+#include "nir.h"
+
+#ifdef __cplusplus
+#include "sfn_shader_base.h"
+#include <vector>
+
+namespace r600 {
+
+bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader);
+
+bool r600_lower_scratch_addresses(nir_shader *shader);
+
+bool r600_lower_ubo_to_align16(nir_shader *shader);
+
+class Shader {
+public:
+ std::vector<PInstruction>& m_ir;
+ ValueMap m_temp;
+};
+
+class ShaderFromNir {
+public:
+ ShaderFromNir();
+ ~ShaderFromNir();
+
+ unsigned ninputs() const;
+
+ bool lower(const nir_shader *shader, r600_pipe_shader *sh,
+ r600_pipe_shader_selector *sel, r600_shader_key &key,
+ r600_shader *gs_shader);
+
+ bool process_declaration();
+
+ pipe_shader_type processor_type() const;
+
+ bool emit_instruction(nir_instr *instr);
+
+ const std::vector<Instruction::Pointer>& shader_ir() const;
+
+ Shader shader() const;
+private:
+
+ bool process_block();
+ bool process_cf_node(nir_cf_node *node);
+ bool process_if(nir_if *node);
+ bool process_loop(nir_loop *node);
+ bool process_block(nir_block *node);
+
+ std::unique_ptr<ShaderFromNirProcessor> impl;
+ const nir_shader *sh;
+
+ int m_current_if_id;
+ int m_current_loop_id;
+ std::stack<int> m_if_stack;
+ int scratch_size;
+};
+
+class AssemblyFromShader {
+public:
+ virtual ~AssemblyFromShader();
+ bool lower(const std::vector<Instruction::Pointer>& ir);
+private:
+ virtual bool do_lower(const std::vector<Instruction::Pointer>& ir) = 0 ;
+};
+
+}
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int r600_shader_from_nir(struct r600_context *rctx,
+ struct r600_pipe_shader *pipeshader,
+ union r600_shader_key *key);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif // SFN_NIR_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "../r600_pipe.h"
+#include "../r600_shader.h"
+#include "sfn_shader_vertex.h"
+
+#include "sfn_shader_fragment.h"
+#include "sfn_ir_to_assembly.h"
+#include "sfn_nir.h"
+#include "sfn_instruction_fetch.h"
+
+#include <iostream>
+
+#define ENABLE_DEBUG 1
+
+#ifdef ENABLE_DEBUG
+#define DEBUG_SFN(X) \
+ do {\
+ X; \
+ } while (0)
+#else
+#define DEBUG_SFN(X)
+#endif
+
+namespace r600 {
+
+using namespace std;
+
+
+ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
+ r600_pipe_shader_selector& sel,
+ r600_shader &sh_info):
+ m_processor_type(ptype),
+ m_sh_info(sh_info),
+ m_tex_instr(*this),
+ m_alu_instr(*this),
+ m_pending_else(nullptr),
+ m_next_hwatomic_loc(0),
+ m_sel(sel)
+{
+ m_sh_info.processor_type = ptype;
+}
+
+
+ShaderFromNirProcessor::~ShaderFromNirProcessor()
+{
+}
+
+bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_tex: {
+ nir_tex_instr *t = nir_instr_as_tex(instr);
+ if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ sh_info().uses_tex_buffers = true;
+ }
+ default:
+ ;
+ }
+
+ return scan_sysvalue_access(instr);
+}
+
+bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
+{
+ // m_uniform_type_map
+ m_uniform_type_map[uniform->data.location] = uniform->type;
+
+ if (uniform->type->contains_atomic()) {
+ int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
+ sh_info().nhwatomic += natomics;
+
+ if (uniform->type->is_array())
+ sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
+
+ sh_info().uses_atomics = 1;
+
+ struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
+ ++sh_info().nhwatomic_ranges;
+ atom.buffer_id = uniform->data.binding;
+ atom.hw_idx = m_next_hwatomic_loc;
+ atom.start = m_next_hwatomic_loc;
+ atom.end = atom.start + natomics - 1;
+ m_next_hwatomic_loc = atom.end + 1;
+ //atom.array_id = uniform->type->is_array() ? 1 : 0;
+
+ m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;
+
+ sfn_log << SfnLog::io << "HW_ATOMIC file count: "
+ << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
+ }
+
+ if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
+ sh_info().uses_images = 1;
+ }
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
+{
+ return do_process_inputs(input);
+}
+
+bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
+{
+ return do_process_outputs(output);
+}
+
+void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
+{
+ nir_variable *var = nir_deref_instr_get_variable(instr);
+
+ assert(instr->mode == nir_var_function_temp);
+ assert(glsl_type_is_array(var->type));
+
+ // add an alias for the index to the register(s);
+
+
+}
+
+void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
+{
+ auto& dest = instr->dest;
+ unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
+ m_var_mode[instr->var] = instr->mode;
+ m_var_derefs[index] = instr->var;
+
+ sfn_log << SfnLog::io << "Add var deref:" << index
+ << " with DDL:" << instr->var->data.driver_location << "\n";
+}
+
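+/* Compute the SPI semantic id used to match the outputs of one stage with
+ * the inputs of the next: generics use sid + 1, other named params pack
+ * name and sid into 8 bits ((0x80 | (name << 3) | sid) + 1), and semantics
+ * that are not routed through the SPI get 0. */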
+void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
+{
+ switch (io.name) {
+ case TGSI_SEMANTIC_POSITION:
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ case TGSI_SEMANTIC_FACE:
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ io.spi_sid = 0;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ io.spi_sid = io.sid + 1;
+ break;
+ default:
+ /* For non-generic params - pack name and sid into 8 bits */
+ io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
+ }
+}
+
+const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
+{
+ unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;
+
+ sfn_log << SfnLog::io << "Search for deref:" << index << "\n";
+
+ auto v = m_var_derefs.find(index);
+ if (v != m_var_derefs.end())
+ return v->second;
+
+ fprintf(stderr, "R600: could not find deref with index %d\n", index);
+
+ return nullptr;
+
+ /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
+ return nir_deref_instr_get_variable(deref); */
+}
+
+bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
+{
+ return m_tex_instr.emit(instr);
+}
+
+void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
+{
+ if (m_pending_else) {
+ m_output.push_back(PInstruction(m_pending_else));
+ m_pending_else = nullptr;
+ }
+
+ r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
+ m_output.push_back(Instruction::Pointer(ir));
+}
+
+void ShaderFromNirProcessor::emit_shader_start()
+{
+ /* placeholder, may become an abstract method */
+}
+
+bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
+{
+ switch (instr->type) {
+ case nir_jump_break: {
+ auto b = new LoopBreakInstruction();
+ emit_instruction(b);
+ return true;
+ }
+ case nir_jump_continue: {
+ auto b = new LoopContInstruction();
+ emit_instruction(b);
+ return true;
+ }
+ default: {
+ nir_instr *i = reinterpret_cast<nir_instr*>(instr);
+ sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n";
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
+{
+ return m_alu_instr.emit(instr);
+}
+
+bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
+{
+ return false;
+}
+
+bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
+{
+ LoopBeginInstruction *loop = new LoopBeginInstruction();
+ emit_instruction(loop);
+ m_loop_begin_block_map[loop_id] = loop;
+ return true;
+}
+bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
+{
+ auto start = m_loop_begin_block_map.find(loop_id);
+ if (start == m_loop_begin_block_map.end()) {
+ sfn_log << SfnLog::err << "End loop: Loop start for "
+ << loop_id << " not found\n";
+ return false;
+ }
+ LoopEndInstruction *loop = new LoopEndInstruction(start->second);
+ emit_instruction(loop);
+
+ m_loop_begin_block_map.erase(start);
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
+{
+
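+   /* Evaluate the condition with PRED_SETNE_INT against zero; the
+    * instruction updates both the predicate and the exec mask, and the
+    * ALU clause pushes the CF state before it runs. */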
+ auto value = from_nir(if_stmt->condition, 0, 0);
+ AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
+ value, Value::zero, EmitInstruction::last);
+ pred->set_flag(alu_update_exec);
+ pred->set_flag(alu_update_pred);
+ pred->set_cf_type(cf_alu_push_before);
+
+ IfInstruction *ir = new IfInstruction(pred);
+ emit_instruction(ir);
+ assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
+ m_if_block_start_map[if_id] = ir;
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_else_start(int if_id)
+{
+ auto iif = m_if_block_start_map.find(if_id);
+ if (iif == m_if_block_start_map.end()) {
+ std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
+ return false;
+ }
+
+ if (iif->second->type() != Instruction::cond_if) {
+ std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
+ return false;
+ }
+ IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
+ ElseInstruction *ir = new ElseInstruction(if_instr);
+ m_if_block_start_map[if_id] = ir;
+ m_pending_else = ir;
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
+{
+ auto ifelse = m_if_block_start_map.find(if_id);
+ if (ifelse == m_if_block_start_map.end()) {
+ std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
+ return false;
+ }
+
+ if (ifelse->second->type() != Instruction::cond_if &&
+ ifelse->second->type() != Instruction::cond_else) {
+ std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
+ return false;
+ }
+   /* Clear the pending else; if the else branch was empty, none will be emitted */
+
+ m_pending_else = nullptr;
+
+ IfElseEndInstruction *ir = new IfElseEndInstruction();
+ emit_instruction(ir);
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ if (emit_intrinsic_instruction_override(instr))
+ return true;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_deref: {
+ auto var = get_deref_location(instr->src[0]);
+ if (!var)
+ return false;
+ auto mode_helper = m_var_mode.find(var);
+ if (mode_helper == m_var_mode.end()) {
+ cerr << "r600-nir: variable '" << var->name << "' not found\n";
+ return false;
+ }
+ switch (mode_helper->second) {
+ case nir_var_shader_in:
+ return emit_load_input_deref(var, instr);
+ case nir_var_function_temp:
+ return emit_load_function_temp(var, instr);
+ default:
+ cerr << "r600-nir: Unsupported mode" << mode_helper->second
+ << "for src variable\n";
+ return false;
+ }
+ }
+ case nir_intrinsic_store_deref:
+ return emit_store_deref(instr);
+ case nir_intrinsic_load_uniform:
+ return reserve_uniform(instr);
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ return emit_discard_if(instr);
+ case nir_intrinsic_load_ubo:
+ return emit_load_ubo(instr);
+ case nir_intrinsic_copy_deref:
+ case nir_intrinsic_load_constant:
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_store_output:
+ default:
+ fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
+ return false;
+ }
+ return false;
+}
+
+bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
+{
+ return false;
+}
+
+bool
+ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
+{
+ return false;
+}
+
+bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
+{
+ if (!dest.is_ssa) {
+ auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
+ if (as_last)
+ ir->set_flag(alu_last_instr);
+ emit_instruction(ir);
+ } else {
+ inject_register(dest.ssa.index, chan, value, true);
+ }
+ return true;
+}
+
+GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
+                                                                    unsigned mask,
+                                                                    const GPRVector::Swizzle& swizzle)
+{
+ GPRVector *result = nullptr;
+ int sel = lookup_register_index(src);
+ if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
+ from_nir(src, 0)->chan() == 0) {
+      /* If the x-channel is really an x-channel register then we are
+       * pretty safe to assume that the values come as we need them */
+ result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
+ } else {
+ AluInstruction *ir = nullptr;
+ int sel = allocate_temp_register();
+ GPRVector::Values v;
+ for (int i = 0; i < 4; ++i) {
+ v[i] = PValue(new GPRValue(sel, swizzle[i]));
+ if (swizzle[i] < 4 && (mask & (1 << i))) {
+ ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
+ EmitInstruction::write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ result = new GPRVector(v);
+ }
+ return result;
+}
+
+bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
+{
+ nir_src& src0 = instr->src[0];
+ nir_src& src1 = instr->src[1];
+
+ int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
+ const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);
+
+ int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
+ const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
+ if (literal0) {
+ if (literal1) {
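+         /* Both buffer id and offset are literals: split the byte offset
+          * into a vec4 slot (>> 4) and a start component ((>> 2) & 3). For
+          * SSA destinations the uniform is registered for lazy use, so no
+          * extra move is needed. */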
+ uint bufid = literal0->value[0].u32;
+ uint buf_ofs = literal1->value[0].u32 >> 4;
+ int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < instr->num_components; ++i) {
+ int cmp = buf_cmp + i;
+ assert(cmp < 4);
+ auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
+ if (instr->dest.is_ssa)
+ add_uniform((instr->dest.ssa.index << 2) + i, u);
+ else {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+
+ } else {
+ /* literal0 is lost ...*/
+ return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
+ }
+ } else {
+      /* TODO: This could also be solved by using the CF index on the ALU
+       * block, which would probably make sense when there is more than one
+       * load with the same buffer ID. */
+ PValue bufid = from_nir(instr->src[0], 0, 0);
+ PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
+ GPRVector trgt;
+ for (int i = 0; i < 4; ++i)
+ trgt.set_reg_i(i, from_nir(instr->dest, i));
+
+ auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
+ 1, bufid, bim_zero);
+
+ emit_instruction(ir);
+ for (int i = 0; i < instr->num_components ; ++i) {
+ add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
+ }
+ m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
+ return true;
+ }
+
+}
+
+bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ if (instr->intrinsic == nir_intrinsic_discard_if) {
+ emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
+ {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
+
+ } else {
+ emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
+ {Value::zero, Value::zero}, {alu_last_instr}));
+ }
+ m_sh_info.uses_kill = 1;
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
+ nir_intrinsic_instr* instr)
+{
+ return do_emit_load_deref(var, instr);
+}
+
+bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "'\n";
+
+
+   /* If the target register is an SSA register and the load is not
+    * indirect, then we can load lazily, i.e. the uniform value can
+    * be used directly. Otherwise we have to load the data for real
+    * right away.
+    */
+
+ /* Try to find the literal that defines the array index */
+ const nir_load_const_instr* literal = nullptr;
+ if (instr->src[0].is_ssa)
+ literal = get_literal_constant(instr->src[0].ssa->index);
+
+ int base = nir_intrinsic_base(instr);
+ if (literal) {
+ AluInstruction *ir = nullptr;
+
+ for (int i = 0; i < instr->num_components ; ++i) {
+ PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
+ sfn_log << SfnLog::io << "uniform "
+ << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
+
+ if (instr->dest.is_ssa)
+ add_uniform((instr->dest.ssa.index << 2) + i, u);
+ else {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
+ u, {alu_write});
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ } else {
+ PValue addr = from_nir(instr->src[0], 0, 0);
+ return load_uniform_indirect(instr, addr, 16 * base, 0);
+ }
+ return true;
+}
+
+bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
+{
+ if (!addr) {
+ std::cerr << "r600-nir: don't know how uniform is addressed\n";
+ return false;
+ }
+
+ GPRVector trgt;
+ for (int i = 0; i < 4; ++i)
+ trgt.set_reg_i(i, from_nir(instr->dest, i));
+
+ if (addr->type() != Value::gpr) {
+ emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
+ addr = trgt.reg_i(0);
+ }
+
+ /* FIXME: buffer index and index mode are not set correctly */
+   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
+ bufferid, PValue(), bim_none);
+ emit_instruction(ir);
+ for (int i = 0; i < instr->num_components ; ++i) {
+ add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
+ }
+ m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
+ return true;
+}
+
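+/* Load an immediate: 0, 1 (int), 1.0f and 0.5f map to the hardware's
+ * inline constants, 1-bit booleans expand to 0 or 0xffffffff, and
+ * everything else becomes an explicit literal value. */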
+AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < literal->def.num_components ; ++i) {
+ if (writemask & (1 << i)){
+ PValue lsrc;
+ switch (literal->def.bit_size) {
+
+ case 1:
+ sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
+ lsrc = literal->value[i].b ?
+ PValue(new LiteralValue( 0xffffffff, i)) :
+ Value::zero;
+ break;
+ case 32:
+ sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
+ if (literal->value[i].u32 == 0)
+ lsrc = Value::zero;
+ else if (literal->value[i].u32 == 1)
+ lsrc = Value::one_i;
+ else if (literal->value[i].f32 == 1.0f)
+ lsrc = Value::one_f;
+ else if (literal->value[i].f32 == 0.5f)
+ lsrc = Value::zero_dot_5;
+ else
+ lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
+ break;
+ default:
+ sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
+ << " falling back to 32 bit\n";
+ lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
+ }
+ ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
+
+ emit_instruction(ir);
+ }
+ }
+ return ir;
+}
+
+PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
+{
+ PValue value = from_nir(src, component);
+ if (value->type() != Value::gpr &&
+ value->type() != Value::gpr_vector &&
+ value->type() != Value::gpr_array_value) {
+ unsigned temp = allocate_temp_register();
+ PValue retval(new GPRValue(temp, component));
+ emit_instruction(new AluInstruction(op1_mov, retval, value,
+ EmitInstruction::last_write));
+ value = retval;
+ }
+ return value;
+}
+
+bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
+{
+ auto out_var = get_deref_location(instr->src[0]);
+ if (!out_var)
+ return false;
+
+ return do_emit_store_deref(out_var, instr);
+}
+
+bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "'\n";
+
+ /* Give the specific shader type a chance to process this, i.e. Geometry and
+ * tesselation shaders need specialized deref_array, for the other shaders
+ * it is lowered.
+ */
+ if (emit_deref_instruction_override(instr))
+ return true;
+
+ switch (instr->deref_type) {
+ case nir_deref_type_var:
+ set_var_address(instr);
+ return true;
+ case nir_deref_type_array:
+ case nir_deref_type_array_wildcard:
+ case nir_deref_type_struct:
+ case nir_deref_type_cast:
+ default:
+ fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
+ }
+ return false;
+}
+
+void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
+{
+ AluInstruction *ir = nullptr;
+ PValue sv[4];
+
+ assert(src.src.is_ssa);
+
+ for (int i = 0; i < src.src.ssa->num_components ; ++i) {
+ unsigned uindex = (src.src.ssa->index << 2) + i;
+ sv[i] = uniform(uindex);
+ assert(sv[i]);
+ }
+
+ for (int i = 0; i < src.src.ssa->num_components ; ++i) {
+ ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
+ EmitInstruction::write);
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+}
+
+
+
+bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> srcs,
+ const std::set<AluModifiers>& m_flags)
+{
+ AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
+ emit_instruction(ir);
+ return true;
+}
+
+void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
+{
+ m_output_register_map[loc] = gpr;
+}
+
+void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
+{
+ r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
+ m_export_output.push_back(PInstruction(ir));
+}
+
+const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
+{
+ const GPRVector *retval = nullptr;
+ auto val = m_output_register_map.find(location);
+ if (val != m_output_register_map.end())
+ retval = val->second;
+ return retval;
+}
+
+void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
+{
+ r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
+ m_inputs[pos] = var;
+}
+
+void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
+{
+ r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var << "\n";
+ m_outputs[pos] = var;
+}
+
+void ShaderFromNirProcessor::finalize()
+{
+ do_finalize();
+
+ for (auto& i : m_inputs)
+ m_sh_info.input[i.first].gpr = i.second->sel();
+
+ for (auto& i : m_outputs)
+ m_sh_info.output[i.first].gpr = i.second->sel();
+
+ m_output.insert(m_output.end(), m_export_output.begin(), m_export_output.end());
+ m_export_output.clear();
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_shader_from_nir_h
+#define sfn_shader_from_nir_h
+
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir_types.h"
+
+#include "sfn_instruction_export.h"
+#include "sfn_alu_defines.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+#include "sfn_instruction_cf.h"
+#include "sfn_emittexinstruction.h"
+#include "sfn_emitaluinstruction.h"
+
+#include <vector>
+#include <set>
+#include <stack>
+
+struct nir_instr;
+
+namespace r600 {
+
+extern SfnLog sfn_log;
+
+class ShaderFromNirProcessor : public ValuePool {
+public:
+ ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
+ r600_shader& sh_info);
+ virtual ~ShaderFromNirProcessor();
+
+ void emit_instruction(Instruction *ir);
+
+ PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component);
+ GPRVector *vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
+ const GPRVector::Swizzle& swizzle);
+
+ bool emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src0,
+ const std::set<AluModifiers>& m_flags);
+ void emit_export_instruction(WriteoutInstruction *ir);
+
+ void split_constants(nir_alu_instr* instr);
+ void load_uniform(const nir_alu_src& src);
+
+ const nir_variable *get_deref_location(const nir_src& src) const;
+protected:
+
+ void set_var_address(nir_deref_instr *instr);
+ void set_input(unsigned pos, PValue var);
+ void set_output(unsigned pos, PValue var);
+
+ void evaluate_spi_sid(r600_shader_io &io);
+
+ r600_shader& sh_info() {return m_sh_info;}
+
+ bool scan_instruction(nir_instr *instr);
+
+ virtual bool scan_sysvalue_access(nir_instr *instr) = 0;
+
+ bool emit_if_start(int if_id, nir_if *if_stmt);
+ bool emit_else_start(int if_id);
+ bool emit_ifelse_end(int if_id);
+
+ bool emit_loop_start(int loop_id);
+ bool emit_loop_end(int loop_id);
+ bool emit_jump_instruction(nir_jump_instr *instr);
+
+ const GPRVector *output_register(unsigned location) const;
+
+ bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
+ bool as_last = true);
+ void add_param_output_reg(int loc, const GPRVector *gpr);
+ void inc_atomic_file_count();
+ std::bitset<8> m_sv_values;
+
+ enum ESlots {
+ es_face,
+ es_instanceid,
+ es_pos,
+ es_sample_mask_in,
+ es_sample_id,
+ es_vertexid,
+ };
+
+private:
+ virtual bool allocate_reserved_registers() = 0;
+
+ bool emit_alu_instruction(nir_instr *instr);
+ bool emit_deref_instruction(nir_deref_instr* instr);
+ bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
+ virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
+ bool emit_tex_instruction(nir_instr* instr);
+ bool emit_discard_if(nir_intrinsic_instr* instr);
+ bool emit_load_ubo(nir_intrinsic_instr* instr);
+ bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
+   bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufid);
+
+ /* Code creating functions */
+ bool emit_load_input_deref(const nir_variable *var, nir_intrinsic_instr* instr);
+ bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
+ AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
+
+ bool emit_store_deref(nir_intrinsic_instr* instr);
+
+ bool reserve_uniform(nir_intrinsic_instr* instr);
+ bool process_uniforms(nir_variable *uniform);
+ bool process_inputs(nir_variable *input);
+ bool process_outputs(nir_variable *output);
+
+ void add_array_deref(nir_deref_instr* instr);
+
+ virtual void emit_shader_start();
+ virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
+ virtual bool do_process_inputs(nir_variable *input) = 0;
+ virtual bool do_process_outputs(nir_variable *output) = 0;
+ virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0;
+ virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
+
+ virtual void do_finalize() = 0;
+
+ void finalize();
+ friend class ShaderFromNir;
+
+ std::set<nir_variable*> m_arrays;
+
+ std::map<unsigned, PValue> m_inputs;
+ std::map<unsigned, PValue> m_outputs;
+
+ std::map<unsigned, nir_variable*> m_var_derefs;
+ std::map<const nir_variable *, nir_variable_mode> m_var_mode;
+
+ std::map<unsigned, const glsl_type*> m_uniform_type_map;
+ std::map<int, IfElseInstruction *> m_if_block_start_map;
+ std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
+
+ pipe_shader_type m_processor_type;
+
+ std::vector<PInstruction> m_output;
+ std::vector<PInstruction> m_export_output;
+ r600_shader& m_sh_info;
+
+ EmitTexInstruction m_tex_instr;
+ EmitAluInstruction m_alu_instr;
+ OutputRegisterMap m_output_register_map;
+
+ IfElseInstruction *m_pending_else;
+ int m_next_hwatomic_loc;
+
+ r600_pipe_shader_selector& m_sel;
+};
+
+}
+
+#endif
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "sfn_shader_fragment.h"
+#include "sfn_instruction_fetch.h"
+
+namespace r600 {
+
+FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
+ r600_shader& sh,
+ r600_pipe_shader_selector &sel,
+ const r600_shader_key &key):
+ ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh),
+ m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
+ m_max_counted_color_exports(0),
+ m_two_sided_color(key.ps.color_two_side),
+ m_last_pixel_export(nullptr),
+ m_nir(nir),
+ m_reserved_registers(0),
+ m_frag_pos_index(0),
+ m_need_back_color(false),
+ m_front_face_loaded(false),
+ m_depth_exports(0),
+ m_enable_centroid_interpolators(false)
+{
+ for (auto& i: m_interpolator) {
+ i.enabled = false;
+ i.ij_index= 0;
+ }
+
+ sh_info().rat_base = key.ps.nr_cbufs;
+ sh_info().atomic_base = key.ps.first_atomic_counter;
+}
+
+bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
+{
+ sfn_log << SfnLog::io << "Parse input variable "
+ << input->name << " location:" << input->data.location
+ << " driver-loc:" << input->data.driver_location
+ << " interpolation:" << input->data.interpolation
+ << "\n";
+
+ unsigned name, sid;
+
+ if (input->data.location == VARYING_SLOT_FACE) {
+ m_sv_values.set(es_face);
+ return true;
+ }
+
+ tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(input->data.location),
+ true, &name, &sid);
+
+ /* Work around the mixed tgsi/nir semantic problems, this fixes
+ * dEQP-GLES2.functional.shaders.builtin_variable.pointcoord */
+ if (input->data.location == VARYING_SLOT_PNTC) {
+ name = TGSI_SEMANTIC_GENERIC;
+ sid = 8;
+ }
+
+ tgsi_semantic sname = static_cast<tgsi_semantic>(name);
+
+ switch (sname) {
+ case TGSI_SEMANTIC_POSITION: {
+ m_sv_values.set(es_pos);
+ return true;
+ }
+ case TGSI_SEMANTIC_COLOR: {
+ m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
+ m_need_back_color = m_two_sided_color;
+ return true;
+ }
+ case TGSI_SEMANTIC_PRIMID:
+ sh_info().gs_prim_id_input = true;
+ sh_info().ps_prim_id_input = m_shaderio.inputs().size();
+ /* fallthrough */
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ case TGSI_SEMANTIC_TEXCOORD:
+ case TGSI_SEMANTIC_LAYER:
+ case TGSI_SEMANTIC_PCOORD:
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ case TGSI_SEMANTIC_CLIPDIST: {
+ if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
+ m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+ switch (ii->intrinsic) {
+ case nir_intrinsic_load_front_face:
+ m_sv_values.set(es_face);
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ m_sv_values.set(es_sample_mask_in);
+ break;
+ case nir_intrinsic_load_sample_id:
+ m_sv_values.set(es_sample_id);
+ break;
+ case nir_intrinsic_interp_deref_at_centroid:
+ /* This is not a sysvalue, should go elsewhere */
+ m_enable_centroid_interpolators = true;
+ break;
+ default:
+ ;
+ }
+ }
+ default:
+ ;
+ }
+ return true;
+}
+
+bool FragmentShaderFromNir::allocate_reserved_registers()
+{
+ assert(!m_reserved_registers);
+
+ int face_reg_index = -1;
+ // enabled interpolators based on inputs
+ for (auto& i: m_shaderio.inputs()) {
+ int ij = i->ij_index();
+ if (ij >= 0) {
+ m_interpolator[ij].enabled = true;
+ }
+ }
+
+ /* Lazy, enable both possible interpolators,
+ * TODO: check which ones are really needed */
+ if (m_enable_centroid_interpolators) {
+ m_interpolator[2].enabled = true; /* perspective */
+ m_interpolator[5].enabled = true; /* linear */
+ }
+
+ // sort the varying inputs
+ m_shaderio.sort_varying_inputs();
+
+ // handle interpolators
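+   // Barycentric (i,j) pairs are packed two per GPR: pair n goes to
+   // GPR(n/2) with j in channel 2*(n%2) and i in the channel above it,
+   // i.e. pair 0 -> R0.xy, pair 1 -> R0.zw, pair 2 -> R1.xy.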
+ int num_baryc = 0;
+ for (int i = 0; i < 6; ++i) {
+ if (m_interpolator[i].enabled) {
+ sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
+
+ m_interpolator[i].ij_index = num_baryc;
+
+ unsigned sel = num_baryc / 2;
+ unsigned chan = 2 * (num_baryc % 2);
+
+ auto ip_i = new GPRValue(sel, chan + 1);
+ ip_i->set_as_input();
+ m_interpolator[i].i.reset(ip_i);
+ inject_register(sel, chan + 1, m_interpolator[i].i, false);
+
+ auto ip_j = new GPRValue(sel, chan);
+ ip_j->set_as_input();
+ m_interpolator[i].j.reset(ip_j);
+ inject_register(sel, chan, m_interpolator[i].j, false);
+
+ ++num_baryc;
+ }
+ }
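+   /* Two barycentric pairs fit into one register, so reserve
+    * ceil(num_baryc / 2) registers for them. */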
+ m_reserved_registers += (num_baryc + 1) >> 1;
+
+ if (m_sv_values.test(es_pos)) {
+ m_frag_pos_index = m_reserved_registers++;
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
+ }
+
+ // handle system values
+ if (m_sv_values.test(es_face) || m_need_back_color) {
+ face_reg_index = m_reserved_registers++;
+      auto ffr = new GPRValue(face_reg_index, 0);
+ ffr->set_as_input();
+ m_front_face_reg.reset(ffr);
+ sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
+ inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
+
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
+ load_front_face();
+ }
+
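+   /* The sample mask and sample id share the front-face register:
+    * the face is loaded in channel x, the mask in z, and the id in w. */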
+ if (m_sv_values.test(es_sample_mask_in)) {
+ if (face_reg_index < 0)
+ face_reg_index = m_reserved_registers++;
+
+      auto smi = new GPRValue(face_reg_index, 2);
+ smi->set_as_input();
+ m_sample_mask_reg.reset(smi);
+ sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
+ //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
+ sh_info().nsys_inputs = 1;
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
+ }
+
+ if (m_sv_values.test(es_sample_id)) {
+ if (face_reg_index < 0)
+ face_reg_index = m_reserved_registers++;
+
+ auto smi = new GPRValue(face_reg_index, 3);
+ smi->set_as_input();
+ m_sample_id_reg.reset(smi);
+ sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
+ sh_info().nsys_inputs++;
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, face_reg_index));
+ }
+
+   // The back color handling is not emitted in the regular code path, so
+   // we have to add the inputs here, and later we also need to inject the
+   // code that selects the right color
+ if (m_need_back_color) {
+ size_t ninputs = m_shaderio.inputs().size();
+ for (size_t k = 0; k < ninputs; ++k) {
+ ShaderInput& i = m_shaderio.input(k);
+
+ if (i.name() != TGSI_SEMANTIC_COLOR)
+ continue;
+
+ ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
+
+ size_t next_pos = m_shaderio.size();
+ auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
+ m_shaderio.add_input(bcol);
+ col.set_back_color(next_pos);
+ }
+ m_shaderio.set_two_sided();
+ }
+
+ m_shaderio.update_lds_pos();
+
+ set_reserved_registers(m_reserved_registers);
+
+ return true;
+}
+
+void FragmentShaderFromNir::emit_shader_start()
+{
+ if (m_sv_values.test(es_face))
+ load_front_face();
+
+ if (m_sv_values.test(es_pos)) {
+ for (int i = 0; i < 4; ++i) {
+ auto v = new GPRValue(m_frag_pos_index, i);
+ v->set_as_input();
+ auto reg = PValue(v);
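+         /* gl_FragCoord.w is defined as 1/w, so replace the incoming w
+          * component with its reciprocal. */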
+ if (i == 3)
+ emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
+ m_frag_pos[i] = reg;
+ }
+ }
+}
+
+bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+ if (out_var->data.location == FRAG_RESULT_COLOR)
+ return emit_export_pixel(out_var, instr, true);
+
+ if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
+ out_var->data.location <= FRAG_RESULT_DATA7) ||
+ out_var->data.location == FRAG_RESULT_DEPTH ||
+ out_var->data.location == FRAG_RESULT_STENCIL)
+ return emit_export_pixel(out_var, instr, false);
+
+ sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
+ out_var->data.location << "(" << out_var->data.driver_location << ")\n";
+ return false;
+}
+
+bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
+{
+ sfn_log << SfnLog::instr << "Parse output variable "
+ << output->name << " @" << output->data.location
+ << "@dl:" << output->data.driver_location << "\n";
+
+ ++sh_info().noutput;
+ r600_shader_io& io = sh_info().output[output->data.driver_location];
+ tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
+ &io.name, &io.sid);
+
+   /* Check whether IO vectorization has made this code obsolete */
+ unsigned num_components = 4;
+ unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
+ if (vector_elements)
+ num_components = vector_elements;
+ unsigned component = output->data.location_frac;
+
+ for (unsigned j = component; j < num_components + component; j++)
+ io.write_mask |= 1 << j;
+
+ int loc = output->data.location;
+ if (loc == FRAG_RESULT_COLOR &&
+ (m_nir.info.outputs_written & (1ull << loc))) {
+ sh_info().fs_write_all = true;
+ }
+
+ if (output->data.location == FRAG_RESULT_COLOR ||
+ (output->data.location >= FRAG_RESULT_DATA0 &&
+ output->data.location <= FRAG_RESULT_DATA7)) {
+ return true;
+ }
+ if (output->data.location == FRAG_RESULT_DEPTH ||
+ output->data.location == FRAG_RESULT_STENCIL) {
+ io.write_mask = 15;
+ return true;
+ }
+
+ return false;
+}
+
+bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_sample_mask_in:
+ return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
+ case nir_intrinsic_load_sample_id:
+ return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
+ case nir_intrinsic_load_front_face:
+ return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+ case nir_intrinsic_interp_deref_at_sample:
+ return emit_interp_deref_at_sample(instr);
+ case nir_intrinsic_interp_deref_at_offset:
+ return emit_interp_deref_at_offset(instr);
+ case nir_intrinsic_interp_deref_at_centroid:
+ return emit_interp_deref_at_centroid(instr);
+ default:
+ return false;
+ }
+}
+
+void FragmentShaderFromNir::load_front_face()
+{
+ assert(m_front_face_reg);
+ if (m_front_face_loaded)
+ return;
+
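+   /* The raw front-face input encodes the facing in its sign; convert it
+    * into a DX10-style boolean mask (all ones when front facing). */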
+ auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
+ Value::zero, {alu_write, alu_last_instr});
+ m_front_face_loaded = true;
+ emit_instruction(ir);
+}
+
+bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
+{
+ GPRVector slope = get_temp_vec4();
+
+ auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
+ from_nir_with_fetch_constant(instr->src[1], 0),
+ 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
+ fetch->set_flag(vtx_srf_mode);
+ emit_instruction(fetch);
+
+ GPRVector grad = get_temp_vec4();
+ auto var = get_deref_location(instr->src[0]);
+ assert(var);
+
+ auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
+ auto interpolator = m_interpolator[io.ij_index()];
+ PValue dummy(new GPRValue(interpolator.i->sel(), 7));
+
+ GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
+
+ auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
+ tex->set_dest_swizzle({0,1,7,7});
+ emit_instruction(tex);
+
+ tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
+ tex->set_dest_swizzle({7,7,0,1});
+ emit_instruction(tex);
+
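+   /* Move the barycentric coordinates to the sample position:
+    * j' = j + ofs.x * ddx(j) + ofs.y * ddy(j), and likewise for i.
+    * The per-sample offsets were fetched into slope.zw above. */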
+ emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
+
+ emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
+
+ Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
+
+ auto dst = vec_from_nir(instr->dest, 4);
+   int num_components = nir_dest_num_components(instr->dest);
+
+ load_interpolated(dst, io, ip, num_components, var->data.location_frac);
+
+ return true;
+}
+
+bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
+{
+ int temp = allocate_temp_register();
+
+ GPRVector help(temp, {0,1,2,3});
+
+ auto var = get_deref_location(instr->src[0]);
+ assert(var);
+
+ auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
+ auto interpolator = m_interpolator[io.ij_index()];
+ PValue dummy(new GPRValue(interpolator.i->sel(), 7));
+
+ GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
+
+ auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
+ getgradh->set_dest_swizzle({0,1,7,7});
+ getgradh->set_flag(TexInstruction::x_unnormalized);
+ getgradh->set_flag(TexInstruction::y_unnormalized);
+ getgradh->set_flag(TexInstruction::z_unnormalized);
+ getgradh->set_flag(TexInstruction::w_unnormalized);
+ emit_instruction(getgradh);
+
+ auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
+ getgradv->set_dest_swizzle({7,7,0,1});
+ getgradv->set_flag(TexInstruction::x_unnormalized);
+ getgradv->set_flag(TexInstruction::y_unnormalized);
+ getgradv->set_flag(TexInstruction::z_unnormalized);
+ getgradv->set_flag(TexInstruction::w_unnormalized);
+ emit_instruction(getgradv);
+
+ PValue ofs_x = from_nir(instr->src[1], 0);
+ PValue ofs_y = from_nir(instr->src[1], 1);
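+   /* Apply the offset to the barycentric coordinates:
+    * j' = j + ofs.x * ddx(j) + ofs.y * ddy(j), and likewise for i. */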
+ emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
+ emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
+
+ Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
+
+ auto dst = vec_from_nir(instr->dest, 4);
+ load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
+ var->data.location_frac);
+
+ return true;
+}
+
+bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
+{
+ auto var = get_deref_location(instr->src[0]);
+ assert(var);
+
+ auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
+ io.set_uses_interpolate_at_centroid();
+
+ int ij_index = io.ij_index() >= 3 ? 5 : 2;
+ assert (m_interpolator[ij_index].enabled);
+ auto ip = m_interpolator[ij_index];
+
+ int num_components = nir_dest_num_components(instr->dest);
+
+ auto dst = vec_from_nir(instr->dest, 4);
+ load_interpolated(dst, io, ip, num_components, var->data.location_frac);
+ return true;
+}
+
+
+bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
+{
+ if (in_var->data.location == VARYING_SLOT_POS) {
+ assert(instr->dest.is_ssa);
+
+ for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
+ inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
+ }
+ return true;
+ }
+
+ if (in_var->data.location == VARYING_SLOT_FACE)
+ return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+
+ // todo: replace io with ShaderInputVarying
+ auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
+   unsigned num_components = nir_dest_num_components(instr->dest);
+
+ auto dst = vec_from_nir(instr->dest, 4);
+
+ sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
+ << "].gpr=" << dst.sel() << "\n";
+
+ io.set_gpr(dst.sel());
+
+ auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
+
+ load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
+
+   /* The hardware writes the loaded values starting at component x, so if
+    * the variable starts at a higher component we have to shift the values
+    * down. */
+   if (in_var->data.location_frac > 0) {
+      int n = num_components;
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < n; ++i) {
+ ir = new AluInstruction(op1_mov, dst[i],
+ dst[i + in_var->data.location_frac], {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+
+ if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
+
+ auto & color_input = static_cast<ShaderInputColor&> (io);
+ auto& bgio = m_shaderio.input(color_input.back_color_input_index());
+
+ bgio.set_gpr(allocate_temp_register());
+
+ GPRVector bgcol(bgio.gpr(), {0,1,2,3});
+ load_interpolated(bgcol, bgio, ip, num_components, 0);
+
+ load_front_face();
+
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
+ ShaderInput& io, const Interpolator &ip,
+ int num_components, int start_comp)
+{
+ // replace io with ShaderInputVarying
+ if (io.interpolate() > 0) {
+
+ sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
+
+ if (num_components == 1) {
+ switch (start_comp) {
+ case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
+ case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
+ case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
+ case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
+ default:
+ assert(0);
+ }
+ }
+
+ if (num_components == 2) {
+ switch (start_comp) {
+ case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
+ case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
+ case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
+ load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
+ default:
+ assert(0);
+ }
+ }
+
+ if (num_components == 3 && start_comp == 0)
+ return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
+ load_interpolated_one_comp(dest, io, ip, op2_interp_z);
+
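+      /* General case: build the write mask for the requested components
+       * and interpolate the zw pair first, then xy. */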
+ int full_write_mask = ((1 << num_components) - 1) << start_comp;
+
+ bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
+ success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
+ return success;
+
+ } else {
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op1_interp_load_p0, dest[i],
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
+ EmitInstruction::write);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ }
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
+ ShaderInput& io, const Interpolator& ip, EAluOp op)
+{
+ for (unsigned i = 0; i < 2 ; ++i) {
+ int chan = i;
+ if (op == op2_interp_z)
+ chan += 2;
+
+ auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
+ i == 0 ? EmitInstruction::write : EmitInstruction::last);
+ ir->set_bank_swizzle(alu_vec_210);
+ emit_instruction(ir);
+ }
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
+ const Interpolator& ip, EAluOp op, int writemask)
+{
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
+ (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
+ ir->set_bank_swizzle(alu_vec_210);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
+ ShaderInput& io, const Interpolator& ip,
+ EAluOp op, UNUSED int start, int comp)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
+ i == comp ? EmitInstruction::write : EmitInstruction::empty);
+ ir->set_bank_swizzle(alu_vec_210);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+
+bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_channels)
+{
+   int outputs = all_channels ? m_max_color_exports : 1;
+
+ std::array<uint32_t,4> swizzle;
+ unsigned writemask = nir_intrinsic_write_mask(instr);
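+   /* A swizzle value of 7 masks the channel; for stencil only the Y
+    * channel of the export carries a value. */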
+ if (out_var->data.location != FRAG_RESULT_STENCIL) {
+ for (int i = 0; i < 4; ++i) {
+ swizzle[i] = (i < instr->num_components) ? i : 7;
+ }
+ } else {
+ swizzle = {7,0,7,7};
+ }
+
+ GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
+
+ set_output(out_var->data.driver_location, PValue(value));
+
+ if (out_var->data.location == FRAG_RESULT_COLOR ||
+ (out_var->data.location >= FRAG_RESULT_DATA0 &&
+ out_var->data.location <= FRAG_RESULT_DATA7)) {
+ for (int k = 0 ; k < outputs; ++k) {
+
+ unsigned location = out_var->data.driver_location + k - m_depth_exports;
+ if (location >= m_max_color_exports) {
+ sfn_log << SfnLog::io << "Pixel output " << location
+ << " skipped because we have only " << m_max_color_exports << "CBs\n";
+ continue;
+ }
+
+ m_last_pixel_export = new ExportInstruction(location, *value, ExportInstruction::et_pixel);
+
+ if (sh_info().ps_export_highest < location)
+ sh_info().ps_export_highest = location;
+
+ sh_info().nr_ps_color_exports++;
+
+ unsigned mask = (0xfu << (location * 4));
+ sh_info().ps_color_export_mask |= mask;
+
+ emit_export_instruction(m_last_pixel_export);
+ ++m_max_counted_color_exports;
+      }
+ } else if (out_var->data.location == FRAG_RESULT_DEPTH) {
+ m_depth_exports++;
+ emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
+ } else if (out_var->data.location == FRAG_RESULT_STENCIL) {
+ m_depth_exports++;
+ emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
+void FragmentShaderFromNir::do_finalize()
+{
+ // update shader io info and set LDS etc.
+ sh_info().ninput = m_shaderio.inputs().size();
+
+ sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
+ for (size_t i = 0; i < sh_info().ninput; ++i) {
+ int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
+ m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
+ m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
+ }
+
+ sh_info().two_side = m_shaderio.two_sided();
+ sh_info().nlds = m_shaderio.nlds();
+
+ sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
+
+ if (sh_info().fs_write_all) {
+ sh_info().nr_ps_max_color_exports = m_max_color_exports;
+ }
+
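+   /* If no color was exported, emit a dummy export with all channels
+    * masked so that there is always a last pixel export to mark. */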
+ if (!m_last_pixel_export) {
+ GPRVector v(0, {7,7,7,7});
+ m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
+ sh_info().nr_ps_color_exports++;
+ sh_info().ps_color_export_mask = 0xf;
+ emit_export_instruction(m_last_pixel_export);
+ }
+
+ m_last_pixel_export->set_last();
+
+ if (sh_info().fs_write_all)
+ sh_info().nr_ps_max_color_exports = 8;
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_fragment_shader_from_nir_h
+#define sfn_fragment_shader_from_nir_h
+
+#include "sfn_shader_base.h"
+#include "sfn_shaderio.h"
+#include <bitset>
+
+namespace r600 {
+
+class FragmentShaderFromNir : public ShaderFromNirProcessor {
+public:
+ FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info,
+ r600_pipe_shader_selector &sel, const r600_shader_key &key);
+ bool scan_sysvalue_access(nir_instr *instr) override;
+private:
+
+ struct Interpolator {
+ bool enabled;
+ unsigned ij_index;
+ PValue i;
+ PValue j;
+ };
+
+ void emit_shader_start() override;
+ bool do_process_inputs(nir_variable *input) override;
+ bool allocate_reserved_registers() override;
+ bool do_process_outputs(nir_variable *output) override;
+ bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override;
+ bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+   bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, bool all_channels);
+ bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
+ int num_components, int start_comp);
+ bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);
+   bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op, int writemask);
+ bool load_interpolated_two_comp_for_one(GPRVector &dest,
+ ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp);
+ bool emit_interp_deref_at_centroid(nir_intrinsic_instr* instr);
+
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+ void do_finalize() override;
+
+ void load_front_face();
+
+ bool emit_load_front_face(nir_intrinsic_instr* instr);
+ bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
+ bool emit_load_sample_id(nir_intrinsic_instr* instr);
+ bool emit_interp_deref_at_sample(nir_intrinsic_instr* instr);
+ bool emit_interp_deref_at_offset(nir_intrinsic_instr* instr);
+
+ unsigned m_max_color_exports;
+ unsigned m_max_counted_color_exports;
+ bool m_two_sided_color;
+ ExportInstruction *m_last_pixel_export;
+ const nir_shader& m_nir;
+
+
+ std::array<Interpolator, 6> m_interpolator;
+ unsigned m_reserved_registers;
+ unsigned m_frag_pos_index;
+ PValue m_front_face_reg;
+ PValue m_sample_mask_reg;
+ PValue m_sample_id_reg;
+ GPRVector m_frag_pos;
+ bool m_need_back_color;
+ bool m_front_face_loaded;
+ ShaderIO m_shaderio;
+ unsigned m_depth_exports;
+
+ std::map<unsigned, PValue> m_input_cache;
+ bool m_enable_centroid_interpolators;
+};
+
+}
+
+#endif
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "sfn_shader_vertex.h"
+
+#include <queue>
+
+
+namespace r600 {
+
+using std::priority_queue;
+
+VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector& sel,
+ const r600_shader_key& key):
+ ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader),
+ m_num_clip_dist(0),
+ m_last_param_export(nullptr),
+ m_last_pos_export(nullptr),
+ m_pipe_shader(sh),
+ m_enabled_stream_buffers_mask(0),
+ m_so_info(&sel.so),
+ m_cur_param(0),
+ m_cur_clip_pos(1),
+ m_vertex_id(),
+ m_key(key)
+{
+ // reg 0 is used in the fetch shader
+ increment_reserved_registers();
+
+ sh_info().atomic_base = key.vs.first_atomic_counter;
+}
+
+bool VertexShaderFromNir::do_process_inputs(nir_variable *input)
+{
+ ++sh_info().ninput;
+
+ if (input->data.location < VERT_ATTRIB_MAX) {
+ increment_reserved_registers();
+ return true;
+ }
+ fprintf(stderr, "r600-NIR-VS: Unimplemented process_inputs for %d\n", input->data.location);
+ return false;
+}
+
+bool VertexShaderFromNir::allocate_reserved_registers()
+{
+ /* Since the vertex ID is nearly always used, we add it here as an input so
+ * that the registers used for vertex attributes don't get clobbered by the
+ * register merge step */
+ auto R0x = new GPRValue(0,0);
+ R0x->set_as_input();
+ m_vertex_id.reset(R0x);
+ inject_register(0, 0, m_vertex_id, false);
+
+ if (m_sv_values.test(es_instanceid)) {
+ auto R0w = new GPRValue(0,3);
+ R0w->set_as_input();
+ m_instance_id.reset(R0w);
+ inject_register(0, 3, m_instance_id, false);
+ }
+
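+   /* Compact the param exports: renumber the used output locations in
+    * ascending order so they occupy consecutive param slots. */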
+ priority_queue<int, std::vector<int>, std::greater<int>> q;
+ for (auto a: m_param_map) {
+ q.push(a.first);
+ }
+
+ int next_param = 0;
+ while (!q.empty()) {
+ int loc = q.top();
+ q.pop();
+ m_param_map[loc] = next_param++;
+ }
+ return true;
+}
+
+bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+ switch (ii->intrinsic) {
+ case nir_intrinsic_load_vertex_id:
+ m_sv_values.set(es_vertexid);
+ break;
+ case nir_intrinsic_load_instance_id:
+ m_sv_values.set(es_instanceid);
+ break;
+ default:
+ ;
+ }
+ }
+ default:
+ ;
+ }
+ return true;
+}
+
+bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_vertex_id:
+ return load_preloaded_value(instr->dest, 0, m_vertex_id);
+ case nir_intrinsic_load_instance_id:
+ return load_preloaded_value(instr->dest, 0, m_instance_id);
+ default:
+ return false;
+ }
+}
+
+bool VertexShaderFromNir::do_process_outputs(nir_variable *output)
+{
+ if (output->data.location == VARYING_SLOT_COL0 ||
+ output->data.location == VARYING_SLOT_COL1 ||
+ (output->data.location >= VARYING_SLOT_VAR0 &&
+ output->data.location <= VARYING_SLOT_VAR31) ||
+ (output->data.location >= VARYING_SLOT_TEX0 &&
+ output->data.location <= VARYING_SLOT_TEX7) ||
+ output->data.location == VARYING_SLOT_BFC0 ||
+ output->data.location == VARYING_SLOT_BFC1 ||
+ output->data.location == VARYING_SLOT_CLIP_VERTEX ||
+ output->data.location == VARYING_SLOT_CLIP_DIST0 ||
+ output->data.location == VARYING_SLOT_CLIP_DIST1 ||
+ output->data.location == VARYING_SLOT_POS ||
+ output->data.location == VARYING_SLOT_PSIZ ||
+ output->data.location == VARYING_SLOT_FOGC ||
+ output->data.location == VARYING_SLOT_LAYER ||
+ output->data.location == VARYING_SLOT_EDGE ||
+ output->data.location == VARYING_SLOT_VIEWPORT
+ ) {
+
+ r600_shader_io& io = sh_info().output[output->data.driver_location];
+ tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>( output->data.location),
+ true, &io.name, &io.sid);
+ if (! m_key.vs.as_es)
+ evaluate_spi_sid(io);
+ ++sh_info().noutput;
+
+ if (output->data.location == VARYING_SLOT_PSIZ ||
+ output->data.location == VARYING_SLOT_EDGE ||
+ output->data.location == VARYING_SLOT_LAYER)
+ m_cur_clip_pos = 2;
+
+ if (output->data.location != VARYING_SLOT_POS &&
+ output->data.location != VARYING_SLOT_EDGE &&
+ output->data.location != VARYING_SLOT_PSIZ &&
+ output->data.location != VARYING_SLOT_CLIP_VERTEX)
+ m_param_map[output->data.location] = m_cur_param++;
+
+ return true;
+ }
+ return false;
+}
+
+bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
+{
+ if (in_var->data.location < VERT_ATTRIB_MAX) {
+ for (int i = 0; i < instr->num_components ; ++i) {
+ auto s = new GPRValue(in_var->data.driver_location + 1, i);
+ s->set_as_input();
+ auto src = PValue(s);
+ inject_register(in_var->data.driver_location + 1, i, src, false);
+
+ if (i == 0)
+ set_input(in_var->data.driver_location, src);
+
+ load_preloaded_value(instr->dest, i, src, i == instr->num_components - 1);
+ }
+ return true;
+ }
+ fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", in_var->data.location);
+ return false;
+}
+
+bool VertexShaderFromNir::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+ sh_info().cc_dist_mask = 0xff;
+ sh_info().clip_dist_write = 0xff;
+
+ std::unique_ptr<GPRVector> clip_vertex(vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
+
+ for (int i = 0; i < 4; ++i)
+ sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
+
+ GPRVector clip_dist[2] = { get_temp_vec4(), get_temp_vec4()};
+
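+   /* Each of the eight clip distances is the dot product of the clip
+    * vertex with the corresponding user clip plane, which the driver
+    * provides in the buffer-info constant buffer starting at index 512. */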
+ for (int i = 0; i < 8; i++) {
+ int oreg = i >> 2;
+ int ochan = i & 3;
+ AluInstruction *ir = nullptr;
+ for (int j = 0; j < 4; j++) {
+ ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
+ PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
+ (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ }
+
+ m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
+ emit_export_instruction(m_last_pos_export);
+
+ m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
+ emit_export_instruction(m_last_pos_export);
+
+ return true;
+}
+
+bool VertexShaderFromNir::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+ std::array<uint32_t, 4> *swizzle_override)
+{
+ std::array<uint32_t,4> swizzle;
+ uint32_t write_mask = 0;
+
+ if (swizzle_override) {
+ swizzle = *swizzle_override;
+ for (int i = 0; i < 4; ++i) {
+ if (swizzle[i] < 6)
+ write_mask |= 1 << i;
+ }
+ } else {
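+      /* Shift the write mask to the variable's first component and build
+       * a swizzle that routes source component (i - location_frac) to
+       * output channel i; 7 marks unused channels. */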
+ write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+ }
+
+ sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+
+ GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
+ set_output(out_var->data.driver_location, PValue(value));
+
+ int export_slot = 0;
+
+ switch (out_var->data.location) {
+ case VARYING_SLOT_EDGE: {
+ sh_info().vs_out_misc_write = 1;
+ sh_info().vs_out_edgeflag = 1;
+ emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
+ emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
+ sh_info().output[out_var->data.driver_location].write_mask = 0xf;
+ }
+ /* fallthrough */
+ case VARYING_SLOT_PSIZ:
+ case VARYING_SLOT_LAYER:
+ export_slot = 1;
+ break;
+ case VARYING_SLOT_POS:
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ export_slot = m_cur_clip_pos++;
+ break;
+ default:
+ sfn_log << SfnLog::err << __func__ << "Unsupported location "
+ << out_var->data.location << "\n";
+ return false;
+ }
+
+ m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
+ emit_export_instruction(m_last_pos_export);
+ add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
+ return true;
+}
+
+bool VertexShaderFromNir::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+ assert(out_var->data.driver_location < sh_info().noutput);
+ sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
+
+ int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+ std::array<uint32_t,4> swizzle;
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+
+ sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+
+ GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
+ sh_info().output[out_var->data.driver_location].gpr = value->sel();
+
+ /* This should use the registers!! */
+ set_output(out_var->data.driver_location, PValue(value));
+
+ auto param_loc = m_param_map.find(out_var->data.location);
+ assert(param_loc != m_param_map.end());
+
+ m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
+ emit_export_instruction(m_last_param_export);
+ add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
+ return true;
+}
+
+bool VertexShaderFromNir::emit_stream(int stream)
+{
+ assert(m_so_info);
+ if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
+ R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
+ return false;
+ }
+ for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+ if (m_so_info->output[i].output_buffer >= 4) {
+ R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+ m_so_info->output[i].output_buffer);
+ return false;
+ }
+ }
+ const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+ unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
+ std::vector<GPRVector> tmp(m_so_info->num_outputs);
+
+ /* Initialize locations where the outputs are stored. */
+ for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+ if (stream != -1 && stream != m_so_info->output[i].stream)
+ continue;
+
+ sfn_log << SfnLog::instr << "Emit stream " << i
+ << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
+
+ so_gpr[i] = output_register(m_so_info->output[i].register_index);
+
+ if (!so_gpr[i]) {
+ sfn_log << SfnLog::err << "\nERR: register index "
+ << m_so_info->output[i].register_index
+ << " doesn't correspond to an output register\n";
+ return false;
+ }
+ start_comp[i] = m_so_info->output[i].start_component;
+ /* Lower outputs with dst_offset < start_component.
+ *
+ * We can only output 4D vectors with a write mask, e.g. we can
+ * only output the W component at offset 3, etc. If we want
+ * to store Y, Z, or W at buffer offset 0, we need to use MOV
+ * to move it to X and output X. */
+ if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
+ int tmp_index = allocate_temp_register();
+ int sc = m_so_info->output[i].start_component;
+ AluInstruction *alu = nullptr;
+ for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+ PValue dst(new GPRValue(tmp_index, j));
+ alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
+ tmp[i].set_reg_i(j, dst);
+ emit_instruction(alu);
+ }
+ if (alu)
+ alu->set_flag(alu_last_instr);
+
+ /* Fill the vector with masked values */
+ PValue dst_blank(new GPRValue(tmp_index, 7));
+ for (int j = m_so_info->output[i].num_components; j < 4; j++)
+ tmp[i].set_reg_i(j, dst_blank);
+
+ start_comp[i] = 0;
+ so_gpr[i] = &tmp[i];
+ }
+ sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
+ }
+
+ /* Write outputs to buffers. */
+ for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+ sfn_log << SfnLog::instr << "Write output buffer " << i
+ << " with register index " << m_so_info->output[i].register_index << "\n";
+
+ StreamOutIntruction *out_stream =
+ new StreamOutIntruction(*so_gpr[i],
+ m_so_info->output[i].num_components,
+ m_so_info->output[i].dst_offset - start_comp[i],
+ ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
+ m_so_info->output[i].output_buffer,
+ m_so_info->output[i].stream);
+ emit_export_instruction(out_stream);
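+      /* Each stream owns four buffer-enable bits in the mask. */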
+ m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
+ }
+ return true;
+}
+
+void VertexShaderFromNir::do_finalize()
+{
+ if (m_key.vs.as_gs_a) {
+ PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
+ GPRVector primid({PValue(new GPRValue(0,2)), o,o,o});
+ m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
+ emit_export_instruction(m_last_param_export);
+ int i;
+ i = sh_info().noutput++;
+ auto& io = sh_info().output[i];
+ io.name = TGSI_SEMANTIC_PRIMID;
+ io.sid = 0;
+ io.gpr = 0;
+ io.interpolate = TGSI_INTERPOLATE_CONSTANT;
+ io.write_mask = 0x4;
+ io.spi_sid = m_key.vs.prim_id_out;
+ sh_info().vs_as_gs_a = 1;
+ }
+
+ finalize_exports();
+}
+
+
+bool VertexShaderFromNirForFS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+
+ switch (out_var->data.location) {
+ case VARYING_SLOT_PSIZ:
+ sh_info().vs_out_point_size = 1;
+ sh_info().vs_out_misc_write = 1;
+ /* fallthrough */
+ case VARYING_SLOT_POS:
+ return emit_varying_pos(out_var, instr);
+ case VARYING_SLOT_EDGE: {
+      std::array<uint32_t, 4> swizzle_override = {7, 0, 7, 7};
+ return emit_varying_pos(out_var, instr, &swizzle_override);
+ }
+ case VARYING_SLOT_CLIP_VERTEX:
+ return emit_clip_vertices(out_var, instr);
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ m_num_clip_dist += 4;
+ return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
+ case VARYING_SLOT_LAYER: {
+ sh_info().vs_out_misc_write = 1;
+ sh_info().vs_out_layer = 1;
+ std::array<uint32_t, 4> swz = {7,7,0,7};
+ return emit_varying_pos(out_var, instr, &swz) &&
+ emit_varying_param(out_var, instr);
+ }
+ case VARYING_SLOT_VIEW_INDEX:
+ return emit_varying_pos(out_var, instr) &&
+ emit_varying_param(out_var, instr);
+
+ default:
+ if (out_var->data.location <= VARYING_SLOT_VAR31 ||
+ (out_var->data.location >= VARYING_SLOT_TEX0 &&
+ out_var->data.location <= VARYING_SLOT_TEX7))
+ return emit_varying_param(out_var, instr);
+ }
+
+ fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
+ out_var->data.location);
+ return false;
+}
+
+void VertexShaderFromNirForFS::finalize_exports()
+{
+ if (m_so_info && m_so_info->num_outputs)
+ emit_stream(-1);
+
+ m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
+
+ if (!m_last_param_export) {
+ GPRVector value(0,{7,7,7,7});
+ m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
+ emit_export_instruction(m_last_param_export);
+ }
+ m_last_param_export->set_last();
+
+ if (!m_last_pos_export) {
+ GPRVector value(0,{7,7,7,7});
+ m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
+ emit_export_instruction(m_last_pos_export);
+ }
+ m_last_pos_export->set_last();
+
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_vertex_shader_from_nir_h
+#define sfn_vertex_shader_from_nir_h
+
+#include "sfn_shader_base.h"
+
+namespace r600 {
+
+class VertexShaderFromNir : public ShaderFromNirProcessor {
+public:
+ VertexShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector &sel,
+ const r600_shader_key &key);
+
+ bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override;
+ bool scan_sysvalue_access(nir_instr *instr) override;
+protected:
+ bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+ std::array<uint32_t, 4> *swizzle_override = nullptr);
+ bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
+ bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
+ bool emit_stream(int stream);
+
+ // todo: encapsulate
+ unsigned m_num_clip_dist;
+ ExportInstruction *m_last_param_export;
+ ExportInstruction *m_last_pos_export;
+ r600_pipe_shader *m_pipe_shader;
+ unsigned m_enabled_stream_buffers_mask;
+ const pipe_stream_output_info *m_so_info;
+ void do_finalize() override;
+private:
+
+ bool do_process_inputs(nir_variable *input) override;
+ bool allocate_reserved_registers() override;
+ bool do_process_outputs(nir_variable *output) override;
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+
+ virtual void finalize_exports() = 0;
+
+ unsigned m_cur_param;
+ std::map<unsigned, unsigned> m_param_map;
+ unsigned m_cur_clip_pos;
+
+ PValue m_vertex_id;
+ PValue m_instance_id;
+ r600_shader_key m_key;
+};
+
+class VertexShaderFromNirForFS : public VertexShaderFromNir {
+public:
+ using VertexShaderFromNir::VertexShaderFromNir;
+
+ bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+private:
+ void finalize_exports() override;
+};
+
+}
+
+#endif
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_shaderio.h"
+#include "sfn_debug.h"
+#include "tgsi/tgsi_from_mesa.h"
+
+#include <queue>
+
+namespace r600 {
+
+using std::vector;
+using std::priority_queue;
+
+ShaderIO::ShaderIO():
+ m_two_sided(false),
+ m_lds_pos(0)
+{
+
+}
+
+ShaderInput::ShaderInput(tgsi_semantic name):
+ m_name(name),
+ m_gpr(0),
+ m_uses_interpolate_at_centroid(false)
+{
+}
+
+ShaderInput::~ShaderInput()
+{
+}
+
+void ShaderInput::set_lds_pos(UNUSED int lds_pos)
+{
+}
+
+int ShaderInput::ij_index() const
+{
+ return -1;
+}
+
+bool ShaderInput::interpolate() const
+{
+ return false;
+}
+
+int ShaderInput::lds_pos() const
+{
+ return 0;
+}
+
+void ShaderInput::set_uses_interpolate_at_centroid()
+{
+ m_uses_interpolate_at_centroid = true;
+}
+
+void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const
+{
+ io.name = m_name;
+ io.gpr = m_gpr;
+ io.ij_index = translated_ij_index;
+ io.lds_pos = lds_pos();
+ io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid;
+
+ set_specific_ioinfo(io);
+}
+
+void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const
+{
+}
+
+ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr):
+ ShaderInput(name),
+ m_gpr(gpr)
+{
+}
+
+void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const
+{
+ io.gpr = m_gpr;
+ io.ij_index = 0;
+}
+
+ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input):
+ ShaderInput(_name),
+ m_driver_location(input->data.driver_location),
+ m_location_frac(input->data.location_frac),
+ m_sid(sid),
+ m_ij_index(-10),
+ m_mask((1 << input->type->components()) - 1)
+{
+ sfn_log << SfnLog::io << __func__
+ << "name:" << _name
+ << " sid: " << sid
+ << " op: " << input->data.interpolation;
+
+ evaluate_spi_sid();
+
+ enum glsl_base_type base_type =
+ glsl_get_base_type(glsl_without_array(input->type));
+
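+   /* The ij index encodes both the interpolation mode and the location:
+    * base 0 for perspective and 3 for linear interpolation, plus 0 for
+    * sample, 1 for center, and 2 for centroid (applied below). */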
+ switch (input->data.interpolation) {
+ case INTERP_MODE_NONE:
+ if (glsl_base_type_is_integer(base_type)) {
+ m_interpolate = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ if (name() == TGSI_SEMANTIC_COLOR) {
+ m_interpolate = TGSI_INTERPOLATE_COLOR;
+ m_ij_index = 0;
+ break;
+ }
+ /* fall-through */
+
+ case INTERP_MODE_SMOOTH:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ m_ij_index = 0;
+ break;
+
+ case INTERP_MODE_NOPERSPECTIVE:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ m_interpolate = TGSI_INTERPOLATE_LINEAR;
+ m_ij_index = 3;
+ break;
+
+ case INTERP_MODE_FLAT:
+ m_interpolate = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ if (input->data.sample) {
+ m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
+ } else if (input->data.centroid) {
+ m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID;
+ m_ij_index += 2;
+ } else {
+ m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER;
+ m_ij_index += 1;
+ }
+ sfn_log << SfnLog::io
+ << " -> IP:" << m_interpolate
+ << " IJ:" << m_ij_index
+ << "\n";
+}
+
+void ShaderInputVarying::update_mask(int additional_comps)
+{
+ m_mask |= additional_comps;
+}
+
+void ShaderInputVarying::evaluate_spi_sid()
+{
+ switch (name()) {
+ case TGSI_SEMANTIC_POSITION:
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ case TGSI_SEMANTIC_FACE:
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ assert(0 && "System value used as varying");
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ m_spi_sid = m_sid + 1;
+ break;
+ default:
+ /* For non-generic params - pack name and sid into 8 bits */
+ m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1;
+ }
+}
+
+ShaderInputVarying::ShaderInputVarying(tgsi_semantic name,
+                                       const ShaderInputVarying& orig, size_t location):
+   ShaderInput(name),
+   m_driver_location(location),
+   m_location_frac(orig.location_frac()),
+   m_sid(orig.m_sid),
+   m_spi_sid(orig.m_spi_sid),
+   m_interpolate(orig.m_interpolate),
+   m_interpolate_loc(orig.m_interpolate_loc),
+   m_ij_index(orig.m_ij_index),
+   m_lds_pos(0),
+   m_mask(orig.m_mask)
+{
+ evaluate_spi_sid();
+}
+
+bool ShaderInputVarying::interpolate() const
+{
+ return m_interpolate > 0;
+}
+
+int ShaderInputVarying::ij_index() const
+{
+ return m_ij_index;
+}
+
+void ShaderInputVarying::set_lds_pos(int lds_pos)
+{
+ m_lds_pos = lds_pos;
+}
+
+int ShaderInputVarying::lds_pos() const
+{
+ return m_lds_pos;
+}
+
+void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const
+{
+ io.interpolate = m_interpolate;
+ io.interpolate_location = m_interpolate_loc;
+ io.sid = m_sid;
+ io.spi_sid = m_spi_sid;
+ set_color_ioinfo(io);
+}
+
+void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const
+{
+ sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input):
+ ShaderInputVarying(name, sid, input),
+ m_back_color_input_idx(0)
+{
+ sfn_log << SfnLog::io << __func__ << "name << " << name << " sid << " << sid << "\n";
+}
+
+void ShaderInputColor::set_back_color(unsigned back_color_input_idx)
+{
+ sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n";
+ m_back_color_input_idx = back_color_input_idx;
+}
+
+void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const
+{
+ sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n";
+ io.back_color_input = m_back_color_input_idx;
+}
+
+size_t ShaderIO::add_input(ShaderInput *input)
+{
+ m_inputs.push_back(PShaderInput(input));
+ return m_inputs.size() - 1;
+}
+
+PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid, int frac)
+{
+ for (auto& a : m_inputs) {
+ if (a->name() == name) {
+ ShaderInputVarying *v = dynamic_cast<ShaderInputVarying *>(a.get());
+ assert(v);
+ if (v->sid() == sid && (v->location_frac() == frac))
+ return a;
+ }
+ }
+ return nullptr;
+}
+
+struct VaryingShaderIOLess {
+ bool operator () (PShaderInput lhs, PShaderInput rhs) const
+ {
+ const ShaderInputVarying& l = static_cast<ShaderInputVarying&>(*lhs);
+ const ShaderInputVarying& r = static_cast<ShaderInputVarying&>(*rhs);
+ return l.location() > r.location();
+ }
+};
+
+void ShaderIO::sort_varying_inputs()
+{
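+   /* Re-order only the varying inputs by ascending driver location while
+    * all other inputs keep their slots: collect the slot indices of the
+    * varyings and refill exactly those slots from the sorted queue. */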
+ priority_queue<PShaderInput, vector<PShaderInput>, VaryingShaderIOLess> q;
+
+ vector<int> idx;
+
+ for (auto i = 0u; i < m_inputs.size(); ++i) {
+ ShaderInputVarying *vi = dynamic_cast<ShaderInputVarying *>(m_inputs[i].get());
+ if (vi) {
+ q.push(m_inputs[i]);
+ idx.push_back(i);
+ }
+ }
+
+ auto next_index = idx.begin();
+ while (!q.empty()) {
+ auto si = q.top();
+ q.pop();
+ m_inputs[*next_index++] = si;
+ }
+}
+
+void ShaderIO::update_lds_pos()
+{
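+   /* Assign one LDS position per distinct driver location; varyings that
+    * share a location share the position. */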
+ m_lds_pos = -1;
+ m_ldspos.resize(m_inputs.size());
+ for (auto& i : m_inputs) {
+ ShaderInputVarying *v = dynamic_cast<ShaderInputVarying *>(i.get());
+ if (!v)
+ continue;
+      /* Some shaders miss an input, so make sure the table covers the
+       * highest location actually used */
+ if (m_ldspos.size() <= static_cast<unsigned>(v->location()))
+ m_ldspos.resize(v->location() + 1);
+ }
+
+ std::fill(m_ldspos.begin(), m_ldspos.end(), -1);
+ for (auto& i : m_inputs) {
+ ShaderInputVarying *v = dynamic_cast<ShaderInputVarying *>(i.get());
+ if (!v)
+ continue;
+
+ if (m_ldspos[v->location()] < 0) {
+ ++m_lds_pos;
+ m_ldspos[v->location()] = m_lds_pos;
+ }
+ v->set_lds_pos(m_lds_pos);
+ }
+ ++m_lds_pos;
+}
+
+std::vector<PShaderInput> &ShaderIO::inputs()
+{
+ return m_inputs;
+}
+
+ShaderInput& ShaderIO::input(size_t k)
+{
+ assert(k < m_inputs.size());
+ return *m_inputs[k];
+}
+
+ShaderInput& ShaderIO::input(size_t driver_loc, int frac)
+{
+ for (auto& i: m_inputs) {
+ auto v = dynamic_cast<ShaderInputVarying*>(i.get());
+ if (v && v->location() == driver_loc && v->location_frac() == frac)
+ return *v;
+ }
+ return input(driver_loc);
+}
+
+void ShaderIO::set_two_sided()
+{
+ m_two_sided = true;
+}
+
+}
+
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_SHADERIO_H
+#define SFN_SHADERIO_H
+
+#include "compiler/nir/nir.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallium/drivers/r600/r600_shader.h"
+
+#include <vector>
+#include <memory>
+
+namespace r600 {
+
+class ShaderInput {
+public:
+ ShaderInput();
+ virtual ~ShaderInput();
+
+ ShaderInput(tgsi_semantic name);
+ tgsi_semantic name() const {return m_name;}
+
+ void set_gpr(int gpr) {m_gpr = gpr;}
+ int gpr() const {return m_gpr;}
+ void set_ioinfo(r600_shader_io& io, int translated_ij_index) const;
+
+ virtual void set_lds_pos(int lds_pos);
+ virtual int ij_index() const;
+ virtual bool interpolate() const;
+ virtual int lds_pos() const;
+ void set_uses_interpolate_at_centroid();
+
+private:
+ virtual void set_specific_ioinfo(r600_shader_io& io) const;
+
+ tgsi_semantic m_name;
+ int m_gpr;
+ bool m_uses_interpolate_at_centroid;
+};
+
+using PShaderInput = std::shared_ptr<ShaderInput>;
+
+class ShaderInputSystemValue: public ShaderInput {
+public:
+ ShaderInputSystemValue(tgsi_semantic name, int gpr);
+ void set_specific_ioinfo(r600_shader_io& io) const;
+ int m_gpr;
+};
+
+class ShaderInputVarying : public ShaderInput {
+public:
+ ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input);
+ ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig,
+ size_t location);
+
+ void set_lds_pos(int lds_pos) override;
+
+ int ij_index() const override;
+
+ bool interpolate() const override;
+
+ int lds_pos() const override;
+
+ int sid() const {return m_sid;}
+
+ void update_mask(int additional_comps);
+
+ size_t location() const {return m_driver_location;}
+ int location_frac() const {return m_location_frac;}
+
+private:
+ void evaluate_spi_sid();
+
+ virtual void set_color_ioinfo(r600_shader_io& io) const;
+ void set_specific_ioinfo(r600_shader_io& io) const override;
+ size_t m_driver_location;
+ int m_location_frac;
+ int m_sid;
+ int m_spi_sid;
+ tgsi_interpolate_mode m_interpolate;
+ tgsi_interpolate_loc m_interpolate_loc;
+ int m_ij_index;
+ int m_lds_pos;
+ int m_mask;
+};
+
+class ShaderInputColor: public ShaderInputVarying {
+public:
+ ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input);
+ void set_back_color(unsigned back_color_input_idx);
+ unsigned back_color_input_index() const {
+ return m_back_color_input_idx;
+ }
+private:
+ void set_color_ioinfo(UNUSED r600_shader_io& io) const override;
+ unsigned m_back_color_input_idx;
+
+};
+
+class ShaderIO
+{
+public:
+ ShaderIO();
+
+ size_t add_input(ShaderInput *input);
+
+ std::vector<PShaderInput>& inputs();
+ ShaderInput& input(size_t k);
+
+ ShaderInput& input(size_t driver_loc, int frac);
+
+ void set_two_sided();
+ bool two_sided() {return m_two_sided;}
+
+ int nlds() const {
+ return m_lds_pos;
+ }
+
+ void sort_varying_inputs();
+
+ size_t size() const {return m_inputs.size();}
+
+ PShaderInput find_varying(tgsi_semantic name, int sid, int frac);
+
+ void update_lds_pos();
+
+private:
+ std::vector<PShaderInput> m_inputs;
+ std::vector<int> m_ldspos;
+ bool m_two_sided;
+ int m_lds_pos;
+
+};
+
+}
+
+#endif // SFN_SHADERIO_H
\ No newline at end of file
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_value.h"
+#include "util/macros.h"
+
+#include <iostream>
+#include <iomanip>
+#include <cassert>
+
+namespace r600 {
+
+using std::unique_ptr;
+using std::make_shared;
+
+const char *Value::component_names = "xyzw01?_!";
+
+Value::Value():
+ m_type(gpr),
+ m_chan(0)
+{
+}
+
+Value::Value(Type type, uint32_t chan):
+ m_type(type),
+ m_chan(chan)
+{
+}
+
+Value::Value(Type type):
+ Value(type, 0)
+{
+}
+
+Value::Type Value::type() const
+{
+ return m_type;
+}
+
+void Value::set_chan(uint32_t chan)
+{
+ m_chan = chan;
+}
+
+void Value::print(std::ostream& os) const
+{
+ do_print(os);
+}
+
+void Value::print(std::ostream& os, const PrintFlags& flags) const
+{
+ if (flags.flags & PrintFlags::has_neg) os << '-';
+ if (flags.flags & PrintFlags::has_abs) os << '|';
+ do_print(os, flags);
+ if (flags.flags & PrintFlags::has_abs) os << '|';
+}
+
+void Value::do_print(std::ostream& os, const PrintFlags& flags) const
+{
+ (void)flags;
+ do_print(os);
+}
+
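+/* Order values by (sel, chan) so they can serve as keys in ordered
+ * containers like ValueSet (see value_less in sfn_value.h). */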
+bool Value::operator < (const Value& lhs) const
+{
+ return sel() < lhs.sel() ||
+ (sel() == lhs.sel() && chan() < lhs.chan());
+}
+
+LiteralValue::LiteralValue(float value, uint32_t chan):
+ Value(Value::literal, chan)
+{
+ m_value.f = value;
+}
+
+LiteralValue::LiteralValue(uint32_t value, uint32_t chan):
+ Value(Value::literal, chan)
+{
+ m_value.u = value;
+}
+
+LiteralValue::LiteralValue(int value, uint32_t chan):
+ Value(Value::literal, chan)
+{
+ m_value.u = value;
+}
+
+uint32_t LiteralValue::sel() const
+{
+ return ALU_SRC_LITERAL;
+}
+
+uint32_t LiteralValue::value() const
+{
+ return m_value.u;
+}
+
+float LiteralValue::value_float() const
+{
+ return m_value.f;
+}
+
+void LiteralValue::do_print(std::ostream& os) const
+{
+ os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10)
+ << m_value.f << "].";
+ os << component_names[chan()];
+}
+
+void LiteralValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
+{
+ os << "[0x" << std::setbase(16) << m_value.u << " "
+ << std::setbase(10) << m_value.f << "f]";
+}
+
+bool LiteralValue::is_equal_to(const Value& other) const
+{
+ assert(other.type() == Value::Type::literal);
+ const auto& rhs = static_cast<const LiteralValue&>(other);
+ return (sel() == rhs.sel() &&
+ value() == rhs.value());
+}
+
+SpecialValue::SpecialValue(Type type, int value, int chan):
+ Value(type, chan),
+ m_value(static_cast<AluInlineConstants>(value))
+{
+}
+
+uint32_t SpecialValue::sel() const
+{
+ return m_value;
+}
+
+
+void SpecialValue::do_print(std::ostream& os) const
+{
+ auto sv_info = alu_src_const.find(m_value);
+ if (sv_info != alu_src_const.end()) {
+ os << sv_info->second.descr;
+ if (sv_info->second.use_chan)
+ os << '.' << component_names[chan()];
+ else if (chan() > 0)
+ os << "." << component_names[chan()]
+ << " (W: Channel ignored)";
+ } else {
+ if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32)
+ os << " Param" << m_value - ALU_SRC_PARAM_BASE;
+ else
+ os << " E: unknown inline constant " << m_value;
+ }
+}
+
+PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0));
+PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0));
+PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0));
+PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0));
+
+InlineConstValue::InlineConstValue(int value, int chan):
+ SpecialValue(Value::cinline, value, chan)
+{
+}
+
+bool InlineConstValue::is_equal_to(const Value& other) const
+{
+ assert(other.type() == Value::Type::cinline);
+ const auto& rhs = static_cast<const InlineConstValue&>(other);
+ return sel() == rhs.sel();
+}
+
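+/* For selectors below 512 the kcache bank is folded into the sel bits:
+ * the index lives in bits 0-4, bank bit 0 in bit 5, and bank bit 1 in
+ * bit 8. sel() reverses this split by re-adding the per-bank base
+ * offsets. */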
+UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank):
+ Value(Value::kconst, chan)
+{
+ if (sel < 512) {
+ m_index = sel & 0x1f;
+ m_kcache_bank = ((sel >> 5) & 1) | ((sel >> 7) & 2);
+ } else {
+ m_index = sel;
+ m_kcache_bank = kcache_bank;
+ }
+}
+
+UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr):
+ Value(Value::kconst, chan),
+ m_index(sel),
+ m_kcache_bank(0),
+ m_addr(addr)
+{
+}
+
+uint32_t UniformValue::sel() const
+{
+ const int bank_base[4] = {128, 160, 256, 288};
+ return m_index < 512 ? m_index + bank_base[m_kcache_bank] : m_index;
+}
+
+uint32_t UniformValue::kcache_bank() const
+{
+ return m_kcache_bank;
+}
+
+bool UniformValue::is_equal_to(const Value& other) const
+{
+ const UniformValue& o = static_cast<const UniformValue&>(other);
+ return sel() == o.sel() &&
+ m_kcache_bank == o.kcache_bank();
+}
+
+void UniformValue::do_print(std::ostream& os) const
+{
+ if (m_index < 512)
+ os << "KC" << m_kcache_bank << "[" << m_index;
+ else
+ os << "KCX[" << m_index;
+ os << "]." << component_names[chan()];
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_VALUE_H
+#define SFN_VALUE_H
+
+#include "sfn_alu_defines.h"
+#include "nir.h"
+
+#include <memory>
+#include <set>
+#include <bitset>
+#include <iostream>
+
+namespace r600 {
+
+class Value {
+public:
+ using Pointer=std::shared_ptr<Value>;
+
+ struct PrintFlags {
+ PrintFlags():index_mode(0),
+ flags(0)
+ {
+ }
+ PrintFlags(int im, int f):index_mode(im),
+ flags(f)
+ {
+ }
+ int index_mode;
+ int flags;
+ static const int is_rel = 1;
+ static const int has_abs = 2;
+ static const int has_neg = 4;
+ static const int literal_is_float = 8;
+ static const int index_ar = 16;
+ static const int index_loopidx = 32;
+ };
+
+ enum Type {
+ gpr,
+ kconst,
+ literal,
+ cinline,
+ lds_direct,
+ gpr_vector,
+ gpr_array_value,
+ unknown
+ };
+
+ static const char *component_names;
+
+ using LiteralFlags=std::bitset<4>;
+
+ Value();
+
+ Value(Type type);
+
+ virtual ~Value(){}
+
+ Type type() const;
+ virtual uint32_t sel() const = 0;
+ uint32_t chan() const {return m_chan;}
+
+ void set_chan(uint32_t chan);
+ void print(std::ostream& os, const PrintFlags& flags) const;
+
+ void print(std::ostream& os) const;
+
+ bool operator < (const Value& lhs) const;
+
+ static Value::Pointer zero;
+ static Value::Pointer one_f;
+ static Value::Pointer zero_dot_5;
+ static Value::Pointer one_i;
+
+protected:
+ Value(Type type, uint32_t chan);
+
+private:
+ virtual void do_print(std::ostream& os) const = 0;
+ virtual void do_print(std::ostream& os, const PrintFlags& flags) const;
+
+ virtual bool is_equal_to(const Value& other) const = 0;
+
+ Type m_type;
+ uint32_t m_chan;
+
+ friend bool operator == (const Value& lhs, const Value& rhs);
+};
+
+
+inline std::ostream& operator << (std::ostream& os, const Value& v)
+{
+ v.print(os);
+ return os;
+}
+
+
+inline bool operator == (const Value& lhs, const Value& rhs)
+{
+ if (lhs.type() == rhs.type())
+ return lhs.is_equal_to(rhs);
+ return false;
+}
+
+inline bool operator != (const Value& lhs, const Value& rhs)
+{
+ return !(lhs == rhs);
+}
+
+using PValue=Value::Pointer;
+
+struct value_less {
+ inline bool operator () (PValue lhs, PValue rhs) const {
+ return *lhs < *rhs;
+ }
+};
+
+using ValueSet = std::set<PValue, value_less>;
+
+
+class LiteralValue: public Value {
+public:
+ LiteralValue(float value, uint32_t chan = 0);
+ LiteralValue(uint32_t value, uint32_t chan = 0);
+ LiteralValue(int value, uint32_t chan = 0);
+ uint32_t sel() const override final;
+ uint32_t value() const;
+ float value_float() const;
+private:
+ void do_print(std::ostream& os) const override;
+ void do_print(std::ostream& os, const PrintFlags& flags) const override;
+ bool is_equal_to(const Value& other) const override;
+ union {
+ uint32_t u;
+ float f;
+ } m_value;
+};
+
+class SpecialValue: public Value {
+protected:
+ SpecialValue(Type type, int value, int chan);
+ uint32_t sel() const override final;
+private:
+ void do_print(std::ostream& os) const override;
+ AluInlineConstants m_value;
+};
+
+class InlineConstValue: public SpecialValue {
+public:
+ InlineConstValue(int value, int chan);
+ bool is_equal_to(const Value& other) const override;
+
+private:
+ AluInlineConstants m_value;
+};
+
+class UniformValue: public Value {
+public:
+ UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0);
+ UniformValue(uint32_t sel, uint32_t chan, PValue addr);
+ uint32_t sel() const override;
+ uint32_t kcache_bank() const;
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Value& other) const override;
+
+ uint32_t m_index;
+ uint32_t m_kcache_bank;
+ PValue m_addr;
+};
+
+} // end ns r600
+
+#endif
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_value_gpr.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+using std::vector;
+using std::array;
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset):
+ Value(Value::gpr, chan),
+ m_sel(sel),
+ m_base_offset(base_offset),
+ m_input(false)
+{
+}
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan):
+ Value(Value::gpr, chan),
+ m_sel(sel),
+ m_base_offset(0),
+ m_input(false)
+{
+}
+
+uint32_t GPRValue::sel() const
+{
+ return m_sel;
+}
+
+void GPRValue::do_print(std::ostream& os) const
+{
+ os << 'R';
+ os << m_sel;
+ os << '.' << component_names[chan()];
+}
+
+bool GPRValue::is_equal_to(const Value& other) const
+{
+ assert(other.type() == Value::Type::gpr);
+ const auto& rhs = static_cast<const GPRValue&>(other);
+ return (sel() == rhs.sel() &&
+ chan() == rhs.chan());
+}
+
+void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
+{
+ os << 'R';
+ os << m_sel;
+ os << '.' << component_names[chan()];
+}
+
+GPRVector::GPRVector(const GPRVector& orig):
+ Value(gpr_vector),
+ m_elms(orig.m_elms),
+ m_valid(orig.m_valid)
+{
+}
+
+GPRVector::GPRVector(std::array<PValue,4> elms):
+ Value(gpr_vector),
+ m_elms(elms),
+ m_valid(false)
+{
+ for (unsigned i = 0; i < 4; ++i)
+ if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) {
+ assert(0 && "GPR vector not valid because element missing or nit a GPR");
+ return;
+ }
+ unsigned sel = m_elms[0]->sel();
+ for (unsigned i = 1; i < 4; ++i)
+ if (m_elms[i]->sel() != sel) {
+ assert(0 && "GPR vector not valid because sel is not equal for all elements");
+ return;
+ }
+ m_valid = true;
+}
+
+GPRVector::GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle):
+ Value (gpr_vector),
+ m_valid(true)
+{
+ for (int i = 0; i < 4; ++i)
+ m_elms[i] = PValue(new GPRValue(sel, swizzle[i]));
+}
+
+GPRVector::GPRVector(const GPRVector& orig, const std::array<uint8_t,4>& swizzle)
+{
+ for (int i = 0; i < 4; ++i)
+ m_elms[i] = orig.reg_i(swizzle[i]);
+ m_valid = orig.m_valid;
+}
+
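+/* Lazily re-check that all elements are present and share one sel.
+ * Selectors of 124 and above take an early out; presumably these are
+ * the special (clause-local) registers, so the check doesn't apply. */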
+void GPRVector::validate() const
+{
+ assert(m_elms[0]);
+ uint32_t sel = m_elms[0]->sel();
+ if (sel >= 124)
+ return;
+
+ for (unsigned i = 1; i < 4; ++i) {
+ assert(m_elms[i]);
+ if (sel != m_elms[i]->sel())
+ return;
+ }
+
+ m_valid = true;
+}
+
+uint32_t GPRVector::sel() const
+{
+ validate();
+ assert(m_valid);
+ return m_elms[0] ? m_elms[0]->sel() : 999;
+}
+
+void GPRVector::set_reg_i(int i, PValue reg)
+{
+ m_elms[i] = reg;
+}
+
+void GPRVector::do_print(std::ostream& os) const
+{
+ os << "R" << sel() << ".";
+ for (int i = 0; i < 4; ++i)
+ os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? m_elms[i]->chan() : 8] : '?');
+}
+
+void GPRVector::swizzle(const Swizzle& swz)
+{
+ Values v(m_elms);
+ for (uint32_t i = 0; i < 4; ++i)
+ if (i != swz[i]) {
+ assert(swz[i] < 4);
+ m_elms[i] = v[swz[i]];
+ }
+}
+
+bool GPRVector::is_equal_to(const Value& other) const
+{
+ if (other.type() != gpr_vector) {
+ std::cerr << "t";
+ return false;
+ }
+
+ const GPRVector& o = static_cast<const GPRVector&>(other);
+
+ for (int i = 0; i < 4; ++i) {
+ if (*m_elms[i] != *o.m_elms[i]) {
+ std::cerr << "elm" << i;
+ return false;
+ }
+ }
+ return true;
+}
+
+
+GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array):
+ Value(gpr_array_value, value->chan()),
+ m_value(value),
+ m_addr(addr),
+ m_array(array)
+{
+}
+
+GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array):
+ Value(gpr_array_value, value->chan()),
+ m_value(value),
+ m_array(array)
+{
+}
+
+static const char *swz_char = "xyzw01_";
+
+void GPRArrayValue::do_print(std::ostream& os) const
+{
+ assert(m_array);
+ os << "R" << m_value->sel();
+ if (m_addr) {
+ os << "[" << *m_addr << "] ";
+ }
+ os << swz_char[m_value->chan()];
+
+ os << "(" << *m_array << ")";
+}
+
+bool GPRArrayValue::is_equal_to(const Value& other) const
+{
+ const GPRArrayValue& v = static_cast<const GPRArrayValue&>(other);
+
+ return *m_value == *v.m_value &&
+ *m_array == *v.m_array;
+}
+
+void GPRArrayValue::reset_value(PValue new_value)
+{
+ m_value = new_value;
+}
+
+void GPRArrayValue::reset_addr(PValue new_addr)
+{
+ m_addr = new_addr;
+}
+
+
+GPRArray::GPRArray(int base, int size, int mask, int frac):
+ Value (gpr_vector),
+ m_base_index(base),
+ m_component_mask(mask),
+ m_frac(frac)
+{
+ m_values.resize(size);
+ for (int i = 0; i < size; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ if (mask & (1 << j))
+ m_values[i].set_reg_i(j, PValue(new GPRValue(base + i, j)));
+ }
+ }
+}
+
+uint32_t GPRArray::sel() const
+{
+ return m_base_index;
+}
+
+static const char *compchar = "xyzw";
+void GPRArray::do_print(std::ostream& os) const
+{
+ os << "ARRAY[R" << sel() << "..R" << sel() + m_values.size() - 1 << "].";
+ for (int j = 0; j < 4; ++j) {
+ if (m_component_mask & (1 << j))
+ os << compchar[j];
+ }
+}
+
+bool GPRArray::is_equal_to(const Value& other) const
+{
+ const GPRArray& o = dynamic_cast<const GPRArray&>(other);
+ return o.sel() == sel() &&
+ o.m_values.size() == m_values.size() &&
+ o.m_component_mask == m_component_mask;
+}
+
+uint32_t GPRArrayValue::sel() const
+{
+ return m_value->sel();
+}
+
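+/* Resolve one component of an array access: a literal index selects the
+ * addressed register directly, while a GPR index is wrapped in a
+ * GPRArrayValue so the relative addressing stays visible to the later
+ * assembly emission. */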
+PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component)
+{
+ assert(index < m_values.size());
+ assert(m_component_mask & (1 << (component + m_frac)));
+
+ sfn_log << SfnLog::reg << "Create indirect register from " << *this;
+
+ PValue v = m_values[index].reg_i(component + m_frac);
+ assert(v);
+
+ sfn_log << SfnLog::reg << " -> " << *v;
+
+ if (indirect) {
+ sfn_log << SfnLog::reg << "[" << *indirect << "]";
+ switch (indirect->type()) {
+ case Value::literal: {
+ const LiteralValue& lv = static_cast<const LiteralValue&>(*indirect);
+ v = m_values[lv.value()].reg_i(component + m_frac);
+ break;
+ }
+ case Value::gpr: {
+ v = PValue(new GPRArrayValue(v, indirect, this));
+ sfn_log << SfnLog::reg << "(" << *v << ")";
+ break;
+ }
+ default:
+ assert(0 && !"Indirect addressing must be literal value or GPR");
+ }
+ }
+ sfn_log << SfnLog::reg <<" -> " << *v << "\n";
+ return v;
+}
+
+void GPRArray::collect_registers(ValueMap& output) const
+{
+ for (auto& v: m_values) {
+ for (int i = 0; i < 4; ++i) {
+ auto vv = v.reg_i(i);
+ if (vv)
+ output.insert(vv);
+ }
+ }
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_GPRARRAY_H
+#define SFN_GPRARRAY_H
+
+#include "sfn_value.h"
+#include <vector>
+
+namespace r600 {
+
+class ValuePool;
+class ValueMap;
+
+class GPRValue : public Value {
+public:
+ GPRValue() = default;
+ GPRValue(GPRValue&& orig) = default;
+ GPRValue(const GPRValue& orig) = default;
+
+ GPRValue(uint32_t sel, uint32_t chan, int base_offset);
+
+ GPRValue(uint32_t sel, uint32_t chan);
+
+ GPRValue& operator = (const GPRValue& orig) = default;
+ GPRValue& operator = (GPRValue&& orig) = default;
+
+ uint32_t sel() const override final;
+
+ void set_as_input(){ m_input = true; }
+ bool is_input() const {return m_input; }
+ void set_pin_to_channel() { m_pin_to_channel = true;}
+ bool pin_to_channel() const { return m_pin_to_channel;}
+
+private:
+ void do_print(std::ostream& os) const override;
+ void do_print(std::ostream& os, const PrintFlags& flags) const override;
+ bool is_equal_to(const Value& other) const override;
+ uint32_t m_sel = 0;
+ int m_base_offset = 0;
+ bool m_input = false;
+ bool m_pin_to_channel = false;
+};
+
+class GPRVector : public Value {
+public:
+ using Swizzle = std::array<uint32_t,4>;
+ using Values = std::array<PValue,4>;
+ GPRVector() = default;
+ GPRVector(GPRVector&& orig) = default;
+ GPRVector(const GPRVector& orig);
+
+ GPRVector(const GPRVector& orig, const std::array<uint8_t, 4>& swizzle);
+ GPRVector(std::array<PValue,4> elms);
+ GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle);
+
+ GPRVector& operator = (const GPRVector& orig) = default;
+ GPRVector& operator = (GPRVector&& orig) = default;
+
+ void swizzle(const Swizzle& swz);
+
+ uint32_t sel() const override final;
+
+ void set_reg_i(int i, PValue reg);
+
+ unsigned chan_i(int i) const {return m_elms[i]->chan();}
+ PValue reg_i(int i) const {return m_elms[i];}
+ PValue operator [] (int i) const {return m_elms[i];}
+ PValue& operator [] (int i) {return m_elms[i];}
+
+
+ PValue x() const {return m_elms[0];}
+ PValue y() const {return m_elms[1];}
+ PValue z() const {return m_elms[2];}
+ PValue w() const {return m_elms[3];}
+
+
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Value& other) const override;
+ void validate() const;
+
+ Values m_elms;
+ mutable bool m_valid;
+};
+
+
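+/** A contiguous range of GPRs that is accessed with (possibly) indirect
+ * addressing; comp_mask holds the occupied channels and frac the first
+ * used channel. */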
+class GPRArray : public Value
+{
+public:
+ using Pointer = std::shared_ptr<GPRArray>;
+
+ GPRArray(int base, int size, int comp_mask, int frac);
+
+ uint32_t sel() const override;
+
+ size_t size() const {return m_values.size();}
+
+ PValue get_indirect(unsigned index, PValue indirect, unsigned component);
+
+ void collect_registers(ValueMap& output) const;
+
+private:
+ void do_print(std::ostream& os) const override;
+
+ bool is_equal_to(const Value& other) const override;
+
+ int m_base_index;
+ int m_component_mask;
+ int m_frac;
+
+ std::vector<GPRVector> m_values;
+};
+
+using PGPRArray = GPRArray::Pointer;
+
+class GPRArrayValue :public Value {
+public:
+ GPRArrayValue(PValue value, GPRArray *array);
+ GPRArrayValue(PValue value, PValue index, GPRArray *array);
+
+ size_t array_size() const;
+ uint32_t sel() const override;
+
+ PValue value() {return m_value;}
+
+ void reset_value(PValue new_value);
+ void reset_addr(PValue new_addr);
+
+ Value::Pointer indirect() const {return m_addr;}
+
+private:
+
+ void do_print(std::ostream& os) const override;
+
+ bool is_equal_to(const Value& other) const override;
+
+ PValue m_value;
+ PValue m_addr;
+ GPRArray *m_array;
+};
+
+inline size_t GPRArrayValue::array_size() const
+{
+ return m_array->size();
+}
+
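+/* Build a swizzle that keeps the first ncomp channels and marks the
+ * remaining ones with chan 7, which prints as '_' and stands for an
+ * unused component. */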
+inline GPRVector::Swizzle swizzle_from_mask(unsigned ncomp)
+{
+ GPRVector::Swizzle swz = {0,1,2,3};
+ for (int i = ncomp; i < 4; ++i)
+ swz[i] = 7;
+ return swz;
+}
+
+
+}
+
+#endif // SFN_GPRARRAY_H
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_debug.h"
+#include "sfn_value_gpr.h"
+#include "sfn_valuepool.h"
+
+#include <iostream>
+#include <queue>
+
+namespace r600 {
+
+using std::vector;
+using std::pair;
+using std::make_pair;
+using std::queue;
+
+ValuePool::ValuePool():
+ m_next_register_index(0),
+ current_temp_reg_index(0),
+ next_temp_reg_comp(4)
+{
+}
+
+PValue ValuePool::m_undef = Value::zero;
+
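+/* Fetch a destination as a full vec4; channels beyond num_components
+ * are requested with chan 7, i.e. as unused. */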
+GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components)
+{
+ std::array<PValue, 4> result;
+ for (int i = 0; i < 4; ++i)
+ result[i] = from_nir(dst, i < num_components ? i : 7);
+ return GPRVector(result);
+}
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
+{
+ sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ")
+ << (v.is_ssa ? v.ssa->index : v.reg.reg->index);
+
+ if (!v.is_ssa) {
+ int idx = lookup_register_index(v);
+ sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+ if (idx >= 0) {
+ auto reg = lookup_register(idx, swizzled, false);
+ if (reg) {
+ if (reg->type() == Value::gpr_vector) {
+ auto& array = dynamic_cast<GPRArray&>(*reg);
+ reg = array.get_indirect(v.reg.base_offset,
+ v.reg.indirect ?
+ from_nir(*v.reg.indirect, 0, 0) : nullptr,
+ component);
+ }
+ return reg;
+ }
+ }
+ assert(0 && "local registers should always be found");
+ }
+
+ unsigned index = v.ssa->index;
+ /* For undefs we use zero and let the (yet to be implemented) DCE deal with it */
+ if (m_ssa_undef.find(index) != m_ssa_undef.end())
+ return Value::zero;
+
+
+ int idx = lookup_register_index(v);
+ sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+ if (idx >= 0) {
+ auto reg = lookup_register(idx, swizzled, false);
+ if (reg)
+ return reg;
+ }
+
+
+ auto literal_val = m_literal_constants.find(index);
+ if (literal_val != m_literal_constants.end()) {
+ switch (literal_val->second->def.bit_size) {
+ case 1:
+ return PValue(new LiteralValue(literal_val->second->value[swizzled].b ? 0xffffffff : 0, component));
+ case 32:
+ return literal(literal_val->second->value[swizzled].u32);
+ default:
+ sfn_log << SfnLog::reg << "Unsupported bit size " << literal_val->second->def.bit_size
+ << " fall back to 32\n";
+ return PValue(new LiteralValue(literal_val->second->value[swizzled].u32, component));
+ }
+ }
+
+ unsigned uindex = (index << 2) + swizzled;
+ auto u = m_uniforms.find(uindex);
+ if (u != m_uniforms.end())
+ return u->second;
+
+ return PValue();
+}
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component)
+{
+ return from_nir(v, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component)
+{
+ return from_nir(v.src, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component)
+{
+ return from_nir(v.src, component, v.swizzle[component]);
+}
+
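+/* Hand out temporaries one channel at a time so that up to four temp
+ * values share a single GPR before a new register index is allocated. */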
+PValue ValuePool::get_temp_register()
+{
+ if (next_temp_reg_comp > 3) {
+ current_temp_reg_index = allocate_temp_register();
+ next_temp_reg_comp = 0;
+ }
+ return PValue(new GPRValue(current_temp_reg_index, next_temp_reg_comp++));
+}
+
+GPRVector ValuePool::get_temp_vec4()
+{
+ int sel = allocate_temp_register();
+ return GPRVector(sel, {0,1,2,3});
+}
+
+PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp)
+{
+ int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa):
+ get_local_register_index(*src.reg.reg);
+
+ auto retval = lookup_register(idx, comp, false);
+ if (!retval)
+ retval = create_register(idx, comp);
+ return retval;
+}
+
+PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component)
+{
+ //assert(v.write_mask & (1 << component));
+ return from_nir(v.dest, component);
+}
+
+int ValuePool::lookup_register_index(const nir_dest& dst)
+{
+ return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa):
+ get_local_register_index(*dst.reg.reg);
+}
+
+int ValuePool::lookup_register_index(const nir_src& src) const
+{
+ int index = 0;
+
+ index = src.is_ssa ?
+ get_ssa_register_index(*src.ssa) :
+ get_local_register_index(*src.reg.reg);
+
+ sfn_log << SfnLog::reg << " LIDX:" << index;
+
+ auto r = m_register_map.find(index);
+ if (r == m_register_map.end()) {
+ return -1;
+ }
+ return static_cast<int>(r->second.index);
+}
+
+
+int ValuePool::allocate_component(unsigned index, unsigned comp, bool pre_alloc)
+{
+ assert(comp < 8);
+ return allocate_with_mask(index, 1 << comp, pre_alloc);
+}
+
+int ValuePool::allocate_temp_register()
+{
+ return m_next_register_index++;
+}
+
+
+PValue ValuePool::from_nir(const nir_dest& v, unsigned component)
+{
+ int idx = lookup_register_index(v);
+ sfn_log << SfnLog::reg << __func__ << ": ";
+ if (v.is_ssa)
+ sfn_log << "ssa_" << v.ssa.index;
+ else
+ sfn_log << "r" << v.reg.reg->index;
+ sfn_log << " -> " << idx << "\n";
+
+ auto retval = lookup_register(idx, component, false);
+ if (!retval)
+ retval = create_register(idx, component);
+
+ if (retval->type() == Value::gpr_vector) {
+ assert(!v.is_ssa);
+ auto& array = dynamic_cast<GPRArray&>(*retval);
+ retval = array.get_indirect(v.reg.base_offset,
+ v.reg.indirect ?
+ from_nir(*v.reg.indirect, 0, 0) : nullptr,
+ component);
+ }
+
+ return retval;
+}
+
+ValueMap ValuePool::get_temp_registers() const
+{
+ ValueMap result;
+
+ for (auto& v : m_registers) {
+ if (v.second->type() == Value::gpr)
+ result.insert(v.second);
+ else if (v.second->type() == Value::gpr_vector) {
+ auto& array = dynamic_cast<GPRArray&>(*v.second);
+ array.collect_registers(result);
+ }
+ }
+ return result;
+}
+
+static const char swz[] = "xyzw01?_";
+
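+/* Registers are keyed as (sel << 3) + chan; the three low bits leave
+ * room for the special channels 4-7 next to x, y, z, and w. */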
+PValue ValuePool::create_register(unsigned sel, unsigned swizzle)
+{
+ sfn_log << SfnLog::reg
+ <<"Create register " << sel << '.' << swz[swizzle] << "\n";
+ auto retval = PValue(new GPRValue(sel, swizzle));
+ m_registers[(sel << 3) + swizzle] = retval;
+ return retval;
+}
+
+bool ValuePool::inject_register(unsigned sel, unsigned swizzle,
+ const PValue& reg, bool map)
+{
+ uint32_t ssa_index = sel;
+
+ if (map) {
+ auto pos = m_ssa_register_map.find(sel);
+ if (pos == m_ssa_register_map.end())
+ ssa_index = m_next_register_index++;
+ else
+ ssa_index = pos->second;
+ }
+
+ sfn_log << SfnLog::reg
+ << "Inject register " << sel << '.' << swz[swizzle]
+ << " at index " << ssa_index << " ...";
+
+ if (map)
+ m_ssa_register_map[sel] = ssa_index;
+
+ allocate_with_mask(ssa_index, swizzle, true);
+
+ unsigned idx = (ssa_index << 3) + swizzle;
+ auto p = m_registers.find(idx);
+ if ( (p != m_registers.end()) && *p->second != *reg) {
+ std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n";
+ assert(0);
+ return false;
+ }
+ sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n";
+ m_registers[idx] = reg;
+
+ if (m_next_register_index <= ssa_index)
+ m_next_register_index = ssa_index + 1;
+ return true;
+}
+
+
+PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle,
+ bool required)
+{
+
+ PValue retval;
+ sfn_log << SfnLog::reg
+ << "lookup register " << sel << '.' << swz[swizzle] << "("
+ << ((sel << 3) + swizzle) << ")...";
+
+
+ auto reg = m_registers.find((sel << 3) + swizzle);
+ if (reg != m_registers.end()) {
+ sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n";
+ retval = reg->second;
+ } else if (swizzle == 7) {
+ retval = create_register(sel, swizzle);
+ sfn_log << SfnLog::reg << " -> Created " << *retval << "\n";
+ } else if (required) {
+ sfn_log << SfnLog::reg << "Register (" << sel << ", "
+ << swizzle << ") not found but required\n";
+ assert(0 && "Unallocated register value requested\n");
+ } else {
+ sfn_log << SfnLog::reg << " -> Not required and not allocated\n";
+ }
+ return retval;
+}
+
+unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa)
+{
+ sfn_log << SfnLog::reg << __func__ << ": search dst ssa "
+ << ssa.index;
+
+ auto pos = m_ssa_register_map.find(ssa.index);
+ if (pos == m_ssa_register_map.end()) {
+ sfn_log << SfnLog::reg << " Need to allocate ...";
+ allocate_ssa_register(ssa);
+ pos = m_ssa_register_map.find(ssa.index);
+ assert(pos != m_ssa_register_map.end());
+ }
+ sfn_log << SfnLog::reg << "... got " << pos->second << "\n";
+ return pos->second;
+}
+
+unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const
+{
+ sfn_log << SfnLog::reg << __func__ << ": search ssa "
+ << ssa.index;
+
+ auto pos = m_ssa_register_map.find(ssa.index);
+ if (pos == m_ssa_register_map.end()) {
+ sfn_log << SfnLog::reg << __func__ << ": ssa register "
+ << ssa.index << " lookup failed\n";
+ return -1;
+ }
+ sfn_log << SfnLog::reg << " got " << pos->second << "\n";
+ return pos->second;
+}
+
+unsigned ValuePool::get_local_register_index(const nir_register& reg)
+{
+ auto pos = m_local_register_map.find(reg.index);
+ if (pos == m_local_register_map.end()) {
+ allocate_local_register(reg);
+ pos = m_local_register_map.find(reg.index);
+ assert(pos != m_local_register_map.end());
+ }
+ return pos->second;
+}
+
+unsigned ValuePool::get_local_register_index(const nir_register& reg) const
+{
+ auto pos = m_local_register_map.find(reg.index);
+ if (pos == m_local_register_map.end()) {
+ sfn_log << SfnLog::err << __func__ << ": local register "
+ << reg.index << " lookup failed\n";
+ return -1;
+ }
+ return pos->second;
+}
+
+void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa)
+{
+ sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index
+ << " as " << m_next_register_index << "\n";
+ int index = m_next_register_index++;
+ m_ssa_register_map[ssa.index] = index;
+ allocate_with_mask(index, 0xf, true);
+}
+
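+/* Pack indirectly addressed arrays: arrays are pulled from the priority
+ * queue (longest first) and merged side by side into the free channels
+ * of one register range as long as the four channels suffice. */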
+void ValuePool::allocate_arrays(array_list& arrays)
+{
+ int ncomponents = 0;
+ int current_index = m_next_register_index;
+ unsigned instance = 0;
+
+ while (!arrays.empty()) {
+ auto a = arrays.top();
+ arrays.pop();
+
+ /* This is a bit hackish: return an id that encodes the array merge. To
+ * make sure the mapping doesn't go wrong, the array must be longer than
+ * the number of instances already merged into this array slot. */
+ if (a.ncomponents + ncomponents > 4 ||
+ a.length < instance) {
+ current_index = m_next_register_index;
+ ncomponents = 0;
+ instance = 0;
+ }
+
+ if (ncomponents == 0)
+ m_next_register_index += a.length;
+
+ uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents;
+
+ PValue array = PValue(new GPRArray(current_index, a.length, mask, ncomponents));
+
+ sfn_log << SfnLog::reg << "Add array at "<< current_index
+ << " of size " << a.length << " with " << a.ncomponents
+ << " components, mask " << mask << "\n";
+
+ m_local_register_map[a.index] = current_index + instance;
+
+ for (unsigned i = 0; i < a.ncomponents; ++i)
+ m_registers[((current_index + instance) << 3) + i] = array;
+
+ VRec next_reg = {current_index + instance, mask, mask};
+ m_register_map[current_index + instance] = next_reg;
+
+ ncomponents += a.ncomponents;
+ ++instance;
+ }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg)
+{
+ int index = m_next_register_index++;
+ m_local_register_map[reg.index] = index;
+ allocate_with_mask(index, 0xf, true);
+
+ /* Create the actual register and map it. */
+ for (int i = 0; i < 4; ++i) {
+ int k = (index << 3) + i;
+ m_registers[k] = PValue(new GPRValue(index, i));
+ }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays)
+{
+ sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index
+ << " as " << m_next_register_index << "\n";
+
+ if (reg.num_array_elems) {
+ array_entry ae = {reg.index, reg.num_array_elems, reg.num_components};
+ arrays.push(ae);
+ }
+ else
+ allocate_local_register(reg);
+}
+
+bool ValuePool::create_undef(nir_ssa_undef_instr* instr)
+{
+ m_ssa_undef.insert(instr->def.index);
+ return true;
+}
+
+bool ValuePool::set_literal_constant(nir_load_const_instr* instr)
+{
+ sfn_log << SfnLog::reg << "Add literal " << instr->def.index << "\n";
+ m_literal_constants[instr->def.index] = instr;
+ return true;
+}
+
+const nir_load_const_instr* ValuePool::get_literal_constant(int index)
+{
+ sfn_log << SfnLog::reg << "Try to locate literal " << index << "...";
+ auto literal = m_literal_constants.find(index);
+ if (literal == m_literal_constants.end()) {
+ sfn_log << SfnLog::reg << " not found\n";
+ return nullptr;
+ }
+ sfn_log << SfnLog::reg << " found\n";
+ return literal->second;
+}
+
+void ValuePool::add_uniform(unsigned index, const PValue& value)
+{
+ sfn_log << SfnLog::reg << "Reserve " << *value << " as " << index << "\n";
+ m_uniforms[index] = value;
+}
+
+PValue ValuePool::uniform(unsigned index)
+{
+ sfn_log << SfnLog::reg << "Search index " << index << "\n";
+ auto i = m_uniforms.find(index);
+ return i == m_uniforms.end() ? PValue() : i->second;
+}
+
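+/* Reserve components of a register index. A component may be re-used
+ * if it was pre-allocated; otherwise a conflicting allocation is an
+ * error and -1 is returned. */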
+int ValuePool::allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc)
+{
+ int retval;
+ VRec next_register = { index, mask };
+
+ sfn_log << SfnLog::reg << (pre_alloc ? "Pre-alloc" : "Allocate")
+ << " register (" << index << ", " << mask << ")\n";
+ retval = index;
+ auto r = m_register_map.find(index);
+
+ if (r != m_register_map.end()) {
+ if ((r->second.mask & next_register.mask) &&
+ !(r->second.pre_alloc_mask & next_register.mask)) {
+ std::cerr << "r600 ERR: register ("
+ << index << ", " << mask
+ << ") already allocated as (" << r->second.index << ", "
+ << r->second.mask << ", " << r->second.pre_alloc_mask
+ << ") \n";
+ retval = -1;
+ } else {
+ r->second.mask |= next_register.mask;
+ if (pre_alloc)
+ r->second.pre_alloc_mask |= next_register.mask;
+ retval = r->second.index;
+ }
+ } else {
+ if (pre_alloc)
+ next_register.pre_alloc_mask = mask;
+ m_register_map[index] = next_register;
+ retval = next_register.index;
+ }
+
+ sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R"
+ << retval << "\n";
+
+ return retval;
+}
+
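+/* Literals are cached, and the few constants with a hardware inline
+ * representation (0, 1 (int), 1.0f, 0.5f, -1 (int)) are mapped to ALU
+ * inline constants so they don't consume literal slots. */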
+PValue ValuePool::literal(uint32_t value)
+{
+ const uint32_t float_1 = 0x3f800000;
+ const uint32_t float_05 = 0x3f000000;
+
+ auto l = m_literals.find(value);
+ if (l != m_literals.end())
+ return l->second;
+
+ switch (value) {
+ case 0:
+ m_literals[0] = PValue(new InlineConstValue(ALU_SRC_0, 0));
+ return m_literals[0];
+ case 1:
+ m_literals[1] = PValue(new InlineConstValue(ALU_SRC_1_INT, 0));
+ return m_literals[1];
+ case float_1:
+ m_literals[float_1] = PValue(new InlineConstValue(ALU_SRC_1, 0));
+ return m_literals[float_1];
+ case float_05:
+ m_literals[float_05] = PValue(new InlineConstValue(ALU_SRC_0_5, 0));
+ return m_literals[float_05];
+ case 0xffffffff:
+ m_literals[0xffffffff] = PValue(new InlineConstValue(ALU_SRC_M_1_INT, 0));
+ return m_literals[0xffffffff];
+ default:
+ m_literals[value] = PValue(new LiteralValue(value));
+ return m_literals[value];
+ }
+}
+
+}
--- /dev/null
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SFN_VALUEPOOL_H
+#define SFN_VALUEPOOL_H
+
+#include "sfn_value.h"
+#include "sfn_value_gpr.h"
+
+#include <set>
+#include <queue>
+
+namespace r600 {
+
+using LiteralBuffer = std::map<unsigned, const nir_load_const_instr *>;
+
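+/** \brief Map from (sel, chan) to a value, used to collect the GPRs
+ * that are in use as temporaries. */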
+class ValueMap {
+public:
+ void insert(const PValue& v) {
+ auto idx = index_from(v->sel(), v->chan());
+ m_map[idx] = v;
+ }
+ PValue get_or_inject(uint32_t index, uint32_t chan) {
+ auto idx = index_from(index, chan);
+ auto v = m_map.find(idx);
+ if (v == m_map.end()) {
+ insert(PValue(new GPRValue(index, chan)));
+ v = m_map.find(idx);
+ }
+ return v->second;
+ }
+ std::map<uint32_t, PValue>::const_iterator begin() const {return m_map.begin();}
+ std::map<uint32_t, PValue>::const_iterator end() const {return m_map.end();}
+
+private:
+ uint32_t index_from(uint32_t index, uint32_t chan) {
+ return (index << 3) + chan;
+ }
+ std::map<uint32_t, PValue> m_map;
+};
+
+/** \brief Class to keep track of registers, uniforms, and literals
+ * This class holds the references to the uniforms and the literals
+ * and is responsible for allocating the registers.
+ */
+class ValuePool
+{
+public:
+
+ struct array_entry {
+ unsigned index;
+ unsigned length;
+ unsigned ncomponents;
+
+ bool operator ()(const array_entry& a, const array_entry& b) const {
+ return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents);
+ }
+ };
+
+ using array_list = std::priority_queue<array_entry, std::vector<array_entry>,
+ array_entry>;
+
+ ValuePool();
+
+
+ GPRVector vec_from_nir(const nir_dest& dst, int num_components);
+
+ PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
+
+ PValue from_nir(const nir_src& v, unsigned component);
+ /** Get a register that is used as source register in an ALU instruction.
+ * The PValue holds one component as specified. If the register refers to
+ * a GPR it must already have been allocated; uniforms and literals, on
+ * the other hand, might be pre-loaded.
+ */
+ PValue from_nir(const nir_alu_src& v, unsigned component);
+
+ /** Get a register that is used as source register in a texture
+ * instruction. The PValue holds one component as specified.
+ */
+ PValue from_nir(const nir_tex_src& v, unsigned component);
+
+ /** Allocate a register that is used as destination register in an ALU
+ * instruction. The PValue holds one component as specified.
+ */
+ PValue from_nir(const nir_alu_dest& v, unsigned component);
+
+ /** Allocate a register that is used as destination register in any
+ * instruction. The PValue holds one component as specified.
+ */
+ PValue from_nir(const nir_dest& v, unsigned component);
+
+ /** Get the register index mapped from the NIR code to the r600 IR.
+ * \param src NIR source register
+ * \returns r600 IR index
+ */
+ int lookup_register_index(const nir_src& src) const;
+
+ /** Get the register index mapped from the NIR code to the r600 IR.
+ * \param dst NIR destination register
+ * \returns r600 IR index
+ */
+ int lookup_register_index(const nir_dest& dst);
+
+ /** Inject a register into a given ssa index position
+ * This is used to redirect loads from system values and vertex attributes
+ * that are already loaded into registers */
+ bool inject_register(unsigned sel, unsigned swizzle, const PValue& reg, bool map);
+
+ /** Reserve space for a local register */
+ void allocate_local_register(const nir_register& reg);
+ void allocate_local_register(const nir_register& reg, array_list& arrays);
+
+ void allocate_arrays(array_list& arrays);
+
+
+ void increment_reserved_registers() {
+ ++m_next_register_index;
+ }
+
+ void set_reserved_registers(unsigned rr) {
+ m_next_register_index = rr;
+ }
+
+ /** Allocate a register that is needed for lowering an instruction
+ * that requires complex calculations.
+ */
+ int allocate_temp_register();
+
+ /** Reserve an undef register; currently it uses (0,7).
+ * \todo should be eliminated in the final pass
+ */
+ bool create_undef(nir_ssa_undef_instr* instr);
+
+ bool set_literal_constant(nir_load_const_instr* instr);
+
+ const nir_load_const_instr *get_literal_constant(int index);
+
+ void add_uniform(unsigned index, const PValue &value);
+
+ PValue uniform(unsigned index);
+
+ /** Create a new register with the given index and store it in the
+ * lookup map
+ */
+ PValue create_register_from_nir_src(const nir_src& src, int comp);
+
+ ValueMap get_temp_registers() const;
+
+ PValue lookup_register(unsigned sel, unsigned swizzle, bool required);
+
+ size_t register_count() const {return m_next_register_index;}
+
+ PValue create_register(unsigned index, unsigned swizzle);
+
+ unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa);
+
+ PValue literal(uint32_t value);
+
+ PValue get_temp_register();
+
+ GPRVector get_temp_vec4();
+
+private:
+
+ unsigned get_ssa_register_index(const nir_ssa_def& ssa) const;
+
+ unsigned get_local_register_index(const nir_register& reg);
+
+ unsigned get_local_register_index(const nir_register& reg) const;
+
+ void allocate_ssa_register(const nir_ssa_def& ssa);
+
+ void allocate_array(const nir_register& reg);
+
+
+ /** Allocate a register index with the given component mask.
+ * If one of the components has already been allocated the function
+ * signals an error by returning -1, otherwise a register index is
+ * returned.
+ */
+ int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc);
+
+ /** Allocate a register index with the given component.
+ * If the component has already been allocated the function
+ * signals an error by returning -1, otherwise a register index is
+ * returned.
+ */
+ int allocate_component(unsigned index, unsigned comp, bool pre_alloc);
+
+ /** (documents lookup_register above) Search for a register with the
+ * given index in the lookup map.
+ * \param sel register sel value
+ * \param swizzle register component, can also be 4, 5, and 7
+ * \param required true: in debug mode assert when the register doesn't exist;
+ * false: return nullptr on failure
+ */
+
+ std::set<unsigned> m_ssa_undef;
+
+ LiteralBuffer m_literal_constants;
+
+ std::map<unsigned, unsigned> m_local_register_map;
+ std::map<unsigned, unsigned> m_ssa_register_map;
+
+ std::map<unsigned, PValue> m_uniforms;
+
+ std::map<unsigned, PValue> m_registers;
+
+ static PValue m_undef;
+
+ struct VRec {
+ unsigned index;
+ unsigned mask;
+ unsigned pre_alloc_mask;
+ };
+ std::map<unsigned, VRec> m_register_map;
+
+ unsigned m_next_register_index;
+
+ std::map<unsigned, PGPRArray> m_arrays_map;
+
+ std::map<uint32_t, PValue> m_literals;
+
+ int current_temp_reg_index;
+ int next_temp_reg_comp;
+};
+
+}
+
+#endif // SFN_VALUEPOOL_H