From 91becd84ae2c9aa144ba2eb08f3e62166275a0e4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 8 Aug 2020 13:56:16 -0500 Subject: [PATCH] intel/fs: Add support for a new load_reloc_const intrinsic Reviewed-by: Kenneth Graunke Part-of: --- src/compiler/nir/nir_builder.h | 12 ++++++++++++ src/compiler/nir/nir_intrinsics.py | 4 ++++ src/intel/compiler/brw_eu_defines.h | 3 +++ src/intel/compiler/brw_fs.cpp | 1 + src/intel/compiler/brw_fs_generator.cpp | 7 +++++++ src/intel/compiler/brw_fs_nir.cpp | 7 +++++++ src/intel/compiler/brw_ir_performance.cpp | 1 + src/intel/compiler/brw_shader.cpp | 2 ++ 8 files changed, 37 insertions(+) diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index af638275eec..19dab6e9b10 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -1386,6 +1386,18 @@ nir_load_param(nir_builder *build, uint32_t param_idx) return &load->dest.ssa; } +static inline nir_ssa_def * +nir_load_reloc_const_intel(nir_builder *b, uint32_t id) +{ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_reloc_const_intel); + nir_intrinsic_set_param_idx(load, id); + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); + nir_builder_instr_insert(b, &load->instr); + return &load->dest.ssa; +} + #include "nir_builder_opcodes.h" static inline nir_ssa_def * diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 09a3c91c267..0de66f4c2fb 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -942,3 +942,7 @@ image("store_raw_intel", src_comp=[1, 0]) # Number of data items being operated on for a SIMD program. system_value("simd_width_intel", 1) + +# Load a relocatable 32-bit value +intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32], + indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER]) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index d63360222ec..25576a21a87 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -768,6 +768,9 @@ enum opcode { */ SHADER_OPCODE_MOV_INDIRECT, + /** Fills out a relocatable immediate */ + SHADER_OPCODE_MOV_RELOC_IMM, + VEC4_OPCODE_URB_READ, TCS_OPCODE_GET_INSTANCE_ID, TCS_OPCODE_URB_WRITE, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 460a36587a1..8182bb30759 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6350,6 +6350,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case FS_OPCODE_PACK: case SHADER_OPCODE_SEL_EXEC: case SHADER_OPCODE_CLUSTER_BROADCAST: + case SHADER_OPCODE_MOV_RELOC_IMM: return get_fpu_lowered_simd_width(devinfo, inst); case BRW_OPCODE_CMP: { diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 74c6cd30474..e75c8d98337 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2230,6 +2230,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, generate_mov_indirect(inst, dst, src[0], src[1]); break; + case SHADER_OPCODE_MOV_RELOC_IMM: + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_MOV_reloc_imm(p, dst, dst.type, src[0].ud); + break; + case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: generate_urb_read(inst, dst, src[0]); @@ -2608,5 +2613,7 @@ fs_generator::add_const_data(void *data, unsigned size) const unsigned * fs_generator::get_assembly() { + prog_data->relocs = brw_get_shader_relocs(p, &prog_data->num_relocs); + return brw_get_program(p, &prog_data->program_size); } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 49fafe1417a..0ef40625e33 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4396,6 +4396,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); break; + case nir_intrinsic_load_reloc_const_intel: { + uint32_t id = nir_intrinsic_param_idx(instr); + bld.emit(SHADER_OPCODE_MOV_RELOC_IMM, + dest, brw_imm_ud(id)); + break; + } + case nir_intrinsic_load_uniform: { /* Offsets are in bytes but they should always aligned to * the type size diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 3c39594d121..e95b847bd7f 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -376,6 +376,7 @@ namespace { case BRW_OPCODE_CMP: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MOV_RELOC_IMM: if (devinfo->gen >= 11) { return calculate_desc(info, unit_fpu, 0, 2, 0, 0, 2, 0, 10, 6, 14, 0, 0); diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 3219a01e60a..2650e89040c 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -502,6 +502,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "usub_sat"; case SHADER_OPCODE_MOV_INDIRECT: return "mov_indirect"; + case SHADER_OPCODE_MOV_RELOC_IMM: + return "mov_reloc_imm"; case VEC4_OPCODE_URB_READ: return "urb_read"; -- 2.30.2