From 8d8a3815ef698348857cd9812eaa0dc76f5b19b5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 8 Aug 2020 12:55:29 -0500 Subject: [PATCH 1/1] intel/eu: Add a mechanism for emitting relocatable constant MOVs Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_compiler.c | 19 +++++++++++++ src/intel/compiler/brw_compiler.h | 36 ++++++++++++++++++++++++ src/intel/compiler/brw_eu.cpp | 7 +++++ src/intel/compiler/brw_eu.h | 17 ++++++++++++ src/intel/compiler/brw_eu_compact.c | 9 ++++++ src/intel/compiler/brw_eu_emit.c | 43 +++++++++++++++++++++++++++++ 6 files changed, 131 insertions(+) diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index a04cf965011..a5b90e9b3c9 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -267,3 +267,22 @@ brw_prog_key_size(gl_shader_stage stage) assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes)); return stage_sizes[stage]; } + +void +brw_write_shader_relocs(const struct gen_device_info *devinfo, + void *program, + const struct brw_stage_prog_data *prog_data, + struct brw_shader_reloc_value *values, + unsigned num_values) +{ /* Patch every relocation listed in prog_data with the entry of values that has the same id; relocations with no matching id are left unmodified. */ + for (unsigned i = 0; i < prog_data->num_relocs; i++) { + assert(prog_data->relocs[i].offset % 8 == 0); /* relocation offsets must be 8-byte aligned */ + brw_inst *inst = (brw_inst *)(program + prog_data->relocs[i].offset); /* arithmetic on void * (a GNU C extension) advances in bytes */ + for (unsigned j = 0; j < num_values; j++) { + if (prog_data->relocs[i].id == values[j].id) { + brw_update_reloc_imm(devinfo, inst, values[j].value); + break; /* only the first value with a matching id is applied */ + } + } + } +} diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 679a78723fe..01fd0d034e5 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -642,6 +642,32 @@ enum brw_param_builtin { #define BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param) \ (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) & 0x3) +/** Represents a code relocation + * + * Relocatable constants are immediates in the code which we want to be able + * to replace 
post-compile with the actual value. + */ +struct brw_shader_reloc { + /** The 32-bit ID of the relocatable constant */ + uint32_t id; + + /** The byte offset in the shader to the relocatable instruction + * + * This is the offset to the instruction rather than the immediate value + * itself. This allows us to do some sanity checking while we relocate. + */ + uint32_t offset; +}; + +/** A value to write to a relocation */ +struct brw_shader_reloc_value { + /** The 32-bit ID of the relocatable constant */ + uint32_t id; + + /** The value with which to replace the relocated immediate */ + uint32_t value; +}; + struct brw_stage_prog_data { struct { /** size of our binding table. */ @@ -688,6 +714,9 @@ struct brw_stage_prog_data { unsigned const_data_size; unsigned const_data_offset; + unsigned num_relocs; /* number of entries in relocs */ + const struct brw_shader_reloc *relocs; /* relocation records consumed by brw_write_shader_relocs */ + /** Does this program pull from any UBO or other constant buffers? */ bool has_ubo_pull; @@ -1535,6 +1564,13 @@ brw_cs_simd_size_for_group_size(const struct gen_device_info *devinfo, const struct brw_cs_prog_data *cs_prog_data, unsigned group_size); +void +brw_write_shader_relocs(const struct gen_device_info *devinfo, + void *program, + const struct brw_stage_prog_data *prog_data, + struct brw_shader_reloc_value *values, + unsigned num_values); /* rewrites, in program, the immediates of the relocations in prog_data whose id appears in values */ + /** * Calculate the RightExecutionMask field used in GPGPU_WALKER. 
*/ diff --git a/src/intel/compiler/brw_eu.cpp b/src/intel/compiler/brw_eu.cpp index 0027b0a3965..4bd52df5085 100644 --- a/src/intel/compiler/brw_eu.cpp +++ b/src/intel/compiler/brw_eu.cpp @@ -363,6 +363,13 @@ const unsigned *brw_get_program( struct brw_codegen *p, return (const unsigned *)p->store; } +const brw_shader_reloc * +brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs) +{ /* Return the relocation array held by p and store its entry count in *num_relocs. */ + *num_relocs = p->num_relocs; /* int field converted to the unsigned out-parameter */ + return p->relocs; +} + bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, const char *identifier) { diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 7622cff8f6a..88cf9d1d674 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -136,6 +136,10 @@ struct brw_codegen { int *if_depth_in_loop; int loop_stack_depth; int loop_stack_array_size; + + struct brw_shader_reloc *relocs; /* relocation records appended by brw_MOV_reloc_imm */ + int num_relocs; /* entries of relocs currently in use */ + int reloc_array_size; /* allocated capacity of relocs, in entries */ }; struct brw_label { @@ -184,6 +188,8 @@ void brw_disassemble_with_labels(const struct gen_device_info *devinfo, void brw_disassemble(const struct gen_device_info *devinfo, const void *assembly, int start, int end, const struct brw_label *root_label, FILE *out); +const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p, + unsigned *num_relocs); /* returns p->relocs and writes the entry count to *num_relocs */ const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz ); bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, @@ -1238,6 +1244,17 @@ void brw_float_controls_mode(struct brw_codegen *p, unsigned mode, unsigned mask); +void +brw_update_reloc_imm(const struct gen_device_info *devinfo, + brw_inst *inst, + uint32_t value); /* overwrites the immediate of an uncompacted MOV-immediate instruction with value */ + +void +brw_MOV_reloc_imm(struct brw_codegen *p, + struct brw_reg dst, + enum brw_reg_type src_type, + uint32_t id); /* emits a MOV of a placeholder immediate and records a relocation tagged with id */ + /*********************************************************************** * brw_eu_util.c: */ diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c index f880d790b85..d7c35223642 100644 --- 
a/src/intel/compiler/brw_eu_compact.c +++ b/src/intel/compiler/brw_eu_compact.c @@ -2446,6 +2446,15 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset, } p->nr_insn = p->next_insn_offset / sizeof(brw_inst); + for (int i = 0; i < p->num_relocs; i++) { /* shift recorded relocation offsets to follow the compacted code */ + if (p->relocs[i].offset < (uint32_t)start_offset) + continue; /* relocations located before start_offset are not adjusted */ + + assert(p->relocs[i].offset % 16 == 0); /* offsets being adjusted must sit on 16-byte boundaries */ + unsigned idx = (p->relocs[i].offset - start_offset) / 16; /* index of the 16-byte slot relative to start_offset */ + p->relocs[i].offset -= compacted_counts[idx] * 8; /* moves the offset back 8 bytes per unit of compacted_counts[idx]; presumably the count of compacted instructions preceding this slot, confirm against the earlier part of this function */ + } + /* Update the instruction offsets for each group. */ if (disasm) { int offset = 0; diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index e6c6dcb1607..c9c180b1fcc 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3641,3 +3641,46 @@ brw_float_controls_mode(struct brw_codegen *p, if (p->devinfo->gen >= 12) brw_SYNC(p, TGL_SYNC_NOP); } + +void +brw_update_reloc_imm(const struct gen_device_info *devinfo, + brw_inst *inst, + uint32_t value) +{ /* Replace the immediate of inst with value; the asserts below require inst to be an uncompacted MOV whose src0 is an immediate. */ + /* Sanity check that the instruction is a MOV of an immediate */ + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV); + assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE); + + /* If it was compacted, we can't safely rewrite */ + assert(brw_inst_cmpt_control(devinfo, inst) == 0); + + brw_inst_set_imm_ud(devinfo, inst, value); /* value is written through the unsigned-dword immediate setter */ +} + +/* A default value for constants that will be patched at run-time. + * We pick an arbitrary value that prevents instruction compaction. 
+ */ +#define DEFAULT_PATCH_IMM 0x4a7cc037 + +void +brw_MOV_reloc_imm(struct brw_codegen *p, + struct brw_reg dst, + enum brw_reg_type src_type, + uint32_t id) +{ /* Record a relocation tagged with id at the current instruction offset, then emit a MOV of DEFAULT_PATCH_IMM (retyped to src_type) into dst. */ + assert(type_sz(src_type) == 4); + assert(type_sz(dst.type) == 4); /* both the source type and the destination type must be 4 bytes wide */ + + if (p->num_relocs + 1 > p->reloc_array_size) { /* no free entry left: grow to at least 16 entries, otherwise double */ + p->reloc_array_size = MAX2(16, p->reloc_array_size * 2); + p->relocs = reralloc(p->mem_ctx, p->relocs, + struct brw_shader_reloc, p->reloc_array_size); /* NOTE(review): the reralloc result is used unchecked; confirm the allocation-failure policy */ + } + + p->relocs[p->num_relocs++] = (struct brw_shader_reloc) { + .id = id, + .offset = p->next_insn_offset, /* captured before brw_MOV runs; presumably the offset of the MOV emitted below, confirm */ + }; + + brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type)); +} -- 2.30.2