From d2f089ba17c6b17823fc3d244e15c0a18108d5ce Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 7 Nov 2015 18:58:34 -0800 Subject: [PATCH] i965: Introduce a MOV_INDIRECT opcode. The geometry and tessellation control shader stages both read from multiple URB entries (one per vertex). The thread payload contains several URB handles which reference these separate memory segments. In GLSL, these inputs are represented as per-vertex arrays; the outermost array index selects which vertex's inputs to read. This array index does not necessarily need to be constant. To handle that, we need to use indirect addressing on GRFs to select which of the thread payload registers has the appropriate URB handle. (This is before we can even think about applying the pull model!) This patch introduces a new opcode which performs a MOV from a source using VxH indirect addressing (which allows each of the 8 SIMD channels to select distinct data.) Based on a patch by Jason Ekstrand. v2: Rename from INDIRECT_THREAD_PAYLOAD_MOV to MOV_INDIRECT; make it a bit more generic. Use regs_read() instead of hacking up the register allocator. (Suggested by Jason Ekstrand.) v3: Fix regs_read() to be more accurate for small unaligned regions. Also rebase on Matt's work. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand [v3] Reviewed-by: Abdiel Janulgue [v1] --- src/mesa/drivers/dri/i965/brw_defines.h | 10 ++++++ src/mesa/drivers/dri/i965/brw_fs.cpp | 28 +++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 5 +++ src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 + .../drivers/dri/i965/brw_fs_generator.cpp | 34 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ 6 files changed, 80 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6484484ed34..0b8de63df42 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1289,6 +1289,16 @@ enum opcode { * Calculate the high 32-bits of a 32x32 multiply. */ SHADER_OPCODE_MULH, + + /** + * A MOV that uses VxH indirect addressing. + * + * Source 0: A register to start from (HW_REG). + * Source 1: An indirect offset (in bytes, UD GRF). + * Source 2: The length of the region that could be accessed (in bytes, + * UD immediate). + */ + SHADER_OPCODE_MOV_INDIRECT, }; enum brw_urb_write_flags { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 80b8c8e1207..84b5920d4f5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -840,6 +840,34 @@ fs_inst::regs_read(int arg) const case SHADER_OPCODE_BARRIER: return 1; + case SHADER_OPCODE_MOV_INDIRECT: + if (arg == 0) { + assert(src[2].file == IMM); + unsigned region_length = src[2].ud; + + if (src[0].file == FIXED_GRF) { + /* If the start of the region is not register aligned, then + * there's some portion of the register that's technically + * unread at the beginning. + * + * However, the register allocator works in terms of whole + * registers, and does not use subnr. It assumes that the + * read starts at the beginning of the register, and extends + * regs_read() whole registers beyond that. + * + * To compensate, we extend the region length to include this + * unread portion at the beginning. + */ + if (src[0].subnr) + region_length += src[0].subnr * type_sz(src[0].type); + + return DIV_ROUND_UP(region_length, REG_SIZE); + } else { + assert(!"Invalid register file"); + } + } + break; + default: if (is_tex() && arg == 0 && src[0].file == VGRF) return mlen; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f40e58b8ca0..cbfc07f68bc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -527,6 +527,11 @@ private: struct brw_reg offset, struct brw_reg value); + void generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset); + bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 8c67caff6e0..3c40fcd4fd2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 139cda3ca59..e5a286a763b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -371,6 +371,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) } } +void +fs_generator::generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset) +{ + assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD); + assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; + + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, re re-type to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + /* Prior to Broadwell, there are only 8 address registers. */ + assert(inst->exec_size == 8 || devinfo->gen >= 8); + + brw_MOV(p, addr, indirect_byte_offset); + brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); +} + void fs_generator::generate_urb_read(fs_inst *inst, struct brw_reg dst, @@ -2079,6 +2109,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count++; break; + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(inst, dst, src[0], src[1]); + break; + case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: generate_urb_read(inst, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index a438e1881d5..50b288b5c51 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -554,6 +554,8 @@ brw_instruction_name(enum opcode op) return "barrier"; case SHADER_OPCODE_MULH: return "mulh"; + case SHADER_OPCODE_MOV_INDIRECT: + return "mov_indirect"; } unreachable("not reached"); -- 2.30.2