From 82ddd517afad7b133624e8dd32e90addfff27d1e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 30 Jun 2014 08:00:25 -0700 Subject: [PATCH] i965/eu: Refactor jump distance scaling to use a helper function. Different generations of hardware measure jump distances in different units. Previously, every function that needed to set a jump target open coded this scaling, or made a hardcoded assumption (i.e. just used 2). Most functions start with the number of instructions to jump, and scale up to the hardware-specific value. So, I made the function match that. Others start with a byte offset, and divide by a constant (8) to obtain the jump distance. This is actually 16 / 2 (the jump scale for Gen5-7). v2: Make the helper a static inline defined in brw_eu.h, instead of an actual function in brw_eu_emit.c (as suggested by Matt). Signed-off-by: Kenneth Graunke Reviewed-by: Chris Forbes Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu.h | 20 ++++++++++++++++++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 21 +++++++------------ .../drivers/dri/i965/brw_fs_generator.cpp | 8 ++++--- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 7efc02816c9..3e03ab82cc0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -316,6 +316,26 @@ void brw_shader_time_add(struct brw_compile *p, struct brw_reg payload, uint32_t surf_index); +/** + * Return the generation-specific jump distance scaling factor. + * + * Given the number of instructions to jump, we need to scale by + * some number to obtain the actual jump distance to program in an + * instruction. + */ +static inline unsigned +brw_jump_scale(const struct brw_context *brw) +{ + /* Ironlake and later measure jump targets in 64-bit data chunks (in order + * to support compaction), so each 128-bit instruction requires 2 chunks. 
+ */ + if (brw->gen >= 5) + return 2; + + /* Gen4 simply uses the number of 128-bit instructions. */ + return 1; +} + /* If/else/endif. Works by manipulating the execution flags on each * channel. */ diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 3ab44333ec8..36ebed2dec8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1333,12 +1333,7 @@ patch_IF_ELSE(struct brw_compile *p, assert(endif_inst != NULL); assert(else_inst == NULL || brw_inst_opcode(brw, else_inst) == BRW_OPCODE_ELSE); - unsigned br = 1; - /* Jump count is for 64bit data chunk each, so one 128bit instruction - * requires 2 chunks. - */ - if (brw->gen >= 5) - br = 2; + unsigned br = brw_jump_scale(brw); assert(brw_inst_opcode(brw, endif_inst) == BRW_OPCODE_ENDIF); brw_inst_set_exec_size(brw, endif_inst, brw_inst_exec_size(brw, if_inst)); @@ -1635,7 +1630,7 @@ brw_patch_break_cont(struct brw_compile *p, brw_inst *while_inst) struct brw_context *brw = p->brw; brw_inst *do_inst = get_inner_do_insn(p); brw_inst *inst; - int br = (brw->gen == 5) ? 
2 : 1; + unsigned br = brw_jump_scale(brw); assert(brw->gen < 6); @@ -1659,10 +1654,7 @@ brw_WHILE(struct brw_compile *p) { struct brw_context *brw = p->brw; brw_inst *insn, *do_insn; - unsigned br = 1; - - if (brw->gen >= 5) - br = 2; + unsigned br = brw_jump_scale(brw); if (brw->gen >= 7) { insn = next_insn(p, BRW_OPCODE_WHILE); @@ -2346,7 +2338,7 @@ brw_find_loop_end(struct brw_compile *p, int start_offset) { struct brw_context *brw = p->brw; int offset; - int scale = 8; + int scale = 16 / brw_jump_scale(brw); void *store = p->store; assert(brw->gen >= 6); @@ -2378,7 +2370,8 @@ brw_set_uip_jip(struct brw_compile *p) { struct brw_context *brw = p->brw; int offset; - int scale = 8; + int br = brw_jump_scale(brw); + int scale = 16 / br; void *store = p->store; if (brw->gen < 6) @@ -2418,7 +2411,7 @@ brw_set_uip_jip(struct brw_compile *p) case BRW_OPCODE_ENDIF: if (block_end_offset == 0) - brw_inst_set_jip(brw, insn, 2); + brw_inst_set_jip(brw, insn, 1 * br); else brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index cec2e826e17..d3509a0f1de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -67,6 +67,8 @@ fs_generator::patch_discard_jumps_to_fb_writes() if (brw->gen < 6 || this->discard_halt_patches.is_empty()) return false; + int scale = brw_jump_scale(brw); + /* There is a somewhat strange undocumented requirement of using * HALT, according to the simulator. If some channel has HALTed to * a particular UIP, then by the end of the program, every channel @@ -79,8 +81,8 @@ fs_generator::patch_discard_jumps_to_fb_writes() * tests. 
*/ brw_inst *last_halt = gen6_HALT(p); - brw_inst_set_uip(brw, last_halt, 2); - brw_inst_set_jip(brw, last_halt, 2); + brw_inst_set_uip(brw, last_halt, 1 * scale); + brw_inst_set_jip(brw, last_halt, 1 * scale); int ip = p->nr_insn; @@ -89,7 +91,7 @@ fs_generator::patch_discard_jumps_to_fb_writes() assert(brw_inst_opcode(brw, patch) == BRW_OPCODE_HALT); /* HALT takes a half-instruction distance from the pre-incremented IP. */ - brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * 2); + brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * scale); } this->discard_halt_patches.make_empty(); -- 2.30.2