From: Eric Anholt Date: Wed, 18 Oct 2006 07:24:01 +0000 (-0700) Subject: Avoid branch instructions while in single program flow mode. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9a94dae4c292bfc2aa94a3f86865550e2217b870;p=mesa.git Avoid branch instructions while in single program flow mode. There is an errata for Broadwater that threads don't have the instruction/loop mask stacks initialized on thread spawn. In single program flow mode, those stacks are not writable, so we can't initialize them. However, they do get read during ELSE and ENDIF instructions. So, instead, replace branch instructions in single program flow mode with predicated jumps (ADD to the ip register), avoiding use of the more complicated branch instructions that may fail. This is also a minor optimization as no ENDIF equivalent is necessary. --- diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 0e8591aaa8d..3bec153075a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -62,6 +62,8 @@ static void compile_clip_prog( struct brw_context *brw, */ brw_init_compile(&c.func); + c.func.single_program_flow = 1; + c.key = *key; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 1afa0f816b8..d4dbcf38a7a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -104,6 +104,7 @@ struct brw_compile { struct brw_instruction *current; GLuint flag_value; + GLboolean single_program_flow; }; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6425c91450c..9992b47d8ae 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -464,7 +464,6 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, return insn; } - /* EU takes the value from the flag register and pushes it onto some * sort of a stack (presumably merging with any flag value already on * the stack). Within an if block, the flags at the top of the stack @@ -482,7 +481,16 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, */ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF); + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } /* Override the defaults for this instruction: */ @@ -504,7 +512,13 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE); + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); @@ -516,11 +530,17 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, /* Patch the if instruction to point at this instruction. */ - assert(if_insn->header.opcode == BRW_OPCODE_IF); + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); - if_insn->bits3.if_else.jump_count = insn - if_insn; - if_insn->bits3.if_else.pop_count = 1; - if_insn->bits3.if_else.pad0 = 0; + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } return insn; } @@ -528,63 +548,76 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = patch_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - assert(patch_insn->bits3.if_else.jump_count == 0); - - /* Patch the if or else instructions to point at this or the next - * instruction respectively. - */ - if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; } - else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; - } - else { - assert(0); - } - - /* Also pop item off the stack in the endif instruction: - */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; } /* DO/WHILE loop: */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); - /* Override the defaults for this instruction: - */ - brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = execute_size; -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ - return insn; + return insn; + } } @@ -592,19 +625,31 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) void brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE); + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = do_insn->header.execution_size; - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } /* insn->header.mask_control = BRW_MASK_ENABLE; */ diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 7d3f9dd5e3b..9066e42252d 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -66,7 +66,9 @@ static void compile_gs_prog( struct brw_context *brw, /* Begin the compilation: */ brw_init_compile(&c.func); - + + c.func.single_program_flow = 1; + /* For some reason the thread is spawned with only 4 channels * unmasked. */ diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 25acdcfe947..10fee944e8d 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -519,7 +519,22 @@ struct thread3 struct brw_clip_unit_state { struct thread0 thread0; - struct thread1 thread1; + struct + { + GLuint pad0:7; + GLuint sw_exception_enable:1; + GLuint pad1:3; + GLuint mask_stack_exception_enable:1; + GLuint pad2:1; + GLuint illegal_op_exception_enable:1; + GLuint pad3:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad4:5; + GLuint single_program_flow:1; + } thread1; + struct thread2 thread2; struct thread3 thread3; @@ -532,8 +547,8 @@ struct brw_clip_unit_state GLuint pad1:1; GLuint urb_entry_allocation_size:5; GLuint pad2:1; - GLuint max_threads:6; /* may be less */ - GLuint pad3:1; + GLuint max_threads:1; /* may be less */ + GLuint pad3:6; } thread4; struct @@ -1322,6 +1337,7 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + GLint d; GLuint ud; } bits3; };