X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_eu_emit.c;h=33471576446413f8b0cc7da1cbb2d17caeda230b;hb=7b36c68ba6899c7f30fd56b7ef07a78b027771ac;hp=7284bdc45ea4f39bdb5dde58c06f7cbfdf9e2094;hpb=973b4ddd0e2f25cfd72cb945fbd38aed629a6fed;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 7284bdc45ea..33471576446 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -67,6 +67,9 @@ gen6_resolve_implied_move(struct brw_compile *p, if (intel->gen < 6) return; + if (src->file == BRW_MESSAGE_REGISTER_FILE) + return; + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); @@ -81,10 +84,18 @@ gen6_resolve_implied_move(struct brw_compile *p, static void gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) { + /* From the BSpec / ISA Reference / send - [DevIVB+]: + * "The send with EOT should use register space R112-R127 for . This is + * to enable loading of a new thread into the same slot while the message + * with EOT for current thread is pending dispatch." + * + * Since we're pretending to have 16 MRFs anyway, we may as well use the + * registers required for messages with EOT. + */ struct intel_context *intel = &p->brw->intel; if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { reg->file = BRW_GENERAL_REGISTER_FILE; - reg->nr += 111; + reg->nr += GEN7_MRF_HACK_START; } } @@ -642,15 +653,17 @@ brw_set_dp_read_message(struct brw_compile *p, } } -static void brw_set_sampler_message(struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLuint header_present, - GLuint simd_mode) +void +brw_set_sampler_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLuint header_present, + GLuint simd_mode, + GLuint return_format) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; @@ -676,7 +689,7 @@ static void brw_set_sampler_message(struct brw_compile *p, insn->bits3.sampler.binding_table_index = binding_table_index; insn->bits3.sampler.sampler = sampler; insn->bits3.sampler.msg_type = msg_type; - insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.return_format = return_format; } } @@ -687,7 +700,15 @@ brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; - assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + if (p->nr_insn + 1 > p->store_size) { + if (0) + printf("incresing the store size to %d\n", p->store_size << 1); + p->store_size <<= 1; + p->store = reralloc(p->mem_ctx, p->store, + struct brw_instruction, p->store_size); + if (!p->store) + assert(!"realloc eu store memeory failed"); + } insn = &p->store[p->nr_insn++]; memcpy(insn, p->current, sizeof(*insn)); @@ -897,16 +918,45 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, static void push_if_stack(struct brw_compile *p, struct brw_instruction *inst) { - p->if_stack[p->if_stack_depth] = inst; + p->if_stack[p->if_stack_depth] = inst - p->store; p->if_stack_depth++; if (p->if_stack_array_size <= p->if_stack_depth) { p->if_stack_array_size *= 2; - p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *, + p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, p->if_stack_array_size); } } +static struct brw_instruction * +pop_if_stack(struct brw_compile *p) +{ + p->if_stack_depth--; + return &p->store[p->if_stack[p->if_stack_depth]]; +} + +static void +push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) +{ + if (p->loop_stack_array_size < p->loop_stack_depth) { + p->loop_stack_array_size *= 2; + p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, + p->loop_stack_array_size); + p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, + p->loop_stack_array_size); + } + + p->loop_stack[p->loop_stack_depth] = inst - p->store; + p->loop_stack_depth++; + p->if_depth_in_loop[p->loop_stack_depth] = 0; +} + +static struct brw_instruction * +get_inner_do_insn(struct brw_compile *p) +{ + return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; +} + /* EU takes the value from the flag register and pushes it onto some * sort of a stack (presumably merging with any flag value already on * the stack). Within an if block, the flags at the top of the stack @@ -937,11 +987,11 @@ brw_IF(struct brw_compile *p, GLuint execute_size) } else if (intel->gen == 6) { brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); + brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); } else { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); + brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); brw_set_src1(p, insn, brw_imm_ud(0)); insn->bits3.break_cont.jip = 0; insn->bits3.break_cont.uip = 0; @@ -957,6 +1007,7 @@ brw_IF(struct brw_compile *p, GLuint execute_size) p->current->header.predicate_control = BRW_PREDICATE_NONE; push_if_stack(p, insn); + p->if_depth_in_loop[p->loop_stack_depth]++; return insn; } @@ -1043,7 +1094,21 @@ patch_IF_ELSE(struct brw_compile *p, { struct intel_context *intel = &p->brw->intel; - assert(!p->single_program_flow); + /* We shouldn't be patching IF and ELSE instructions in single program flow + * mode when gen < 6, because in single program flow mode on those + * platforms, we convert flow control instructions to conditional ADDs that + * operate on IP (see brw_ENDIF). + * + * However, on Gen6, writing to IP doesn't work in single program flow mode + * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may + * not be updated by non-flow control instructions."). And on later + * platforms, there is no significant benefit to converting control flow + * instructions to conditional ADDs. So we do patch IF and ELSE + * instructions in single program flow mode on those platforms. + */ + if (intel->gen < 6) + assert(!p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); assert(endif_inst != NULL); assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); @@ -1145,26 +1210,50 @@ void brw_ENDIF(struct brw_compile *p) { struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; + struct brw_instruction *insn = NULL; struct brw_instruction *else_inst = NULL; struct brw_instruction *if_inst = NULL; + struct brw_instruction *tmp; + bool emit_endif = true; + + /* In single program flow mode, we can express IF and ELSE instructions + * equivalently as ADD instructions that operate on IP. On platforms prior + * to Gen6, flow control instructions cause an implied thread switch, so + * this is a significant savings. + * + * However, on Gen6, writing to IP doesn't work in single program flow mode + * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may + * not be updated by non-flow control instructions."). And on later + * platforms, there is no significant benefit to converting control flow + * instructions to conditional ADDs. So we only do this trick on Gen4 and + * Gen5. + */ + if (intel->gen < 6 && p->single_program_flow) + emit_endif = false; + + /* + * A single next_insn() may change the base adress of instruction store + * memory(p->store), so call it first before referencing the instruction + * store pointer from an index + */ + if (emit_endif) + insn = next_insn(p, BRW_OPCODE_ENDIF); /* Pop the IF and (optional) ELSE instructions from the stack */ - p->if_stack_depth--; - if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { - else_inst = p->if_stack[p->if_stack_depth]; - p->if_stack_depth--; + p->if_depth_in_loop[p->loop_stack_depth]--; + tmp = pop_if_stack(p); + if (tmp->header.opcode == BRW_OPCODE_ELSE) { + else_inst = tmp; + tmp = pop_if_stack(p); } - if_inst = p->if_stack[p->if_stack_depth]; + if_inst = tmp; - if (p->single_program_flow) { + if (!emit_endif) { /* ENDIF is useless; don't bother emitting it. */ convert_IF_ELSE_to_ADD(p, if_inst, else_inst); return; } - insn = next_insn(p, BRW_OPCODE_ENDIF); - if (intel->gen < 6) { brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); @@ -1196,7 +1285,7 @@ brw_ENDIF(struct brw_compile *p) patch_IF_ELSE(p, if_inst, else_inst, insn); } -struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) +struct brw_instruction *brw_BREAK(struct brw_compile *p) { struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; @@ -1211,7 +1300,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) brw_set_src0(p, insn, brw_ip_reg()); brw_set_src1(p, insn, brw_imm_d(0x0)); insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; + insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; @@ -1219,8 +1308,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) return insn; } -struct brw_instruction *gen6_CONT(struct brw_compile *p, - struct brw_instruction *do_insn) +struct brw_instruction *gen6_CONT(struct brw_compile *p) { struct brw_instruction *insn; @@ -1236,7 +1324,7 @@ struct brw_instruction *gen6_CONT(struct brw_compile *p, return insn; } -struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) +struct brw_instruction *brw_CONT(struct brw_compile *p) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); @@ -1247,7 +1335,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; + insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; return insn; } @@ -1272,10 +1360,13 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct intel_context *intel = &p->brw->intel; if (intel->gen >= 6 || p->single_program_flow) { + push_loop_stack(p, &p->store[p->nr_insn]); return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + push_loop_stack(p, insn); + /* Override the defaults for this instruction: */ brw_set_dest(p, insn, brw_null_reg()); @@ -1292,13 +1383,40 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) } } +/** + * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE + * instruction here. + * + * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop + * nesting, since it can always just point to the end of the block/current loop. + */ +static void +brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *do_inst = get_inner_do_insn(p); + struct brw_instruction *inst; + int br = (intel->gen == 5) ? 2 : 1; + + for (inst = while_inst - 1; inst != do_inst; inst--) { + /* If the jump count is != 0, that means that this instruction has already + * been patched because it's part of a loop inside of the one we're + * patching. + */ + if (inst->header.opcode == BRW_OPCODE_BREAK && + inst->bits3.if_else.jump_count == 0) { + inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1); + } else if (inst->header.opcode == BRW_OPCODE_CONTINUE && + inst->bits3.if_else.jump_count == 0) { + inst->bits3.if_else.jump_count = br * (while_inst - inst); + } + } +} - -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) +struct brw_instruction *brw_WHILE(struct brw_compile *p) { struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; + struct brw_instruction *insn, *do_insn; GLuint br = 1; if (intel->gen >= 5) @@ -1306,6 +1424,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 7) { insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -1315,6 +1434,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->header.execution_size = BRW_EXECUTE_8; } else if (intel->gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); @@ -1325,6 +1445,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, } else { if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); + do_insn = get_inner_do_insn(p); brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(p, insn, brw_ip_reg()); @@ -1332,6 +1453,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->header.execution_size = BRW_EXECUTE_1; } else { insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); assert(do_insn->header.opcode == BRW_OPCODE_DO); @@ -1343,22 +1465,25 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; + + brw_patch_break_cont(p, insn); } } insn->header.compression_control = BRW_COMPRESSION_NONE; p->current->header.predicate_control = BRW_PREDICATE_NONE; + p->loop_stack_depth--; + return insn; } /* FORWARD JUMPS: */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) { struct intel_context *intel = &p->brw->intel; - struct brw_instruction *landing = &p->store[p->nr_insn]; + struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx]; GLuint jmpi = 1; if (intel->gen >= 5) @@ -1367,7 +1492,7 @@ void brw_land_fwd_jump(struct brw_compile *p, assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); - jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); + jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1); } @@ -1443,11 +1568,14 @@ void brw_math( struct brw_compile *p, assert(src.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + if (intel->gen == 6) + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); - /* Source modifiers are ignored for extended math instructions. */ - assert(!src.negate); - assert(!src.abs); + /* Source modifiers are ignored for extended math instructions on Gen6. */ + if (intel->gen == 6) { + assert(!src.negate); + assert(!src.abs); + } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || @@ -1507,8 +1635,10 @@ void brw_math2(struct brw_compile *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + if (intel->gen == 6) { + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || @@ -1520,11 +1650,13 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } - /* Source modifiers are ignored for extended math instructions. */ - assert(!src0.negate); - assert(!src0.abs); - assert(!src1.negate); - assert(!src1.abs); + /* Source modifiers are ignored for extended math instructions on Gen6. */ + if (intel->gen == 6) { + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + } /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. @@ -2079,7 +2211,8 @@ void brw_SAMPLE(struct brw_compile *p, GLuint response_length, GLuint msg_length, GLuint header_present, - GLuint simd_mode) + GLuint simd_mode, + GLuint return_format) { struct intel_context *intel = &p->brw->intel; bool need_stall = 0; @@ -2173,7 +2306,8 @@ void brw_SAMPLE(struct brw_compile *p, response_length, msg_length, header_present, - simd_mode); + simd_mode, + return_format); } if (need_stall) { @@ -2351,3 +2485,42 @@ void brw_ff_sync(struct brw_compile *p, response_length, eot); } + +/** + * Emit the SEND instruction necessary to generate stream output data on Gen6 + * (for transform feedback). + * + * If send_commit_msg is true, this is the last piece of stream output data + * from this thread, so send the data as a committed write. According to the + * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): + * + * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all + * writes are complete by sending the final write as a committed write." + */ +void +brw_svb_write(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + bool send_commit_msg) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + brw_set_dp_write_message(p, insn, + binding_table_index, + 0, /* msg_control: ignored */ + GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, + 1, /* msg_length */ + true, /* header_present */ + 0, /* last_render_target: ignored */ + send_commit_msg, /* response_length */ + 0, /* end_of_thread */ + send_commit_msg); /* send_commit_msg */ +}