X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_eu_emit.c;h=419b40ba84b7c01765da7b5659405dbf9a3e428d;hb=5b24d69fcd6359dc959ec465c7e77b4626a27e72;hp=785d382a009d8f7e772ba6106cf8a790e8bbe057;hpb=cdcef6cbf4dd80047819e9098e34a3b98bd502a4;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 785d382a009..419b40ba84b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -75,6 +75,8 @@ static void brw_set_dest( struct brw_instruction *insn, else { insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; } } else { @@ -90,6 +92,8 @@ static void brw_set_dest( struct brw_instruction *insn, } else { insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; } } @@ -99,12 +103,92 @@ static void brw_set_dest( struct brw_instruction *insn, guess_execution_size(insn, dest); } +extern int reg_type_size[]; + +static void +validate_reg(struct brw_instruction *insn, struct brw_reg reg) +{ + int hstride_for_reg[] = {0, 1, 2, 4}; + int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; + int width_for_reg[] = {1, 2, 4, 8, 16}; + int execsize_for_reg[] = {1, 2, 4, 8, 16}; + int width, hstride, vstride, execsize; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* 3.3.6: Region Parameters. Restriction: Immediate vectors + * mean the destination has to be 128-bit aligned and the + * destination horiz stride has to be a word. + */ + if (reg.type == BRW_REGISTER_TYPE_V) { + assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * + reg_type_size[insn->bits1.da1.dest_reg_type] == 2); + } + + return; + } + + if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.file == BRW_ARF_NULL) + return; + + assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); + hstride = hstride_for_reg[reg.hstride]; + + if (reg.vstride == 0xf) { + vstride = -1; + } else { + assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); + vstride = vstride_for_reg[reg.vstride]; + } + + assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); + width = width_for_reg[reg.width]; + + assert(insn->header.execution_size >= 0 && + insn->header.execution_size < Elements(execsize_for_reg)); + execsize = execsize_for_reg[insn->header.execution_size]; + + /* Restrictions from 3.3.10: Register Region Restrictions. */ + /* 3. */ + assert(execsize >= width); + + /* 4. */ + if (execsize == width && hstride != 0) { + assert(vstride == -1 || vstride == width * hstride); + } + + /* 5. */ + if (execsize == width && hstride == 0) { + /* no restriction on vstride. */ + } + + /* 6. */ + if (width == 1) { + assert(hstride == 0); + } + + /* 7. */ + if (execsize == 1 && width == 1) { + assert(hstride == 0); + assert(vstride == 0); + } + + /* 8. */ + if (vstride == 0 && hstride == 0) { + assert(width == 1); + } + + /* 10. Check destination issues. */ +} + static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg ) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); + validate_reg(insn, reg); + insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.abs; @@ -180,6 +264,8 @@ void brw_set_src1( struct brw_instruction *insn, assert(reg.nr < 128); + validate_reg(insn, reg); + insn->bits1.da1.src1_reg_file = reg.file; insn->bits1.da1.src1_reg_type = reg.type; insn->bits3.da1.src1_abs = reg.abs; @@ -280,31 +366,31 @@ static void brw_set_math_message( struct brw_context *brw, } -static void brw_set_ff_sync_message( struct brw_context *brw, - struct brw_instruction *insn, - GLboolean allocate, - GLboolean used, - GLuint msg_length, - GLuint response_length, - GLboolean end_of_thread, - GLboolean complete, - GLuint offset, - GLuint swizzle_control ) +static void brw_set_ff_sync_message(struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLuint response_length, + GLboolean end_of_thread) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.urb_gen5.opcode = 1; - insn->bits3.urb_gen5.offset = offset; - insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ + insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = used; - insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.header_present = 1; - insn->bits3.urb_gen5.response_length = response_length; - insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ + insn->bits3.urb_gen5.msg_length = 1; insn->bits3.urb_gen5.end_of_thread = end_of_thread; - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_gen5.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; + } } static void brw_set_urb_message( struct brw_context *brw, @@ -362,20 +448,36 @@ static void brw_set_dp_write_message( struct brw_context *brw, GLuint msg_control, GLuint msg_type, GLuint msg_length, + GLboolean header_present, GLuint pixel_scoreboard_clear, GLuint response_length, - GLuint end_of_thread ) + GLuint end_of_thread, + GLuint send_commit_msg) { struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(insn, brw_imm_ud(0)); - if (intel->gen == 5) { + if (intel->gen >= 6) { + insn->bits3.dp_render_cache.binding_table_index = binding_table_index; + insn->bits3.dp_render_cache.msg_control = msg_control; + insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_render_cache.msg_type = msg_type; + insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; + insn->bits3.dp_render_cache.header_present = header_present; + insn->bits3.dp_render_cache.response_length = response_length; + insn->bits3.dp_render_cache.msg_length = msg_length; + insn->bits3.dp_render_cache.end_of_thread = end_of_thread; + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + /* XXX really need below? */ + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_gen5.end_of_thread = end_of_thread; + } else if (intel->gen == 5) { insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; insn->bits3.dp_write_gen5.msg_control = msg_control; insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; insn->bits3.dp_write_gen5.msg_type = msg_type; - insn->bits3.dp_write_gen5.send_commit_msg = 0; - insn->bits3.dp_write_gen5.header_present = 1; + insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; + insn->bits3.dp_write_gen5.header_present = header_present; insn->bits3.dp_write_gen5.response_length = response_length; insn->bits3.dp_write_gen5.msg_length = msg_length; insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; @@ -386,7 +488,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, insn->bits3.dp_write.msg_control = msg_control; insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.send_commit_msg = send_commit_msg; insn->bits3.dp_write.response_length = response_length; insn->bits3.dp_write.msg_length = msg_length; insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; @@ -447,7 +549,7 @@ static void brw_set_sampler_message(struct brw_context *brw, assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (intel->gen == 5) { + if (intel->gen >= 5) { insn->bits3.sampler_gen5.binding_table_index = binding_table_index; insn->bits3.sampler_gen5.sampler = sampler; insn->bits3.sampler_gen5.msg_type = msg_type; @@ -456,8 +558,12 @@ static void brw_set_sampler_message(struct brw_context *brw, insn->bits3.sampler_gen5.response_length = response_length; insn->bits3.sampler_gen5.msg_length = msg_length; insn->bits3.sampler_gen5.end_of_thread = eot; - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; - insn->bits2.send_gen5.end_of_thread = eot; + if (intel->gen >= 6) + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; + else { + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_gen5.end_of_thread = eot; + } } else if (intel->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; @@ -560,8 +666,6 @@ ALU2(SHL) ALU2(RSR) ALU2(RSL) ALU2(ASR) -ALU2(ADD) -ALU2(MUL) ALU1(FRC) ALU1(RNDD) ALU1(RNDZ) @@ -575,6 +679,63 @@ ALU2(DP2) ALU2(LINE) ALU2(PLN) +struct brw_instruction *brw_ADD(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.2.2: add */ + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); +} + +struct brw_instruction *brw_MUL(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.32.38: mul */ + if (src0.type == BRW_REGISTER_TYPE_D || + src0.type == BRW_REGISTER_TYPE_UD || + src1.type == BRW_REGISTER_TYPE_D || + src1.type == BRW_REGISTER_TYPE_UD) { + assert(dest.type != BRW_REGISTER_TYPE_F); + } + + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_ACCUMULATOR); + assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || + src1.nr != BRW_ARF_ACCUMULATOR); + + return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); +} void brw_NOP(struct brw_compile *p) @@ -626,6 +787,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, */ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; if (p->single_program_flow) { @@ -639,9 +801,15 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_imm_w(0)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.execution_size = execute_size; insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -663,7 +831,9 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (intel->gen == 5) + /* jump count is for 64bit data chunk each, so one 128bit + instruction requires 2 chunks. */ + if (intel->gen >= 5) br = 2; if (p->single_program_flow) { @@ -672,9 +842,15 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_ELSE); } - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_imm_w(0)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = if_insn->header.execution_size; @@ -691,9 +867,13 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); - if_insn->bits3.if_else.jump_count = br * (insn - if_insn); - if_insn->bits3.if_else.pop_count = 0; - if_insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; + if_insn->bits3.if_else.pad0 = 0; + } else { + if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); + } } return insn; @@ -705,7 +885,7 @@ void brw_ENDIF(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; GLuint br = 1; - if (intel->gen == 5) + if (intel->gen >= 5) br = 2; if (p->single_program_flow) { @@ -721,9 +901,15 @@ void brw_ENDIF(struct brw_compile *p, } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = patch_insn->header.execution_size; @@ -736,29 +922,46 @@ void brw_ENDIF(struct brw_compile *p, * instruction respectively. */ if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: - */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; - } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + /* Turn it into an IFF, which means no mask stack operations for + * all-false and jumping past the ENDIF. + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); + } } else { - assert(0); + assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); + if (intel->gen < 6) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. + */ + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ + patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); + } } /* Also pop item off the stack in the endif instruction: */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else { + insn->bits1.branch_gen6.jump_count = 2; + } } } -struct brw_instruction *brw_BREAK(struct brw_compile *p) +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_BREAK); @@ -769,10 +972,11 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p) insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; return insn; } -struct brw_instruction *brw_CONT(struct brw_compile *p) +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); @@ -783,6 +987,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p) insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -820,7 +1025,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (intel->gen == 5) + if (intel->gen >= 5) br = 2; if (p->single_program_flow) @@ -864,7 +1069,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (intel->gen == 5) + if (intel->gen >= 5) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); @@ -906,6 +1111,20 @@ void brw_CMP(struct brw_compile *p, } } +/* Issue 'wait' instruction for n1, host could program MMIO + to wake up thread. */ +void brw_WAIT (struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); + struct brw_reg src = brw_notification_1_reg(); + + brw_set_dest(insn, src); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + insn->header.execution_size = 0; /* must */ + insn->header.predicate_control = 0; + insn->header.compression_control = 0; +} /*********************************************************************** @@ -959,6 +1178,29 @@ void brw_math( struct brw_compile *p, } } +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + GLuint function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + + assert(intel->gen >= 6); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); +} + /** * Extended math function, float[16]. * Use 2 send instructions. @@ -971,10 +1213,25 @@ void brw_math_16( struct brw_compile *p, struct brw_reg src, GLuint precision ) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + if (intel->gen >= 6) { + insn = next_insn(p, BRW_OPCODE_MATH); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + return; + } + /* First instruction: */ brw_push_insn_state(p); @@ -1025,6 +1282,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint scratch_offset ) { + struct intel_context *intel = &p->brw->intel; GLuint msg_reg_nr = 1; { brw_push_insn_state(p); @@ -1041,13 +1299,32 @@ void brw_dp_WRITE_16( struct brw_compile *p, { GLuint msg_length = 3; - struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_reg dest; struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - + int send_commit_msg; + insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; - + + /* Until gen6, writes followed by reads from the same location + * are not guaranteed to be ordered unless write_commit is set. + * If set, then a no-op write is issued to the destination + * register to set a dependency, and a read from the destination + * can be used to ensure the ordering. + * + * For gen6, only writes between different threads need ordering + * protection. Our use of DP writes is all about register + * spilling within a thread. + */ + if (intel->gen >= 6) { + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + send_commit_msg = 0; + } else { + dest = brw_uw16_grf(0, 0); + send_commit_msg = 1; + } + brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -1057,9 +1334,11 @@ void brw_dp_WRITE_16( struct brw_compile *p, BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, + GL_TRUE, /* header_present */ 0, /* pixel scoreboard */ - 0, /* response_length */ - 0); /* eot */ + send_commit_msg, /* response_length */ + 0, /* eot */ + send_commit_msg); } } @@ -1100,7 +1379,7 @@ void brw_dp_READ_16( struct brw_compile *p, brw_set_dp_read_message(p->brw, insn, 255, /* binding table index (255=stateless) */ - 3, /* msg_control (3 means 4 Owords) */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1, /* target cache (render/scratch) */ 1, /* msg_length */ @@ -1175,73 +1454,113 @@ void brw_dp_READ_4( struct brw_compile *p, */ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest, - GLuint oword, - GLboolean relAddr, - struct brw_reg addrReg, GLuint location, GLuint bind_table_index) { + struct brw_instruction *insn; GLuint msg_reg_nr = 1; + struct brw_reg b; - assert(oword < 2); /* printf("vs const read msg, location %u, msg_reg_nr %d\n", location, msg_reg_nr); */ /* Setup MRF[1] with location/offset into const buffer */ - { - struct brw_reg b; + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + brw_MOV(p, b, brw_imm_ud(location)); - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. - */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - if (relAddr) { - brw_ADD(p, b, addrReg, brw_imm_ud(location)); - } - else { - brw_MOV(p, b, brw_imm_ud(location)); - } + brw_pop_insn_state(p); - brw_pop_insn_state(p); - } + insn = next_insn(p, BRW_OPCODE_SEND); - { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = BRW_PREDICATE_NONE; - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; - insn->header.mask_control = BRW_MASK_DISABLE; - /*insn->header.access_mode = BRW_ALIGN_16;*/ - - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dp_read_message(p->brw, - insn, - bind_table_index, - oword, /* 0 = lower Oword, 1 = upper Oword */ - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 0, /* source cache = data cache */ - 1, /* msg_length */ - 1, /* response_length (1 Oword) */ - 0); /* eot */ - } + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + 0, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ +} + +/** + * Read a float[4] constant per vertex from VS constant buffer, with + * relative addressing. + */ +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addr_reg, + GLuint offset, + GLuint bind_table_index) +{ + struct intel_context *intel = &p->brw->intel; + int msg_type; + + /* Setup MRF[1] with offset into const buffer */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* M1.0 is block offset 0, M1.4 is block offset 1, all other + * fields ignored. + */ + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + addr_reg, brw_imm_d(offset)); + brw_pop_insn_state(p); + + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = 0; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_vec8_grf(0, 0)); + + if (intel->gen == 6) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (intel->gen == 5 || intel->is_g4x) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + 0, /* source cache = data cache */ + 2, /* msg_length */ + 1, /* response_length */ + 0); /* eot */ } void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, @@ -1250,23 +1569,47 @@ void brw_fb_WRITE(struct brw_compile *p, GLuint response_length, GLboolean eot) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + GLuint msg_control, msg_type; + GLboolean header_present = GL_TRUE; + + insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; - + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (intel->gen >= 6) { + if (msg_length == 4) + header_present = GL_FALSE; + + /* headerless version, just submit color payload */ + src0 = brw_message_reg(msg_reg_nr); + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; + } else { + insn->header.destreg__conditionalmod = msg_reg_nr; + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } + + if (dispatch_width == 16) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + else + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, binding_table_index, - BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ - BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_control, + msg_type, msg_length, + header_present, 1, /* pixel scoreboard */ - response_length, - eot); + response_length, + eot, + 0 /* send_commit_msg */); } @@ -1289,6 +1632,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint header_present, GLuint simd_mode) { + struct intel_context *intel = &p->brw->intel; GLboolean need_stall = 0; if (writemask == 0) { @@ -1360,11 +1704,25 @@ void brw_SAMPLE(struct brw_compile *p, } { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct brw_instruction *insn; + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + /* m1 contains header? */ + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_pop_insn_state(p); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1451,32 +1809,34 @@ void brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle) + GLboolean eot) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; - assert(msg_length < 16); + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_pop_insn_state(p); + src0 = brw_message_reg(msg_reg_nr); + } + insn = next_insn(p, BRW_OPCODE_SEND); brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, - insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); + insn, + allocate, + response_length, + eot); }