X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_eu_emit.c;h=637fd074ff152d816baec39862170974e8021bb5;hb=31f0967fb50101437d2568e9ab9640ffbcbf7ef9;hp=2ffd2051a0e4ba0585b507840c3d4fee49c1b859;hpb=61c4702489fa1694892c5ce90ccf65a5094df3e7;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 2ffd2051a0e..637fd074ff1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -44,7 +44,7 @@ * explicit move; it should be called before emitting a SEND instruction. */ void -gen6_resolve_implied_move(struct brw_compile *p, +gen6_resolve_implied_move(struct brw_codegen *p, struct brw_reg *src, unsigned msg_reg_nr) { @@ -68,7 +68,7 @@ gen6_resolve_implied_move(struct brw_compile *p, } static void -gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) +gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg) { /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"): * "The send with EOT should use register space R112-R127 for <src>. 
This is @@ -95,7 +95,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo, enum brw_reg_type type, unsigned file) { if (file == BRW_IMMEDIATE_VALUE) { - const static int imm_hw_types[] = { + static const int imm_hw_types[] = { [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD, [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D, [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW, @@ -117,7 +117,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo, return imm_hw_types[type]; } else { /* Non-immediate registers */ - const static int hw_types[] = { + static const int hw_types[] = { [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD, [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D, [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW, @@ -142,7 +142,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo, } void -brw_set_dest(struct brw_compile *p, brw_inst *inst, struct brw_reg dest) +brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) { const struct brw_device_info *devinfo = p->devinfo; @@ -296,7 +296,7 @@ is_compactable_immediate(unsigned imm) } void -brw_set_src0(struct brw_compile *p, brw_inst *inst, struct brw_reg reg) +brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) { const struct brw_device_info *devinfo = p->devinfo; @@ -436,7 +436,7 @@ brw_set_src0(struct brw_compile *p, brw_inst *inst, struct brw_reg reg) void -brw_set_src1(struct brw_compile *p, brw_inst *inst, struct brw_reg reg) +brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) { const struct brw_device_info *devinfo = p->devinfo; @@ -515,7 +515,7 @@ brw_set_src1(struct brw_compile *p, brw_inst *inst, struct brw_reg reg) * choose not to fill in irrelevant bits; they will be zero. 
*/ static void -brw_set_message_descriptor(struct brw_compile *p, +brw_set_message_descriptor(struct brw_codegen *p, brw_inst *inst, enum brw_message_target sfid, unsigned msg_length, @@ -548,7 +548,7 @@ brw_set_message_descriptor(struct brw_compile *p, } } -static void brw_set_math_message( struct brw_compile *p, +static void brw_set_math_message( struct brw_codegen *p, brw_inst *inst, unsigned function, unsigned integer_type, @@ -595,7 +595,7 @@ static void brw_set_math_message( struct brw_compile *p, } -static void brw_set_ff_sync_message(struct brw_compile *p, +static void brw_set_ff_sync_message(struct brw_codegen *p, brw_inst *insn, bool allocate, unsigned response_length, @@ -614,7 +614,7 @@ static void brw_set_ff_sync_message(struct brw_compile *p, brw_inst_set_urb_complete(devinfo, insn, 0); } -static void brw_set_urb_message( struct brw_compile *p, +static void brw_set_urb_message( struct brw_codegen *p, brw_inst *insn, enum brw_urb_write_flags flags, unsigned msg_length, @@ -656,7 +656,7 @@ static void brw_set_urb_message( struct brw_compile *p, } void -brw_set_dp_write_message(struct brw_compile *p, +brw_set_dp_write_message(struct brw_codegen *p, brw_inst *insn, unsigned binding_table_index, unsigned msg_control, @@ -697,7 +697,7 @@ brw_set_dp_write_message(struct brw_compile *p, } void -brw_set_dp_read_message(struct brw_compile *p, +brw_set_dp_read_message(struct brw_codegen *p, brw_inst *insn, unsigned binding_table_index, unsigned msg_control, @@ -732,7 +732,7 @@ brw_set_dp_read_message(struct brw_compile *p, } void -brw_set_sampler_message(struct brw_compile *p, +brw_set_sampler_message(struct brw_codegen *p, brw_inst *inst, unsigned binding_table_index, unsigned sampler, @@ -759,7 +759,7 @@ brw_set_sampler_message(struct brw_compile *p, } static void -gen7_set_dp_scratch_message(struct brw_compile *p, +gen7_set_dp_scratch_message(struct brw_codegen *p, brw_inst *inst, bool write, bool dword, @@ -785,7 +785,7 @@ gen7_set_dp_scratch_message(struct 
brw_compile *p, #define next_insn brw_next_insn brw_inst * -brw_next_insn(struct brw_compile *p, unsigned opcode) +brw_next_insn(struct brw_codegen *p, unsigned opcode) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn; @@ -804,7 +804,7 @@ brw_next_insn(struct brw_compile *p, unsigned opcode) } static brw_inst * -brw_alu1(struct brw_compile *p, unsigned opcode, +brw_alu1(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, struct brw_reg src) { brw_inst *insn = next_insn(p, opcode); @@ -814,7 +814,7 @@ brw_alu1(struct brw_compile *p, unsigned opcode, } static brw_inst * -brw_alu2(struct brw_compile *p, unsigned opcode, +brw_alu2(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { brw_inst *insn = next_insn(p, opcode); @@ -836,7 +836,7 @@ get_3src_subreg_nr(struct brw_reg reg) } static brw_inst * -brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest, +brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1, struct brw_reg src2) { const struct brw_device_info *devinfo = p->devinfo; @@ -914,6 +914,8 @@ brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD); brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD); break; + default: + unreachable("not reached"); } } @@ -925,7 +927,7 @@ brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest, * Convenience routines. 
*/ #define ALU1(OP) \ -brw_inst *brw_##OP(struct brw_compile *p, \ +brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg dest, \ struct brw_reg src0) \ { \ @@ -933,7 +935,7 @@ brw_inst *brw_##OP(struct brw_compile *p, \ } #define ALU2(OP) \ -brw_inst *brw_##OP(struct brw_compile *p, \ +brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1) \ @@ -942,7 +944,7 @@ brw_inst *brw_##OP(struct brw_compile *p, \ } #define ALU3(OP) \ -brw_inst *brw_##OP(struct brw_compile *p, \ +brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1, \ @@ -952,7 +954,7 @@ brw_inst *brw_##OP(struct brw_compile *p, \ } #define ALU3F(OP) \ -brw_inst *brw_##OP(struct brw_compile *p, \ +brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1, \ @@ -973,7 +975,7 @@ brw_inst *brw_##OP(struct brw_compile *p, \ * Sandybridge and later appear to round correctly without an ADD. 
*/ #define ROUND(OP) \ -void brw_##OP(struct brw_compile *p, \ +void brw_##OP(struct brw_codegen *p, \ struct brw_reg dest, \ struct brw_reg src) \ { \ @@ -1027,7 +1029,7 @@ ROUND(RNDE) brw_inst * -brw_ADD(struct brw_compile *p, struct brw_reg dest, +brw_ADD(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { /* 6.2.2: add */ @@ -1049,7 +1051,7 @@ brw_ADD(struct brw_compile *p, struct brw_reg dest, } brw_inst * -brw_AVG(struct brw_compile *p, struct brw_reg dest, +brw_AVG(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { assert(dest.type == src0.type); @@ -1070,7 +1072,7 @@ brw_AVG(struct brw_compile *p, struct brw_reg dest, } brw_inst * -brw_MUL(struct brw_compile *p, struct brw_reg dest, +brw_MUL(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { /* 6.32.38: mul */ @@ -1104,7 +1106,7 @@ brw_MUL(struct brw_compile *p, struct brw_reg dest, } brw_inst * -brw_LINE(struct brw_compile *p, struct brw_reg dest, +brw_LINE(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { src0.vstride = BRW_VERTICAL_STRIDE_0; @@ -1114,7 +1116,7 @@ brw_LINE(struct brw_compile *p, struct brw_reg dest, } brw_inst * -brw_PLN(struct brw_compile *p, struct brw_reg dest, +brw_PLN(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { src0.vstride = BRW_VERTICAL_STRIDE_0; @@ -1127,7 +1129,7 @@ brw_PLN(struct brw_compile *p, struct brw_reg dest, } brw_inst * -brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) +brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) { const struct brw_device_info *devinfo = p->devinfo; const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16; @@ -1174,7 +1176,7 @@ brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) } brw_inst * -brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg 
src) +brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) { const struct brw_device_info *devinfo = p->devinfo; bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16; @@ -1205,7 +1207,7 @@ brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) } -void brw_NOP(struct brw_compile *p) +void brw_NOP(struct brw_codegen *p) { brw_inst *insn = next_insn(p, BRW_OPCODE_NOP); brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); @@ -1222,7 +1224,7 @@ void brw_NOP(struct brw_compile *p) */ brw_inst * -brw_JMPI(struct brw_compile *p, struct brw_reg index, +brw_JMPI(struct brw_codegen *p, struct brw_reg index, unsigned predicate_control) { const struct brw_device_info *devinfo = p->devinfo; @@ -1238,7 +1240,7 @@ brw_JMPI(struct brw_compile *p, struct brw_reg index, } static void -push_if_stack(struct brw_compile *p, brw_inst *inst) +push_if_stack(struct brw_codegen *p, brw_inst *inst) { p->if_stack[p->if_stack_depth] = inst - p->store; @@ -1251,14 +1253,14 @@ push_if_stack(struct brw_compile *p, brw_inst *inst) } static brw_inst * -pop_if_stack(struct brw_compile *p) +pop_if_stack(struct brw_codegen *p) { p->if_stack_depth--; return &p->store[p->if_stack[p->if_stack_depth]]; } static void -push_loop_stack(struct brw_compile *p, brw_inst *inst) +push_loop_stack(struct brw_codegen *p, brw_inst *inst) { if (p->loop_stack_array_size < p->loop_stack_depth) { p->loop_stack_array_size *= 2; @@ -1274,7 +1276,7 @@ push_loop_stack(struct brw_compile *p, brw_inst *inst) } static brw_inst * -get_inner_do_insn(struct brw_compile *p) +get_inner_do_insn(struct brw_codegen *p) { return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; } @@ -1287,13 +1289,13 @@ get_inner_do_insn(struct brw_compile *p) * * When the matching 'else' instruction is reached (presumably by * countdown of the instruction count patched in by our ELSE/ENDIF - * functions), the relevent flags are inverted. 
+ * functions), the relevant flags are inverted. * * When the matching 'endif' instruction is reached, the flags are * popped off. If the stack is now empty, normal execution resumes. */ brw_inst * -brw_IF(struct brw_compile *p, unsigned execute_size) +brw_IF(struct brw_codegen *p, unsigned execute_size) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn; @@ -1340,7 +1342,7 @@ brw_IF(struct brw_compile *p, unsigned execute_size) * embedded comparison (conditional modifier). It is not used on gen7. */ brw_inst * -gen6_IF(struct brw_compile *p, enum brw_conditional_mod conditional, +gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional, struct brw_reg src0, struct brw_reg src1) { const struct brw_device_info *devinfo = p->devinfo; @@ -1367,7 +1369,7 @@ gen6_IF(struct brw_compile *p, enum brw_conditional_mod conditional, * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. */ static void -convert_IF_ELSE_to_ADD(struct brw_compile *p, +convert_IF_ELSE_to_ADD(struct brw_codegen *p, brw_inst *if_inst, brw_inst *else_inst) { const struct brw_device_info *devinfo = p->devinfo; @@ -1408,7 +1410,7 @@ convert_IF_ELSE_to_ADD(struct brw_compile *p, * Patch IF and ELSE instructions with appropriate jump targets. 
*/ static void -patch_IF_ELSE(struct brw_compile *p, +patch_IF_ELSE(struct brw_codegen *p, brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst) { const struct brw_device_info *devinfo = p->devinfo; @@ -1496,7 +1498,7 @@ patch_IF_ELSE(struct brw_compile *p, } void -brw_ELSE(struct brw_compile *p) +brw_ELSE(struct brw_codegen *p) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn; @@ -1534,7 +1536,7 @@ brw_ELSE(struct brw_compile *p) } void -brw_ENDIF(struct brw_compile *p) +brw_ENDIF(struct brw_codegen *p) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn = NULL; @@ -1559,7 +1561,7 @@ brw_ENDIF(struct brw_compile *p) emit_endif = false; /* - * A single next_insn() may change the base adress of instruction store + * A single next_insn() may change the base address of instruction store * memory(p->store), so call it first before referencing the instruction * store pointer from an index */ @@ -1582,8 +1584,8 @@ brw_ENDIF(struct brw_compile *p) } if (devinfo->gen < 6) { - brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); } else if (devinfo->gen == 6) { brw_set_dest(p, insn, brw_imm_w(0)); @@ -1615,7 +1617,7 @@ brw_ENDIF(struct brw_compile *p) } brw_inst * -brw_BREAK(struct brw_compile *p) +brw_BREAK(struct brw_codegen *p) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn; @@ -1643,7 +1645,7 @@ brw_BREAK(struct brw_compile *p) } brw_inst * -brw_CONT(struct brw_compile *p) +brw_CONT(struct brw_codegen *p) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn; @@ -1668,7 +1670,7 @@ brw_CONT(struct brw_compile *p) } brw_inst * -gen6_HALT(struct brw_compile *p) +gen6_HALT(struct brw_codegen *p) { const struct brw_device_info 
*devinfo = p->devinfo; brw_inst *insn; @@ -1708,7 +1710,7 @@ gen6_HALT(struct brw_compile *p) * just points back to the first instruction of the loop. */ brw_inst * -brw_DO(struct brw_compile *p, unsigned execute_size) +brw_DO(struct brw_codegen *p, unsigned execute_size) { const struct brw_device_info *devinfo = p->devinfo; @@ -1742,7 +1744,7 @@ brw_DO(struct brw_compile *p, unsigned execute_size) * nesting, since it can always just point to the end of the block/current loop. */ static void -brw_patch_break_cont(struct brw_compile *p, brw_inst *while_inst) +brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *do_inst = get_inner_do_insn(p); @@ -1767,7 +1769,7 @@ brw_patch_break_cont(struct brw_compile *p, brw_inst *while_inst) } brw_inst * -brw_WHILE(struct brw_compile *p) +brw_WHILE(struct brw_codegen *p) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *insn, *do_insn; @@ -1830,7 +1832,7 @@ brw_WHILE(struct brw_compile *p) /* FORWARD JUMPS: */ -void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) +void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx) { const struct brw_device_info *devinfo = p->devinfo; brw_inst *jmp_insn = &p->store[jmp_insn_idx]; @@ -1850,7 +1852,7 @@ void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) * instruction should populate the flag register. It might be simpler * just to use the flag reg for most WM tasks? */ -void brw_CMP(struct brw_compile *p, +void brw_CMP(struct brw_codegen *p, struct brw_reg dest, unsigned conditional, struct brw_reg src0, @@ -1885,7 +1887,7 @@ void brw_CMP(struct brw_compile *p, /** Extended math function, float[8]. 
*/ -void gen4_math(struct brw_compile *p, +void gen4_math(struct brw_codegen *p, struct brw_reg dest, unsigned function, unsigned msg_reg_nr, @@ -1919,7 +1921,7 @@ void gen4_math(struct brw_compile *p, data_type); } -void gen6_math(struct brw_compile *p, +void gen6_math(struct brw_codegen *p, struct brw_reg dest, unsigned function, struct brw_reg src0, @@ -1983,7 +1985,7 @@ void gen6_math(struct brw_compile *p, * The offset must be aligned to oword size (16 bytes). Used for * register spilling. */ -void brw_oword_block_write_scratch(struct brw_compile *p, +void brw_oword_block_write_scratch(struct brw_codegen *p, struct brw_reg mrf, int num_regs, unsigned offset) @@ -2096,7 +2098,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, * spilling. */ void -brw_oword_block_read_scratch(struct brw_compile *p, +brw_oword_block_read_scratch(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, int num_regs, @@ -2172,7 +2174,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, } void -gen7_block_read_scratch(struct brw_compile *p, +gen7_block_read_scratch(struct brw_codegen *p, struct brw_reg dest, int num_regs, unsigned offset) @@ -2212,7 +2214,7 @@ gen7_block_read_scratch(struct brw_compile *p, * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ -void brw_oword_block_read(struct brw_compile *p, +void brw_oword_block_read(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, uint32_t offset, @@ -2268,7 +2270,7 @@ void brw_oword_block_read(struct brw_compile *p, } -void brw_fb_WRITE(struct brw_compile *p, +void brw_fb_WRITE(struct brw_codegen *p, int dispatch_width, struct brw_reg payload, struct brw_reg implied_header, @@ -2331,7 +2333,7 @@ void brw_fb_WRITE(struct brw_compile *p, * Note: the msg_type plus msg_length values determine exactly what kind * of sampling operation is performed. See volume 4, page 161 of docs. 
*/ -void brw_SAMPLE(struct brw_compile *p, +void brw_SAMPLE(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, struct brw_reg src0, @@ -2387,7 +2389,7 @@ void brw_SAMPLE(struct brw_compile *p, /* Adjust the message header's sampler state pointer to * select the correct group of 16 samplers. */ -void brw_adjust_sampler_state_pointer(struct brw_compile *p, +void brw_adjust_sampler_state_pointer(struct brw_codegen *p, struct brw_reg header, struct brw_reg sampler_index) { @@ -2434,7 +2436,7 @@ void brw_adjust_sampler_state_pointer(struct brw_compile *p, * using bitmasks and macros for this, in the old style. Or perhaps * just having the caller instantiate the fields in dword3 itself. */ -void brw_urb_WRITE(struct brw_compile *p, +void brw_urb_WRITE(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, struct brw_reg src0, @@ -2482,7 +2484,7 @@ void brw_urb_WRITE(struct brw_compile *p, } struct brw_inst * -brw_send_indirect_message(struct brw_compile *p, +brw_send_indirect_message(struct brw_codegen *p, unsigned sfid, struct brw_reg dst, struct brw_reg payload, @@ -2524,8 +2526,50 @@ brw_send_indirect_message(struct brw_compile *p, return setup; } +static struct brw_inst * +brw_send_indirect_surface_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload, + struct brw_reg surface, + unsigned message_len, + unsigned response_len, + bool header_present) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *insn; + + if (surface.file != BRW_IMMEDIATE_VALUE) { + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Mask out invalid bits from the surface index to avoid hangs e.g. when + * some surface array is accessed out of bounds. 
+ */ + insn = brw_AND(p, addr, + suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), + BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)), + brw_imm_ud(0xff)); + + brw_pop_insn_state(p); + + surface = addr; + } + + insn = brw_send_indirect_message(p, sfid, dst, payload, surface); + brw_inst_set_mlen(devinfo, insn, message_len); + brw_inst_set_rlen(devinfo, insn, response_len); + brw_inst_set_header_present(devinfo, insn, header_present); + + return insn; +} + static int -brw_find_next_block_end(struct brw_compile *p, int start_offset) +brw_find_next_block_end(struct brw_codegen *p, int start_offset) { int offset; void *store = p->store; @@ -2553,7 +2597,7 @@ brw_find_next_block_end(struct brw_compile *p, int start_offset) * instruction. */ static int -brw_find_loop_end(struct brw_compile *p, int start_offset) +brw_find_loop_end(struct brw_codegen *p, int start_offset) { const struct brw_device_info *devinfo = p->devinfo; int offset; @@ -2585,7 +2629,7 @@ brw_find_loop_end(struct brw_compile *p, int start_offset) * BREAK, CONT, and HALT instructions to their correct locations. */ void -brw_set_uip_jip(struct brw_compile *p) +brw_set_uip_jip(struct brw_codegen *p) { const struct brw_device_info *devinfo = p->devinfo; int offset; @@ -2662,7 +2706,7 @@ brw_set_uip_jip(struct brw_compile *p) } } -void brw_ff_sync(struct brw_compile *p, +void brw_ff_sync(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, struct brw_reg src0, @@ -2702,7 +2746,7 @@ void brw_ff_sync(struct brw_compile *p, * writes are complete by sending the final write as a committed write." 
*/ void -brw_svb_write(struct brw_compile *p, +brw_svb_write(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, struct brw_reg src0, @@ -2730,7 +2774,7 @@ brw_svb_write(struct brw_compile *p, } static unsigned -brw_surface_payload_size(struct brw_compile *p, +brw_surface_payload_size(struct brw_codegen *p, unsigned num_channels, bool has_simd4x2, bool has_simd16) @@ -2744,28 +2788,19 @@ brw_surface_payload_size(struct brw_compile *p, } static void -brw_set_dp_untyped_atomic_message(struct brw_compile *p, +brw_set_dp_untyped_atomic_message(struct brw_codegen *p, brw_inst *insn, unsigned atomic_op, - unsigned bind_table_index, - unsigned msg_length, - unsigned response_length, - bool header_present) + bool response_expected) { const struct brw_device_info *devinfo = p->devinfo; - unsigned msg_control = atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_length ? 1 << 5 : 0); /* Return data expected */ + (response_expected ? 1 << 5 : 0); /* Return data expected */ if (devinfo->gen >= 8 || devinfo->is_haswell) { - brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, - msg_length, response_length, - header_present, false); - - - if (brw_inst_access_mode(devinfo, insn) == BRW_ALIGN_1) { - if (brw_inst_exec_size(devinfo, insn) != BRW_EXECUTE_16) + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (!p->compressed) msg_control |= 1 << 4; /* SIMD8 mode */ brw_inst_set_dp_msg_type(devinfo, insn, @@ -2775,30 +2810,29 @@ brw_set_dp_untyped_atomic_message(struct brw_compile *p, HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2); } } else { - brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, - msg_length, response_length, - header_present, false); - - brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP); + brw_inst_set_dp_msg_type(devinfo, insn, + GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP); - if (brw_inst_exec_size(devinfo, insn) != BRW_EXECUTE_16) + if (!p->compressed) msg_control |= 1 << 
4; /* SIMD8 mode */ } - brw_inst_set_binding_table_index(devinfo, insn, bind_table_index); brw_inst_set_dp_msg_control(devinfo, insn, msg_control); } void -brw_untyped_atomic(struct brw_compile *p, - struct brw_reg dest, +brw_untyped_atomic(struct brw_codegen *p, + struct brw_reg dst, struct brw_reg payload, + struct brw_reg surface, unsigned atomic_op, - unsigned bind_table_index, unsigned msg_length, bool response_expected) { const struct brw_device_info *devinfo = p->devinfo; + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; /* Mask out unused components -- This is especially important in Align16 * mode on generations that don't have native support for SIMD4x2 atomics, @@ -2807,83 +2841,353 @@ brw_untyped_atomic(struct brw_compile *p, * uninitialized Y, Z and W coordinates of the payload. */ const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); - - brw_set_dest(p, insn, retype(brw_writemask(dest, mask), - BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UD)); - brw_set_src1(p, insn, brw_imm_d(0)); - brw_set_dp_untyped_atomic_message( - p, insn, atomic_op, bind_table_index, msg_length, + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, brw_surface_payload_size(p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, true), align1); + + brw_set_dp_untyped_atomic_message( + p, insn, atomic_op, response_expected); +} + +static void +brw_set_dp_untyped_surface_read_message(struct brw_codegen *p, + struct brw_inst *insn, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + /* Set mask of 32-bit channels to drop. 
*/ + unsigned msg_control = 0xf & (0xf << num_channels); + + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (p->compressed) + msg_control |= 1 << 4; /* SIMD16 mode */ + else + msg_control |= 2 << 4; /* SIMD8 mode */ + } + + brw_inst_set_dp_msg_type(devinfo, insn, + (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : + GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ)); + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); +} + +void +brw_untyped_surface_read(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg payload, + struct brw_reg surface, + unsigned msg_length, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); + const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, dst, payload, surface, msg_length, + brw_surface_payload_size(p, num_channels, true, true), + align1); + + brw_set_dp_untyped_surface_read_message( + p, insn, num_channels); +} + +static void +brw_set_dp_untyped_surface_write_message(struct brw_codegen *p, + struct brw_inst *insn, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + /* Set mask of 32-bit channels to drop. */ + unsigned msg_control = 0xf & (0xf << num_channels); + + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (p->compressed) + msg_control |= 1 << 4; /* SIMD16 mode */ + else + msg_control |= 2 << 4; /* SIMD8 mode */ + } else { + if (devinfo->gen >= 8 || devinfo->is_haswell) + msg_control |= 0 << 4; /* SIMD4x2 mode */ + else + msg_control |= 2 << 4; /* SIMD8 mode */ + } + + brw_inst_set_dp_msg_type(devinfo, insn, + devinfo->gen >= 8 || devinfo->is_haswell ? 
+ HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE : + GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE); + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); +} + +void +brw_untyped_surface_write(struct brw_codegen *p, + struct brw_reg payload, + struct brw_reg surface, + unsigned msg_length, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); + const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; + /* Mask out unused components -- See comment in brw_untyped_atomic(). */ + const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? + WRITEMASK_X : WRITEMASK_XYZW; + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, brw_writemask(brw_null_reg(), mask), + payload, surface, msg_length, 0, align1); + + brw_set_dp_untyped_surface_write_message( + p, insn, num_channels); } static void -brw_set_dp_untyped_surface_read_message(struct brw_compile *p, - brw_inst *insn, - unsigned bind_table_index, - unsigned msg_length, - unsigned response_length, - unsigned num_channels, - bool header_present) +brw_set_dp_typed_atomic_message(struct brw_codegen *p, + struct brw_inst *insn, + unsigned atomic_op, + bool response_expected) { const struct brw_device_info *devinfo = p->devinfo; - const unsigned dispatch_width = - (brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16 ? 16 : 8); + unsigned msg_control = + atomic_op | /* Atomic Operation Type: BRW_AOP_* */ + (response_expected ? 
1 << 5 : 0); /* Return data expected */ if (devinfo->gen >= 8 || devinfo->is_haswell) { - brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, - msg_length, response_length, - header_present, false); + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q) + msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ + brw_inst_set_dp_msg_type(devinfo, insn, + HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP); + } else { + brw_inst_set_dp_msg_type(devinfo, insn, + HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2); + } + + } else { brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ); + GEN7_DATAPORT_RC_TYPED_ATOMIC_OP); + + if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q) + msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ + } + + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); +} + +void +brw_typed_atomic(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg payload, + struct brw_reg surface, + unsigned atomic_op, + unsigned msg_length, + bool response_expected) { + const struct brw_device_info *devinfo = p->devinfo; + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN6_SFID_DATAPORT_RENDER_CACHE); + const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); + /* Mask out unused components -- See comment in brw_untyped_atomic(). */ + const unsigned mask = align1 ? 
WRITEMASK_XYZW : WRITEMASK_X; + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, + brw_surface_payload_size(p, response_expected, + devinfo->gen >= 8 || devinfo->is_haswell, false), + true); + + brw_set_dp_typed_atomic_message( + p, insn, atomic_op, response_expected); +} + +static void +brw_set_dp_typed_surface_read_message(struct brw_codegen *p, + struct brw_inst *insn, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + /* Set mask of unused channels. */ + unsigned msg_control = 0xf & (0xf << num_channels); + + if (devinfo->gen >= 8 || devinfo->is_haswell) { + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q) + msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ + else + msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ + } + + brw_inst_set_dp_msg_type(devinfo, insn, + HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ); } else { - brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, - msg_length, response_length, - header_present, false); + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q) + msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ + } brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ); + GEN7_DATAPORT_RC_TYPED_SURFACE_READ); } - /* Set mask of 32-bit channels to drop. 
*/ - unsigned msg_control = (0xf & (0xf << num_channels)); + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); +} - if (brw_inst_access_mode(devinfo, insn) == BRW_ALIGN_1) { - if (dispatch_width == 16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ +void +brw_typed_surface_read(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg payload, + struct brw_reg surface, + unsigned msg_length, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN6_SFID_DATAPORT_RENDER_CACHE); + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, dst, payload, surface, msg_length, + brw_surface_payload_size(p, num_channels, + devinfo->gen >= 8 || devinfo->is_haswell, false), + true); + + brw_set_dp_typed_surface_read_message( + p, insn, num_channels); +} + +static void +brw_set_dp_typed_surface_write_message(struct brw_codegen *p, + struct brw_inst *insn, + unsigned num_channels) +{ + const struct brw_device_info *devinfo = p->devinfo; + /* Set mask of unused channels. 
*/ + unsigned msg_control = 0xf & (0xf << num_channels); + + if (devinfo->gen >= 8 || devinfo->is_haswell) { + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q) + msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ + else + msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ + } + + brw_inst_set_dp_msg_type(devinfo, insn, + HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE); + + } else { + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q) + msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ + } + + brw_inst_set_dp_msg_type(devinfo, insn, + GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE); } - brw_inst_set_binding_table_index(devinfo, insn, bind_table_index); brw_inst_set_dp_msg_control(devinfo, insn, msg_control); } void -brw_untyped_surface_read(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - unsigned bind_table_index, - unsigned msg_length, - unsigned num_channels) +brw_typed_surface_write(struct brw_codegen *p, + struct brw_reg payload, + struct brw_reg surface, + unsigned msg_length, + unsigned num_channels) { const struct brw_device_info *devinfo = p->devinfo; - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN6_SFID_DATAPORT_RENDER_CACHE); + const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); + /* Mask out unused components -- See comment in brw_untyped_atomic(). */ + const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? 
+ WRITEMASK_X : WRITEMASK_XYZW); + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, brw_writemask(brw_null_reg(), mask), + payload, surface, msg_length, 0, true); - brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD)); - brw_set_dp_untyped_surface_read_message( - p, insn, bind_table_index, msg_length, - brw_surface_payload_size(p, num_channels, true, true), - num_channels, brw_inst_access_mode(devinfo, insn) == BRW_ALIGN_1); + brw_set_dp_typed_surface_write_message( + p, insn, num_channels); +} + +static void +brw_set_memory_fence_message(struct brw_codegen *p, + struct brw_inst *insn, + enum brw_message_target sfid, + bool commit_enable) +{ + const struct brw_device_info *devinfo = p->devinfo; + + brw_set_message_descriptor(p, insn, sfid, + 1 /* message length */, + (commit_enable ? 1 : 0) /* response length */, + true /* header present */, + false); + + switch (sfid) { + case GEN6_SFID_DATAPORT_RENDER_CACHE: + brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE); + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: + brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE); + break; + default: + unreachable("Not reached"); + } + + if (commit_enable) + brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5); +} + +void +brw_memory_fence(struct brw_codegen *p, + struct brw_reg dst) +{ + const struct brw_device_info *devinfo = p->devinfo; + const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; + struct brw_inst *insn; + + /* Set dst as destination for dependency tracking, the MEMORY_FENCE + * message doesn't write anything back. 
+ */ + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dst); + brw_set_src0(p, insn, dst); + brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, + commit_enable); + + if (devinfo->gen == 7 && !devinfo->is_haswell) { + /* IVB does typed surface access through the render cache, so we need to + * flush it too. Use a different register so both flushes can be + * pipelined by the hardware. + */ + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, offset(dst, 1)); + brw_set_src0(p, insn, offset(dst, 1)); + brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, + commit_enable); + + /* Now write the response of the second message into the response of the + * first to trigger a pipeline stall -- This way future render and data + * cache messages will be properly ordered with respect to past data and + * render cache messages. + */ + brw_push_insn_state(p); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, dst, offset(dst, 1)); + brw_pop_insn_state(p); + } } void -brw_pixel_interpolator_query(struct brw_compile *p, +brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, bool noperspective, @@ -2910,6 +3214,153 @@ brw_pixel_interpolator_query(struct brw_compile *p, brw_inst_set_pi_message_data(devinfo, insn, data); } +void +brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) +{ + const struct brw_device_info *devinfo = p->devinfo; + brw_inst *inst; + + assert(devinfo->gen >= 7); + + brw_push_insn_state(p); + + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + if (devinfo->gen >= 8) { + /* Getting the first active channel index is easy on Gen8: Just find + * the first bit set in the mask register. 
The same register exists + * on HSW already but it reads back as all ones when the current + * instruction has execution masking disabled, so it's kind of + * useless. + */ + inst = brw_FBL(p, vec1(dst), + retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); + + /* Quarter control has the effect of magically shifting the value of + * this register. Make sure it's set to zero. + */ + brw_inst_set_qtr_control(devinfo, inst, GEN6_COMPRESSION_1Q); + } else { + const struct brw_reg flag = retype(brw_flag_reg(1, 0), + BRW_REGISTER_TYPE_UD); + + brw_MOV(p, flag, brw_imm_ud(0)); + + /* Run a 16-wide instruction returning zero with execution masking + * and a conditional modifier enabled in order to get the current + * execution mask in f1.0. + */ + inst = brw_MOV(p, brw_null_reg(), brw_imm_ud(0)); + brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_16); + brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); + brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); + brw_inst_set_flag_reg_nr(devinfo, inst, 1); + + brw_FBL(p, vec1(dst), flag); + } + } else { + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + if (devinfo->gen >= 8) { + /* In SIMD4x2 mode the first active channel index is just the + * negation of the first bit of the mask register. + */ + inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X), + negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(1)); + + } else { + /* Overwrite the destination without and with execution masking to + * find out which of the channels is active. 
+ */ + brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), + brw_imm_ud(1)); + + inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), + brw_imm_ud(0)); + brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); + } + } + + brw_pop_insn_state(p); +} + +void +brw_broadcast(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg idx) +{ + const struct brw_device_info *devinfo = p->devinfo; + const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; + brw_inst *inst; + + assert(src.file == BRW_GENERAL_REGISTER_FILE && + src.address_mode == BRW_ADDRESS_DIRECT); + + if ((src.vstride == 0 && (src.hstride == 0 || !align1)) || + idx.file == BRW_IMMEDIATE_VALUE) { + /* Trivial, the source is already uniform or the index is a constant. + * We will typically not get here if the optimizer is doing its job, but + * asserting would be mean. + */ + const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0; + brw_MOV(p, dst, + (align1 ? stride(suboffset(src, i), 0, 1, 0) : + stride(suboffset(src, 4 * i), 0, 4, 1))); + } else { + if (align1) { + const struct brw_reg addr = + retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + const unsigned offset = src.nr * REG_SIZE + src.subnr; + /* Limit in bytes of the signed indirect addressing immediate. */ + const unsigned limit = 512; + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Take into account the component size and horizontal stride. */ + assert(src.vstride == src.hstride + src.width); + brw_SHL(p, addr, vec1(idx), + brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + + src.hstride - 1)); + + /* We can only address up to limit bytes using the indirect + * addressing immediate, account for the difference if the source + * register is above this limit. 
+ */ + if (offset >= limit) + brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); + + brw_pop_insn_state(p); + + /* Use indirect addressing to fetch the specified component. */ + brw_MOV(p, dst, + retype(brw_vec1_indirect(addr.subnr, offset % limit), + src.type)); + } else { + /* In SIMD4x2 mode the index can be either zero or one, replicate it + * to all bits of a flag register, + */ + inst = brw_MOV(p, + brw_null_reg(), + stride(brw_swizzle1(idx, 0), 0, 4, 1)); + brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE); + brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ); + brw_inst_set_flag_reg_nr(devinfo, inst, 1); + + /* and use predicated SEL to pick the right channel. */ + inst = brw_SEL(p, dst, + stride(suboffset(src, 4), 0, 4, 1), + stride(src, 0, 4, 1)); + brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL); + brw_inst_set_flag_reg_nr(devinfo, inst, 1); + } + } +} + /** * This instruction is generated as a single-channel align1 instruction by * both the VS and FS stages when using INTEL_DEBUG=shader_time. @@ -2926,17 +3377,20 @@ brw_pixel_interpolator_query(struct brw_compile *p, * format, and is only accessible through the legacy DATA_CACHE dataport * messages. */ -void brw_shader_time_add(struct brw_compile *p, +void brw_shader_time_add(struct brw_codegen *p, struct brw_reg payload, uint32_t surf_index) { + const unsigned sfid = (p->devinfo->gen >= 8 || p->devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); assert(p->devinfo->gen >= 7); brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_pop_insn_state(p); /* We use brw_vec1_reg and unmasked because we want to increment the given * offset only once. 
@@ -2945,8 +3399,61 @@ void brw_shader_time_add(struct brw_compile *p, BRW_ARF_NULL, 0)); brw_set_src0(p, send, brw_vec1_reg(payload.file, payload.nr, 0)); - brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index, - 2 /* message length */, - 0 /* response length */, - false /* header present */); + brw_set_src1(p, send, brw_imm_ud(0)); + brw_set_message_descriptor(p, send, sfid, 2, 0, false, false); + brw_inst_set_binding_table_index(p->devinfo, send, surf_index); + brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false); + + brw_pop_insn_state(p); +} + + +/** + * Emit the SEND message for a barrier + */ +void +brw_barrier(struct brw_codegen *p, struct brw_reg src) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *inst; + + assert(devinfo->gen >= 7); + + inst = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, inst, brw_null_reg()); + brw_set_src0(p, inst, src); + brw_set_src1(p, inst, brw_null_reg()); + + brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY, + 1 /* msg_length */, + 0 /* response_length */, + false /* header_present */, + false /* end_of_thread */); + + brw_inst_set_gateway_notify(devinfo, inst, 1); + brw_inst_set_gateway_subfuncid(devinfo, inst, + BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG); + + brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); +} + + +/** + * Emit the wait instruction for a barrier + */ +void +brw_WAIT(struct brw_codegen *p) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *insn; + + struct brw_reg src = brw_notification_reg(); + + insn = next_insn(p, BRW_OPCODE_WAIT); + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); }