case BRW_REGISTER_TYPE_UD:
brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud);
break;
+ case BRW_REGISTER_TYPE_VF:
+ brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud);
+ break;
default:
unreachable("not reached");
}
brw_math_function(inst->opcode),
inst->base_mrf,
src,
- BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
brw_math_function(inst->opcode),
inst->base_mrf,
op0,
- BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
* use an implied move from g0 to the first message register.
*/
if (inst->header_present) {
- if (brw->gen < 6 && !inst->texture_offset) {
+ if (brw->gen < 6 && !inst->offset) {
/* Set up an implied move from g0 to the MRF. */
src = brw_vec8_grf(0, 0);
} else {
brw_set_default_access_mode(p, BRW_ALIGN_1);
- if (inst->texture_offset) {
+ if (inst->offset) {
/* Set the texel offset bits in DWord 2. */
brw_MOV(p, get_element_ud(header, 2),
- brw_imm_ud(inst->texture_offset));
+ brw_imm_ud(inst->offset));
}
brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
*
* We can do this with the following EU instruction:
*
- * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
+ * mul(2) dst.3<1>UD src0<8;2,4>UD src1<...>UW { Align1 WE_all }
*/
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ assert(brw->gen >= 7 &&
+ src1.file == BRW_IMMEDIATE_VALUE &&
+ src1.type == BRW_REGISTER_TYPE_UD &&
+ src1.dw1.ud <= USHRT_MAX);
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
- src1);
+ retype(src1, BRW_REGISTER_TYPE_UW));
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
brw_pop_insn_state(p);
}
+void
+vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+
+ int vertex = inst->sol_vertex;
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, get_element_ud(dst, 5), get_element_ud(src, vertex));
+ brw_pop_insn_state(p);
+}
+
void
vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
{
brw_pop_insn_state(p);
}
+void
+vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2)
+{
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ /* Save src0 data in 16:31 bits of dst.0 */
+ brw_AND(p, suboffset(vec1(dst), 0), suboffset(vec1(src0), 0),
+ brw_imm_ud(0xffffu));
+ brw_SHL(p, suboffset(vec1(dst), 0), suboffset(vec1(dst), 0), brw_imm_ud(16));
+ /* Save src1 data in 0:15 bits of dst.0 */
+ brw_AND(p, suboffset(vec1(src2), 0), suboffset(vec1(src1), 0),
+ brw_imm_ud(0xffffu));
+ brw_OR(p, suboffset(vec1(dst), 0),
+ suboffset(vec1(dst), 0),
+ suboffset(vec1(src2), 0));
+ brw_pop_insn_state(p);
+}
+
void
vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
struct brw_reg dst,
- struct brw_reg src0)
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* This opcode uses an implied MRF register for:
* - the header of the ff_sync message. And as such it is expected to be
struct brw_reg header =
retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
- /* Overwrite dword 0 of the header (cleared for now since we are not doing
- * transform feedback) and dword 1 (to hold the number of primitives
- * written).
+ /* Overwrite dword 0 of the header (SO vertices to write) and
+ * dword 1 (number of primitives written).
*/
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, get_element_ud(header, 0), brw_imm_ud(0));
+ brw_MOV(p, get_element_ud(header, 0), get_element_ud(src1, 0));
brw_MOV(p, get_element_ud(header, 1), get_element_ud(src0, 0));
brw_pop_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0));
+
+ /* src1 is not an immediate when we use transform feedback */
+ if (src1.file != BRW_IMMEDIATE_VALUE)
+ brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
+
brw_pop_insn_state(p);
}
}
switch (inst->opcode) {
+ case VEC4_OPCODE_UNPACK_UNIFORM:
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
break;
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
+ assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
if (brw->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
brw_null_reg());
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
+ assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
if (brw->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
} else if (brw->gen == 6) {
generate_gs_svb_write(inst, dst, src[0], src[1]);
break;
+ case GS_OPCODE_SVB_SET_DST_INDEX:
+ generate_gs_svb_set_destination_index(inst, dst, src[0]);
+ break;
+
case GS_OPCODE_THREAD_END:
generate_gs_thread_end(inst);
break;
break;
case GS_OPCODE_FF_SYNC:
- generate_gs_ff_sync(inst, dst, src[0]);
+ generate_gs_ff_sync(inst, dst, src[0], src[1]);
+ break;
+
+ case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
+ generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]);
break;
case GS_OPCODE_SET_PRIMITIVE_ID:
generate_unpack_flags(inst, dst);
break;
+ case VEC4_OPCODE_PACK_BYTES: {
+ /* Is effectively:
+ *
+ * mov(8) dst<16,4,1>:UB src<4,1,0>:UB
+ *
+ * but destinations' only regioning is horizontal stride, so instead we
+ * have to use two instructions:
+ *
+ * mov(4) dst<1>:UB src<4,1,0>:UB
+ * mov(4) dst.16<1>:UB src.16<4,1,0>:UB
+ *
+ * where they pack the four bytes from the low and high four DW.
+ */
+ assert(is_power_of_two(dst.dw1.bits.writemask) &&
+ dst.dw1.bits.writemask != 0);
+ unsigned offset = __builtin_ctz(dst.dw1.bits.writemask);
+
+ dst.type = BRW_REGISTER_TYPE_UB;
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ src[0].type = BRW_REGISTER_TYPE_UB;
+ src[0].vstride = BRW_VERTICAL_STRIDE_4;
+ src[0].width = BRW_WIDTH_1;
+ src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
+ dst.subnr = offset * 4;
+ struct brw_inst *insn = brw_MOV(p, dst, src[0]);
+ brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_4);
+ brw_inst_set_no_dd_clear(brw, insn, true);
+ brw_inst_set_no_dd_check(brw, insn, inst->no_dd_check);
+
+ src[0].subnr = 16;
+ dst.subnr = 16 + offset * 4;
+ insn = brw_MOV(p, dst, src[0]);
+ brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_4);
+ brw_inst_set_no_dd_clear(brw, insn, inst->no_dd_clear);
+ brw_inst_set_no_dd_check(brw, insn, true);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ break;
+ }
+
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
abort();
}
- if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
+ if (inst->opcode == VEC4_OPCODE_PACK_BYTES) {
+ /* Handled dependency hints in the generator. */
+
+ assert(!inst->conditional_mod);
+ } else if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
assert(p->nr_insn == pre_emit_nr_insn + 1 ||
!"conditional_mod, no_dd_check, or no_dd_clear set for IR "
"emitting more than 1 instruction");
brw_inst *last = &p->store[pre_emit_nr_insn];
- brw_inst_set_cond_modifier(brw, last, inst->conditional_mod);
+ if (inst->conditional_mod)
+ brw_inst_set_cond_modifier(brw, last, inst->conditional_mod);
brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear);
brw_inst_set_no_dd_check(brw, last, inst->no_dd_check);
}