void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
+ void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
+ struct brw_reg dst, /* destination of the generated SEND */
+ struct brw_reg surf_index, /* called "index" in the definition, which asserts an immediate UD */
+ struct brw_reg offset); /* asserted to be a GRF in the definition; used as src0 of the SEND */
void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index);
void generate_varying_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg index,
struct brw_reg offset);
void generate_mov_dispatch_to_flags(fs_inst *inst);
+ void generate_set_global_offset(fs_inst *inst,
+ struct brw_reg dst, /* register that receives the one-dword write */
+ struct brw_reg src, /* asserted in the definition to be the same file/nr as dst */
+ struct brw_reg offset); /* immediate to store; the definition names this parameter "value" */
void generate_discard_jump(fs_inst *inst);
void patch_discard_jumps_to_fb_writes();
}
}
+void
+fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset)
+{ /* Emits one SEND that performs an OWord block read (2 OWords, header
+ * present) from the data cache for a uniform pull-constant load.
+ *
+ * inst   - IR instruction being generated; its mlen must be 0.
+ * dst    - destination register of the SEND.
+ * index  - surface index; must be an immediate of type UD.
+ * offset - general register used as src0 of the SEND. The emitter visible
+ *          in this change passes a payload register that was initialised
+ *          from g0 and then updated by FS_OPCODE_SET_GLOBAL_OFFSET.
+ */
+ assert(inst->mlen == 0);
+ /* The surface index has to arrive as an unsigned-dword immediate so it can
+ * be unpacked into a plain integer here.
+ */
+ assert(index.file == BRW_IMMEDIATE_VALUE &&
+ index.type == BRW_REGISTER_TYPE_UD);
+ uint32_t surf_index = index.dw1.ud;
+ /* src0 of the SEND must live in the general register file. */
+ assert(offset.file == BRW_GENERAL_REGISTER_FILE);
+ /* Allocate the SEND with compression set to NONE and mask control set to
+ * BRW_MASK_DISABLE; the saved instruction state is restored as soon as the
+ * instruction has been allocated.
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_pop_insn_state(p);
+
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, offset);
+ if (intel->gen < 6)
+ send->header.destreg__conditionalmod = inst->base_mrf;
+ /* NOTE(review): the only emitter of this opcode visible in this change is
+ * guarded by gen >= 7, so the gen < 6 branch above looks unreachable from
+ * here -- confirm against any other callers.
+ */
+ uint32_t msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+ uint32_t msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
+ bool header_present = true;
+ brw_set_dp_read_message(p, send,
+ surf_index,
+ msg_control,
+ msg_type,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 1, /* presumably the message length in registers -- TODO confirm */
+ header_present,
+ 1); /* presumably the response length in registers -- TODO confirm */
+}
+
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
return brw_reg;
}
+/**
+ * Writes an immediate into dword 2 of a vgrf for gen7+ message setup.
+ *
+ * NOTE(review): this comment used to say "second dword", but the code
+ * addresses subregister 2 of dst via brw_vec1_reg(..., 2); assuming
+ * subregisters are counted in dwords from zero, that is the third dword --
+ * confirm against brw_vec1_reg().
+ *
+ * For setting up gen7 messages in VGRFs, we need to be able to set that
+ * dword for some payloads where in the MRF world we'd have just used
+ * brw_message_reg(). We don't want to bake it into the send message's code
+ * generation because that means we don't get a chance to schedule the
+ * instructions.
+ *
+ * inst  - IR instruction being generated; not read by this function.
+ * dst   - register whose single dword is overwritten.
+ * src   - must be the same register (file and number) as dst.
+ * value - immediate to store; its type is also used as the type of the
+ *         destination subregister.
+ */
+void
+fs_generator::generate_set_global_offset(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg value)
+{
+ /* We use a matching src and dst to get the information on how this
+ * instruction works exposed to various optimization passes that would
+ * otherwise treat it as completely overwriting the dst.
+ *
+ * src is only checked here; the MOV below reads nothing but the
+ * immediate.
+ */
+ assert(src.file == dst.file && src.nr == dst.nr);
+ assert(value.file == BRW_IMMEDIATE_VALUE);
+ /* Emit a one-element MOV with compression set to NONE and mask control set
+ * to BRW_MASK_DISABLE, then restore the saved instruction state.
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 2), value.type), value);
+ brw_pop_insn_state(p);
+}
+
void
fs_generator::generate_code(exec_list *instructions)
{
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
break;
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ break;
+
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
generate_varying_pull_constant_load(inst, dst, src[0]);
break;
brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
break;
+ case FS_OPCODE_SET_GLOBAL_OFFSET:
+ generate_set_global_offset(inst, dst, src[0], src[1]);
+ break;
+
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
if (const_offset) {
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
packed_consts.type = result.type;
- fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts,
- surf_index,
- fs_reg(const_offset->value.u[0])));
- pull->base_mrf = 14;
- pull->mlen = 1;
+
+ if (intel->gen >= 7) {
+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16);
+ fs_reg payload = fs_reg(this, glsl_type::uint_type);
+ struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UD);
+ fs_inst *setup = emit(MOV(payload, fs_reg(g0)));
+ setup->force_writemask_all = true;
+ /* We don't need the second half of this vgrf to be filled with g1
+ * in the 16-wide case, but if we use force_uncompressed then live
+ * variable analysis won't consider this a def!
+ */
+
+ emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload,
+ payload, const_offset_reg);
+ emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts,
+ surf_index, payload);
+ } else {
+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]);
+ fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ packed_consts,
+ surf_index,
+ const_offset_reg));
+ pull->base_mrf = 14;
+ pull->mlen = 1;
+ }
packed_consts.smear = const_offset->value.u[0] % 16 / 4;
for (int i = 0; i < ir->type->vector_elements; i++) {