header_size = 1;
sources[0] = fs_reg();
length++;
+
+ /* If we're requesting fewer than four channels' worth of response,
+ * and we have an explicit header, we need to set up the sampler
+ * writemask (bits 15:12). It's inverted: a 1 bit means "don't write".
+ */
+ if (inst->regs_written != 4 * reg_width) {
+ assert((inst->regs_written % reg_width) == 0);
+ unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf;
+ inst->offset |= mask << 12;
+ }
}
if (shadow_c.file != BAD_FILE) {
unreachable("unknown texture opcode");
}
+ unsigned num_components = nir_tex_instr_dest_size(instr);
+
+ if (instr->dest.is_ssa) {
+ uint8_t write_mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ assert(write_mask != 0); /* dead code should have been eliminated */
+ num_components = _mesa_fls(write_mask);
+ }
+
+ const bool can_reduce_return_length = devinfo->gen >= 9 &&
+ instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels;
+
emit_texture(op, dest_type, coordinate, instr->coord_components,
shadow_comparitor, lod, lod2, lod_components, sample_index,
tex_offset, mcs, gather_component, is_cube_array,
- texture, texture_reg, sampler, sampler_reg);
+ texture, texture_reg, sampler, sampler_reg,
+ can_reduce_return_length ? num_components : 4);
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;
- unsigned num_components = nir_tex_instr_dest_size(instr);
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
dest, this->result),
(1 << num_components) - 1);
uint32_t surface,
fs_reg surface_reg,
uint32_t sampler,
- fs_reg sampler_reg)
+ fs_reg sampler_reg,
+ unsigned return_channels)
{
fs_inst *inst = NULL;
}
inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
- inst->regs_written = 4 * dispatch_width / 8;
+ inst->regs_written = return_channels * dispatch_width / 8;
if (shadow_c.file != BAD_FILE)
inst->shadow_compare = true;