* type to match src0 so we can compact the instruction.
*/
dst.type = src0.type;
- if (dst.file == HW_REG)
- dst.fixed_hw_reg.type = dst.type;
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
{
init();
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type));
+ this->file = VGRF;
+ this->nr = v->alloc.allocate(type_size_vec4(type));
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
init();
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type) * size);
+ this->file = VGRF;
+ this->nr = v->alloc.allocate(type_size_vec4(type) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
{
init();
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type));
+ this->file = VGRF;
+ this->nr = v->alloc.allocate(type_size_vec4(type));
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
if (devinfo->gen < 8 && !devinfo->is_haswell)
return false;
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+ return sampler.file != IMM || sampler.ud >= 16;
}
void
src_reg offset_value,
src_reg mcs,
bool is_cube_array,
+ uint32_t surface,
+ src_reg surface_reg,
uint32_t sampler,
src_reg sampler_reg)
{
+ /* The sampler can only meaningfully compute LOD for fragment shader
+ * messages. For all other stages, we change the opcode to TXL and hardcode
+ * the LOD to 0.
+ *
+ * textureQueryLevels() is implemented in terms of TXS so we need to pass a
+ * valid LOD argument.
+ */
+ if (op == ir_tex || op == ir_query_levels) {
+ assert(lod.file == BAD_FILE);
+ lod = src_reg(0.0f);
+ }
+
enum opcode opcode;
switch (op) {
case ir_tex: opcode = SHADER_OPCODE_TXL; break;
case ir_txl: opcode = SHADER_OPCODE_TXL; break;
case ir_txd: opcode = SHADER_OPCODE_TXD; break;
case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+ case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
+ SHADER_OPCODE_TXF_CMS); break;
case ir_txs: opcode = SHADER_OPCODE_TXS; break;
case ir_tg4: opcode = offset_value.file != BAD_FILE
? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
inst->dst.writemask = WRITEMASK_XYZW;
inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
- inst->src[1] = sampler_reg;
+ inst->src[1] = surface_reg;
+ inst->src[2] = sampler_reg;
/* MRF for the first parameter */
int param_base = inst->base_mrf + inst->header_size;
} else if (op == ir_txf_ms) {
emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
sample_index));
- if (devinfo->gen >= 7) {
+ if (opcode == SHADER_OPCODE_TXF_CMS_W) {
+ /* MCS data is stored in the first two channels of `mcs`, but we
+ * need to get it into the .y and .z channels of the second vec4
+ * of params.
+ */
+ mcs.swizzle = BRW_SWIZZLE4(0, 0, 1, 1);
+ emit(MOV(dst_reg(MRF, param_base + 1,
+ glsl_type::uint_type, WRITEMASK_YZ),
+ mcs));
+ } else if (devinfo->gen >= 7) {
/* MCS data is in the first channel of `mcs`, but we need to get it into
* the .y channel of the second vec4 of params, so replicate .x across
* the whole vec4 and then mask off everything except .y
}
if (devinfo->gen == 6 && op == ir_tg4) {
- emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
+ emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
}
swizzle_result(op, dest,
* Set up the gather channel based on the swizzle, for gather4.
*/
uint32_t
-vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
+vec4_visitor::gather_channel(unsigned gather_component,
+ uint32_t surface, uint32_t sampler)
{
int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
switch (swiz) {
/* gather4 sampler is broken for green channel on RG32F --
* we must ask for blue instead.
*/
- if (key_tex->gather_channel_quirk_mask & (1 << sampler))
+ if (key_tex->gather_channel_quirk_mask & (1 << surface))
return 2;
return 1;
case SWIZZLE_Z: return 2;
/**
 * Emit a SHADER_OPCODE_UNTYPED_ATOMIC instruction together with the MOVs
 * that build its message payload.
 *
 * The payload is one register for the surface offset, followed by one
 * register each for src0 and src1 when their file is not BAD_FILE.  Only
 * the X channel of each payload register is written.
 *
 * Patch note: the lines marked '-' wrote the payload straight into MRFs
 * (brw_uvec_mrf) and passed brw_message_reg(0) as the instruction source;
 * the lines marked '+' build the payload in a register allocated through
 * the src_reg(this, type, size) constructor and pass that register as the
 * first source instead.  The parameter is renamed from offset to
 * surf_offset, presumably so the name no longer hides the offset() helper
 * called below -- TODO confirm.
 */
void
vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- dst_reg dst, src_reg offset,
+ dst_reg dst, src_reg surf_offset,
src_reg src0, src_reg src1)
{
- unsigned mlen = 0;
/* The message length is now known up front: the offset register plus one
 * register per operand that is actually present.
 */
+ unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ src_reg src_payload(this, glsl_type::uint_type, mlen);
/* Destination view of the same payload register, restricted to X. */
+ dst_reg payload(src_payload);
+ payload.writemask = WRITEMASK_X;
/* Set the atomic operation offset. */
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
- mlen++;
+ emit(MOV(offset(payload, 0), surf_offset));
/* i is the payload register index the next present operand is written to. */
+ unsigned i = 1;
/* Set the atomic operation arguments. */
if (src0.file != BAD_FILE) {
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
- mlen++;
+ emit(MOV(offset(payload, i), src0));
+ i++;
}
if (src1.file != BAD_FILE) {
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
- mlen++;
+ emit(MOV(offset(payload, i), src1));
/* i is not read again after this increment. */
+ i++;
}
/* Emit the instruction. Note that this maps to the normal SIMD8
 * untyped atomic message, but that's OK because unused channels will be
 * masked out.
 */
vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
- brw_message_reg(0),
+ src_payload,
src_reg(surf_index), src_reg(atomic_op));
inst->mlen = mlen;
}
/**
 * Emit a SHADER_OPCODE_UNTYPED_SURFACE_READ instruction whose one-register
 * payload carries the surface offset in its X channel.
 *
 * Patch note: the lines marked '-' moved the offset into an MRF
 * (brw_uvec_mrf(8, 0, 0)) and passed brw_message_reg(0) as the instruction
 * source; the lines marked '+' move it into a freshly constructed uint
 * dst_reg and pass that register as the first source.  The parameter is
 * renamed from offset to surf_offset, which frees the name "offset" for the
 * new local temporary.
 */
void
vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
- src_reg offset)
+ src_reg surf_offset)
{
/* Temporary holding the payload; only its X channel is written. */
+ dst_reg offset(this, glsl_type::uint_type);
+ offset.writemask = WRITEMASK_X;
+
/* Set the surface read offset. */
- emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+ emit(MOV(offset, surf_offset));
/* Emit the instruction. Note that this maps to the normal SIMD8
* untyped surface read message, but that's OK because unused
* channels will be masked out.
*/
/* NOTE(review): the meaning of the constant third source, src_reg(1), is
 * not visible in this chunk -- confirm against the generator for this
 * opcode.
 */
vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
- brw_message_reg(0),
+ src_reg(offset),
src_reg(surf_index), src_reg(1));
/* The payload is always a single register (the offset). */
inst->mlen = 1;
}
void
vec4_visitor::emit_ndc_computation()
{
+ if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
+ return;
+
/* Get the position */
src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (devinfo->has_negative_rhw_bug) {
+ if (devinfo->has_negative_rhw_bug &&
+ output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
assert(varying < VARYING_SLOT_MAX);
assert(output_reg[varying].type == reg.type);
current_annotation = output_reg_annotation[varying];
- /* Copy the register, saturating if necessary */
- return emit(MOV(reg, src_reg(output_reg[varying])));
+ if (output_reg[varying].file != BAD_FILE)
+ return emit(MOV(reg, src_reg(output_reg[varying])));
+ else
+ return NULL;
}
void
}
case BRW_VARYING_SLOT_NDC:
current_annotation = "NDC";
- emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
+ if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
+ emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
break;
case VARYING_SLOT_POS:
current_annotation = "gl_Position";
- emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
+ if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
+ emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
break;
case VARYING_SLOT_EDGE:
/* This is present when doing unfilled polygons. We're supposed to copy
inst->insert_after(block, write);
inst->dst.file = temp.file;
- inst->dst.reg = temp.reg;
+ inst->dst.nr = temp.nr;
inst->dst.reg_offset = temp.reg_offset;
inst->dst.reladdr = NULL;
}
*src.reladdr);
/* Now handle scratch access on src */
- if (src.file == GRF && scratch_loc[src.reg] != -1) {
+ if (src.file == VGRF && scratch_loc[src.nr] != -1) {
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
- emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
- src.reg = temp.reg;
+ emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
+ src.nr = temp.nr;
src.reg_offset = temp.reg_offset;
src.reladdr = NULL;
}
* scratch.
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- if (inst->dst.file == GRF && inst->dst.reladdr) {
- if (scratch_loc[inst->dst.reg] == -1) {
- scratch_loc[inst->dst.reg] = last_scratch;
- last_scratch += this->alloc.sizes[inst->dst.reg];
+ if (inst->dst.file == VGRF && inst->dst.reladdr) {
+ if (scratch_loc[inst->dst.nr] == -1) {
+ scratch_loc[inst->dst.nr] = last_scratch;
+ last_scratch += this->alloc.sizes[inst->dst.nr];
}
for (src_reg *iter = inst->dst.reladdr;
iter->reladdr;
iter = iter->reladdr) {
- if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
- scratch_loc[iter->reg] = last_scratch;
- last_scratch += this->alloc.sizes[iter->reg];
+ if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
+ scratch_loc[iter->nr] = last_scratch;
+ last_scratch += this->alloc.sizes[iter->nr];
}
}
}
for (src_reg *iter = &inst->src[i];
iter->reladdr;
iter = iter->reladdr) {
- if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
- scratch_loc[iter->reg] = last_scratch;
- last_scratch += this->alloc.sizes[iter->reg];
+ if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
+ scratch_loc[iter->nr] = last_scratch;
+ last_scratch += this->alloc.sizes[iter->nr];
}
}
}
/* Now that we have handled any (possibly recursive) reladdr scratch
* accesses for dst we can safely do the scratch write for dst itself
*/
- if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
- emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
+ if (inst->dst.file == VGRF && scratch_loc[inst->dst.nr] != -1)
+ emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
/* Now handle scratch access on any src. In this case, since inst->src[i]
* already is a src_reg, we can just call emit_resolve_reladdr with
int base_offset)
{
int reg_offset = base_offset + orig_src.reg_offset;
- src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
+ const unsigned index = prog_data->base.binding_table.pull_constants_start;
src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
reg_offset);
emit_pull_constant_load_reg(temp,
- index,
+ src_reg(index),
offset,
block, inst);
+
+ brw_mark_surface_used(&prog_data->base, index);
}
/**
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
continue;
- int uniform = inst->src[i].reg;
+ int uniform = inst->src[i].nr;
if (inst->src[i].reladdr->reladdr)
nested_reladdr = true; /* will need another pass */
pull_constant_loc[uniform]);
inst->src[i].file = temp.file;
- inst->src[i].reg = temp.reg;
+ inst->src[i].nr = temp.nr;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}