fs_visitor::visit(ir_dereference_variable *ir)
{
fs_reg *reg = variable_storage(ir->var); /* look up the register recorded for the dereferenced variable */
+
+ if (!reg) { /* lookup returned no storage for ir->var */
+ fail("Failed to find variable storage for %s\n", ir->var->name);
+ this->result = fs_reg(reg_null_d); /* fail() returns here, so still leave a defined result behind */
+ return; /* skip the *reg dereference below, which would otherwise read through a null pointer */
+ }
this->result = *reg; /* publish a copy of the stored register as this visit's result */
}
src.type = brw_type_for_base_type(ir->type);
if (constant_index) {
- assert(src.file == UNIFORM || src.file == GRF);
+ assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG);
src.reg_offset += constant_index->value.i[0] * element_size;
} else {
/* Variable index array dereference. We attach the variable index
!y.is_valid_3src() ||
!a.is_valid_3src()) {
/* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
+ fs_reg y_times_a = fs_reg(this, glsl_type::float_type);
fs_reg one_minus_a = fs_reg(this, glsl_type::float_type);
+ fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);
+
+ emit(MUL(y_times_a, y, a));
fs_reg negative_a = a;
negative_a.negate = !a.negate;
-
emit(ADD(one_minus_a, negative_a, fs_reg(1.0f)));
- fs_inst *mul = emit(MUL(reg_null_f, y, a));
- mul->writes_accumulator = true;
- emit(MAC(dst, x, one_minus_a));
+ emit(MUL(x_times_one_minus_a, x, one_minus_a));
+
+ emit(ADD(dst, x_times_one_minus_a, y_times_a));
} else {
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
* we need to reorder the operands.
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
- inst = emit(SHADER_OPCODE_TEX, dst);
+ inst = emit(SHADER_OPCODE_TEX, dst, reg_undef);
break;
case ir_txb:
- inst = emit(FS_OPCODE_TXB, dst);
+ inst = emit(FS_OPCODE_TXB, dst, reg_undef);
break;
case ir_txl:
- inst = emit(SHADER_OPCODE_TXL, dst);
+ inst = emit(SHADER_OPCODE_TXL, dst, reg_undef);
break;
case ir_txd:
- inst = emit(SHADER_OPCODE_TXD, dst);
+ inst = emit(SHADER_OPCODE_TXD, dst, reg_undef);
break;
case ir_txs:
- inst = emit(SHADER_OPCODE_TXS, dst);
+ inst = emit(SHADER_OPCODE_TXS, dst, reg_undef);
break;
case ir_txf:
- inst = emit(SHADER_OPCODE_TXF, dst);
+ inst = emit(SHADER_OPCODE_TXF, dst, reg_undef);
break;
default:
fail("unrecognized texture opcode");
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
- inst = emit(SHADER_OPCODE_TEX, dst);
+ inst = emit(SHADER_OPCODE_TEX, dst, reg_undef);
break;
case ir_txb:
mlen = MAX2(mlen, header_present + 4 * reg_width);
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen += reg_width;
- inst = emit(FS_OPCODE_TXB, dst);
+ inst = emit(FS_OPCODE_TXB, dst, reg_undef);
break;
case ir_txl:
mlen = MAX2(mlen, header_present + 4 * reg_width);
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen += reg_width;
- inst = emit(SHADER_OPCODE_TXL, dst);
+ inst = emit(SHADER_OPCODE_TXL, dst, reg_undef);
break;
case ir_txd: {
mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */
case ir_txs:
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
mlen += reg_width;
- inst = emit(SHADER_OPCODE_TXS, dst);
+ inst = emit(SHADER_OPCODE_TXS, dst, reg_undef);
break;
case ir_query_levels:
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
mlen += reg_width;
- inst = emit(SHADER_OPCODE_TXS, dst);
+ inst = emit(SHADER_OPCODE_TXS, dst, reg_undef);
break;
case ir_txf:
mlen = header_present + 4 * reg_width;
emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod));
- inst = emit(SHADER_OPCODE_TXF, dst);
+ inst = emit(SHADER_OPCODE_TXF, dst, reg_undef);
break;
case ir_txf_ms:
mlen = header_present + 4 * reg_width;
/* sample index */
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
mlen += reg_width;
- inst = emit(SHADER_OPCODE_TXF_CMS, dst);
+ inst = emit(SHADER_OPCODE_TXF_CMS, dst, reg_undef);
break;
case ir_lod:
- inst = emit(SHADER_OPCODE_LOD, dst);
+ inst = emit(SHADER_OPCODE_LOD, dst, reg_undef);
break;
case ir_tg4:
- inst = emit(SHADER_OPCODE_TG4, dst);
+ inst = emit(SHADER_OPCODE_TG4, dst, reg_undef);
break;
default:
fail("unrecognized texture opcode");
int reg_width = dispatch_width / 8;
bool header_present = false;
- fs_reg payload = fs_reg(this, glsl_type::float_type);
- fs_reg next = payload;
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
+ for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
+ sources[i] = fs_reg(this, glsl_type::float_type);
+ }
+ int length = 0;
if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf) || sampler >= 16) {
/* For general texture offsets (no txf workaround), we need a header to
* need to offset the Sampler State Pointer in the header.
*/
header_present = true;
- next.reg_offset++;
+ sources[length] = reg_undef;
+ length++;
}
if (ir->shadow_comparitor) {
- emit(MOV(next, shadow_c));
- next.reg_offset++;
+ emit(MOV(sources[length], shadow_c));
+ length++;
}
bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
case ir_lod:
break;
case ir_txb:
- emit(MOV(next, lod));
- next.reg_offset++;
+ emit(MOV(sources[length], lod));
+ length++;
break;
case ir_txl:
- emit(MOV(next, lod));
- next.reg_offset++;
+ emit(MOV(sources[length], lod));
+ length++;
break;
case ir_txd: {
no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(next, coordinate));
+ emit(MOV(sources[length], coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
- emit(MOV(next, lod));
+ emit(MOV(sources[length], lod));
lod.reg_offset++;
- next.reg_offset++;
+ length++;
- emit(MOV(next, lod2));
+ emit(MOV(sources[length], lod2));
lod2.reg_offset++;
- next.reg_offset++;
+ length++;
}
}
break;
}
case ir_txs:
- emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), lod));
- next.reg_offset++;
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod));
+ length++;
break;
case ir_query_levels:
- emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
- next.reg_offset++;
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ length++;
break;
case ir_txf:
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
- emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
- emit(MOV(retype(next, BRW_REGISTER_TYPE_D), lod));
- next.reg_offset++;
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod));
+ length++;
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
}
coordinate_done = true;
break;
case ir_txf_ms:
- emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), sample_index));
- next.reg_offset++;
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index));
+ length++;
/* data from the multisample control surface */
- emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), mcs));
- next.reg_offset++;
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs));
+ length++;
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
}
coordinate_done = true;
fs_reg offset_value = this->result;
for (int i = 0; i < 2; i++) { /* u, v */
- emit(MOV(next, coordinate));
+ emit(MOV(sources[length], coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
}
for (int i = 0; i < 2; i++) { /* offu, offv */
- emit(MOV(retype(next, BRW_REGISTER_TYPE_D), offset_value));
+ emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value));
offset_value.reg_offset++;
- next.reg_offset++;
+ length++;
}
if (ir->coordinate->type->vector_elements == 3) { /* r if present */
- emit(MOV(next, coordinate));
+ emit(MOV(sources[length], coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
}
coordinate_done = true;
/* Set up the coordinate (except for cases where it was done above) */
if (ir->coordinate && !coordinate_done) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(next, coordinate));
+ emit(MOV(sources[length], coordinate));
coordinate.reg_offset++;
- next.reg_offset++;
+ length++;
}
}
+ fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(length),
+ BRW_REGISTER_TYPE_F);
+ emit(LOAD_PAYLOAD(src_payload, sources, length));
+
/* Generate the SEND */
- fs_inst *inst = NULL;
+ enum opcode opcode;
switch (ir->op) {
- case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break;
- case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break;
- case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
- case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
- case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
- case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break;
- case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
- case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
- case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
+ case ir_tex: opcode = SHADER_OPCODE_TEX; break;
+ case ir_txb: opcode = FS_OPCODE_TXB; break;
+ case ir_txl: opcode = SHADER_OPCODE_TXL; break;
+ case ir_txd: opcode = SHADER_OPCODE_TXD; break;
+ case ir_txf: opcode = SHADER_OPCODE_TXF; break;
+ case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+ case ir_txs: opcode = SHADER_OPCODE_TXS; break;
+ case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+ case ir_lod: opcode = SHADER_OPCODE_LOD; break;
case ir_tg4:
if (has_nonconstant_offset)
- inst = emit(SHADER_OPCODE_TG4_OFFSET, dst, payload);
+ opcode = SHADER_OPCODE_TG4_OFFSET;
else
- inst = emit(SHADER_OPCODE_TG4, dst, payload);
+ opcode = SHADER_OPCODE_TG4;
break;
}
+ fs_inst *inst = emit(opcode, dst, src_payload);
inst->base_mrf = -1;
if (reg_width == 2)
- inst->mlen = next.reg_offset * reg_width - header_present;
+ inst->mlen = length * reg_width - header_present;
else
- inst->mlen = next.reg_offset * reg_width;
+ inst->mlen = length * reg_width;
inst->header_present = header_present;
inst->regs_written = 4;
- virtual_grf_sizes[payload.reg] = next.reg_offset;
if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
" disallowed by hardware\n");
return coordinate;
}
- scale_x = fs_reg(UNIFORM, uniforms);
- scale_y = fs_reg(UNIFORM, uniforms + 1);
-
GLuint index = _mesa_add_state_reference(params,
(gl_state_index *)tokens);
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][0].f;
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][1].f;
+ /* Try to find existing copies of the texrect scale uniforms. */
+ for (unsigned i = 0; i < uniforms; i++) {
+ if (stage_prog_data->param[i] ==
+ &prog->Parameters->ParameterValues[index][0].f) {
+ scale_x = fs_reg(UNIFORM, i);
+ scale_y = fs_reg(UNIFORM, i + 1);
+ break;
+ }
+ }
+
+ /* If we didn't already set them up, do so now. */
+ if (scale_x.file == BAD_FILE) {
+ scale_x = fs_reg(UNIFORM, uniforms);
+ scale_y = fs_reg(UNIFORM, uniforms + 1);
+
+ stage_prog_data->param[uniforms++] =
+ &prog->Parameters->ParameterValues[index][0].f;
+ stage_prog_data->param[uniforms++] =
+ &prog->Parameters->ParameterValues[index][1].f;
+ }
}
/* The 965 requires the EU to do the normalization of GL rectangle
fs_reg chan = coordinate;
chan.reg_offset += i;
- inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
+ inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f));
inst->conditional_mod = BRW_CONDITIONAL_G;
/* Our parameter comes in as 1.0/width or 1.0/height,
fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, int sampler)
{
int reg_width = dispatch_width / 8; /* scale factor applied to the message length below */
- fs_reg payload = fs_reg(this, glsl_type::float_type);
+ int length = ir->coordinate->type->vector_elements; /* one payload slot per coordinate component */
+ fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length),
+ BRW_REGISTER_TYPE_F); /* payload is sized up front to exactly `length` slots */
fs_reg dest = fs_reg(this, glsl_type::uvec4_type);
- fs_reg next = payload;
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length); /* per-component sources handed to LOAD_PAYLOAD below */
- /* parameters are: u, v, r, lod; missing parameters are treated as zero */
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
+ /* parameters are: u, v, r; missing parameters are treated as zero */
+ for (int i = 0; i < length; i++) {
+ sources[i] = fs_reg(this, glsl_type::float_type); /* fresh temporary for this component */
+ emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); /* destination is retyped to D for the copy */
coordinate.reg_offset++; /* step to the next coordinate component */
- next.reg_offset++;
}
+ emit(LOAD_PAYLOAD(payload, sources, length)); /* gather the temporaries into the single payload register */
+
fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload);
- virtual_grf_sizes[payload.reg] = next.reg_offset;
inst->base_mrf = -1;
- inst->mlen = next.reg_offset * reg_width;
+ inst->mlen = length * reg_width; /* message length: one slot per component, scaled by reg_width */
inst->header_present = false;
- inst->regs_written = 4 * reg_width; /* we only care about one reg of response,
- * but the sampler always writes 4/8
- */
+ inst->regs_written = 4; /* we only care about one reg of response,
+ * but the sampler always writes 4/8
+ */
inst->sampler = sampler;
return dest; /* register written by the TXF_MCS instruction emitted above */
type->sampler_array) {
fs_reg depth = dst;
depth.reg_offset = 2;
- emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, depth, fs_reg(6));
+ fs_reg fixed_depth = fs_reg(this, glsl_type::int_type);
+ emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
+
+ fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
+ fs_reg d = dst;
+ for (int i = 0; i < inst->regs_written; i++) {
+ if (i == 2) {
+ fixed_payload[i] = fixed_depth;
+ } else {
+ d.reg_offset = i;
+ fixed_payload[i] = d;
+ }
+ }
+ emit(LOAD_PAYLOAD(dst, fixed_payload, inst->regs_written));
}
}
}
}
} else if (ir->type->is_record()) {
- foreach_list(node, &ir->components) {
- ir_constant *const field = (ir_constant *) node;
+ foreach_in_list(ir_constant, field, &ir->components) {
const unsigned size = type_size(field->type);
field->accept(this);
emit(IF(BRW_PREDICATE_NORMAL));
}
- foreach_list(node, &ir->then_instructions) {
- ir_instruction *ir = (ir_instruction *)node;
- this->base_ir = ir;
-
- ir->accept(this);
+ foreach_in_list(ir_instruction, ir_, &ir->then_instructions) {
+ this->base_ir = ir_;
+ ir_->accept(this);
}
if (!ir->else_instructions.is_empty()) {
emit(BRW_OPCODE_ELSE);
- foreach_list(node, &ir->else_instructions) {
- ir_instruction *ir = (ir_instruction *)node;
- this->base_ir = ir;
-
- ir->accept(this);
+ foreach_in_list(ir_instruction, ir_, &ir->else_instructions) {
+ this->base_ir = ir_;
+ ir_->accept(this);
}
}
this->base_ir = NULL;
emit(BRW_OPCODE_DO);
- foreach_list(node, &ir->body_instructions) {
- ir_instruction *ir = (ir_instruction *)node;
-
- this->base_ir = ir;
- ir->accept(this);
+ foreach_in_list(ir_instruction, ir_, &ir->body_instructions) {
+ this->base_ir = ir_;
+ ir_->accept(this);
}
this->base_ir = NULL;
assert(sig);
- foreach_list(node, &sig->body) {
- ir_instruction *ir = (ir_instruction *)node;
- this->base_ir = ir;
-
- ir->accept(this);
+ foreach_in_list(ir_instruction, ir_, &sig->body) {
+ this->base_ir = ir_;
+ ir_->accept(this);
}
}
}
unsigned mlen = 0;
/* Initialize the sample mask in the message header. */
- emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
+ emit(MOV(brw_uvec_mrf(8, mlen, 0), fs_reg(0u)))
->force_writemask_all = true;
if (fp->UsesKill) {
unsigned mlen = 0;
/* Initialize the sample mask in the message header. */
- emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
+ emit(MOV(brw_uvec_mrf(8, mlen, 0), fs_reg(0u)))
->force_writemask_all = true;
if (fp->UsesKill) {
}
fs_visitor::fs_visitor(struct brw_context *brw,
- struct brw_wm_compile *c,
+ void *mem_ctx,
+ const struct brw_wm_prog_key *key,
+ struct brw_wm_prog_data *prog_data,
struct gl_shader_program *shader_prog,
struct gl_fragment_program *fp,
unsigned dispatch_width)
- : backend_visitor(brw, shader_prog, &fp->Base, &c->prog_data.base,
+ : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base,
MESA_SHADER_FRAGMENT),
- key(&c->key),
+ key(key), prog_data(prog_data),
dispatch_width(dispatch_width)
{
- this->c = c;
- this->prog_data = &c->prog_data;
this->fp = fp;
- this->mem_ctx = c;
+ this->mem_ctx = mem_ctx;
this->failed = false;
this->simd16_unsupported = false;
this->no16_msg = NULL;