static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
struct gl_shader *
-brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
+brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
struct brw_shader *shader;
}
struct gl_shader_program *
-brw_new_shader_program(GLcontext *ctx, GLuint name)
+brw_new_shader_program(struct gl_context *ctx, GLuint name)
{
struct brw_shader_program *prog;
prog = talloc_zero(NULL, struct brw_shader_program);
}
GLboolean
-brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
+brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
{
if (!_mesa_ir_compile_shader(ctx, shader))
return GL_FALSE;
}
GLboolean
-brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
+brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct intel_context *intel = intel_context(ctx);
- if (using_new_fs == -1)
- using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+
+ if (using_new_fs == -1) {
+ if (intel->gen >= 6)
+ using_new_fs = 1;
+ else
+ using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+ }
for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
assert(!"not reached: bad math opcode");
return NULL;
}
+
+ /* Can't do hstride == 0 args to gen6 math, so expand it out. We
+ * might be able to do better by doing execsize = 1 math and then
+ * expanding that result out, but we would need to be careful with
+ * masking.
+ */
+ if (intel->gen >= 6 && src.file == UNIFORM) {
+ fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
+ src = expanded;
+ }
+
fs_inst *inst = emit(fs_inst(opcode, dst, src));
- inst->base_mrf = 2;
- inst->mlen = 1;
+ if (intel->gen < 6) {
+ inst->base_mrf = 2;
+ inst->mlen = 1;
+ }
return inst;
}
fs_inst *
fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
+ int base_mrf = 2;
+ fs_inst *inst;
+
assert(opcode == FS_OPCODE_POW);
- fs_inst *inst = emit(fs_inst(opcode, dst, src0, src1));
+ if (intel->gen >= 6) {
+ /* Can't do hstride == 0 args to gen6 math, so expand it out. */
+ if (src0.file == UNIFORM) {
+ fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
+ src0 = expanded;
+ }
- inst->base_mrf = 2;
- inst->mlen = 2;
+ if (src1.file == UNIFORM) {
+ fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
+ src1 = expanded;
+ }
+ inst = emit(fs_inst(opcode, dst, src0, src1));
+ } else {
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
+ inst = emit(fs_inst(opcode, dst, src0, reg_null));
+
+ inst->base_mrf = base_mrf;
+ inst->mlen = 2;
+ }
return inst;
}
case ir_unop_i2f:
case ir_unop_b2f:
case ir_unop_b2i:
- emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
- break;
case ir_unop_f2i:
emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
break;
case ir_unop_i2b:
inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
+ this->result, fs_reg(1)));
+ break;
case ir_unop_trunc:
emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
void
fs_visitor::visit(ir_texture *ir)
{
+ int sampler;
fs_inst *inst = NULL;
ir->coordinate->accept(this);
/* Should be lowered by do_lower_texture_projection */
assert(!ir->projector);
+ sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+ ctx->Shader.CurrentProgram,
+ &brw->fragment_program->Base);
+ sampler = c->fp->program.Base.SamplerUnits[sampler];
+
+ /* The 965 requires the EU to do the normalization of GL rectangle
+ * texture coordinates. We use the program parameter state
+ * tracking to get the scaling factor.
+ */
+ if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+ struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+ int tokens[STATE_LENGTH] = {
+ STATE_INTERNAL,
+ STATE_TEXRECT_SCALE,
+ sampler,
+ 0,
+ 0
+ };
+
+ fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
+ fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
+ GLuint index = _mesa_add_state_reference(params,
+ (gl_state_index *)tokens);
+ float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
+
+ c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
+ c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];
+
+ fs_reg dst = fs_reg(this, ir->coordinate->type);
+ fs_reg src = coordinate;
+ coordinate = dst;
+
+ emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
+ dst.reg_offset++;
+ src.reg_offset++;
+ emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
+ }
+
/* Writemasking doesn't eliminate channels on SIMD8 texture
* samples, so don't worry about them.
*/
inst = emit_texture_gen5(ir, dst, coordinate);
}
- inst->sampler =
- _mesa_get_sampler_uniform_value(ir->sampler,
- ctx->Shader.CurrentProgram,
- &brw->fragment_program->Base);
- inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];
+ inst->sampler = sampler;
this->result = dst;
/* If the pixel centers end up used, the setup is the same as for gen4. */
this->current_annotation = "compute pixel centers";
- this->pixel_x = fs_reg(this, glsl_type::uint_type);
- this->pixel_y = fs_reg(this, glsl_type::uint_type);
- this->pixel_x.type = BRW_REGISTER_TYPE_UW;
- this->pixel_y.type = BRW_REGISTER_TYPE_UW;
+ fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
+ fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
+ int_pixel_x.type = BRW_REGISTER_TYPE_UW;
+ int_pixel_y.type = BRW_REGISTER_TYPE_UW;
emit(fs_inst(BRW_OPCODE_ADD,
- this->pixel_x,
+ int_pixel_x,
fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
fs_reg(brw_imm_v(0x10101010))));
emit(fs_inst(BRW_OPCODE_ADD,
- this->pixel_y,
+ int_pixel_y,
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
fs_reg(brw_imm_v(0x11001100))));
+ /* As of gen6, we can no longer mix float and int sources. We have
+ * to turn the integer pixel centers into floats for their actual
+ * use.
+ */
+ this->pixel_x = fs_reg(this, glsl_type::float_type);
+ this->pixel_y = fs_reg(this, glsl_type::float_type);
+ emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
+ emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
+
this->current_annotation = "compute 1/pos.w";
this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
this->pixel_w = fs_reg(this, glsl_type::float_type);
break;
}
- assert(inst->mlen >= 1);
+ if (intel->gen >= 6) {
+ assert(inst->mlen == 0);
- if (inst->opcode == FS_OPCODE_POW) {
- brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src[1]);
- }
+ if (inst->opcode == FS_OPCODE_POW) {
+ brw_math2(p, dst, op, src[0], src[1]);
+ } else {
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, src[0],
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
+ } else {
+ assert(inst->mlen >= 1);
- brw_math(p, dst,
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- inst->base_mrf, src[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf, src[0],
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
void
return progress;
}
+
+bool
+fs_visitor::compute_to_mrf()
+{
+ bool progress = false;
+ int next_ip = 0;
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ int ip = next_ip;
+ next_ip++;
+
+ if (inst->opcode != BRW_OPCODE_MOV ||
+ inst->predicated ||
+ inst->dst.file != MRF || inst->src[0].file != GRF ||
+ inst->dst.type != inst->src[0].type ||
+ inst->src[0].abs || inst->src[0].negate)
+ continue;
+
+ /* Can't compute-to-MRF this GRF if someone else was going to
+ * read it later.
+ */
+ if (this->virtual_grf_use[inst->src[0].reg] > ip)
+ continue;
+
+ /* Found a move of a GRF to a MRF. Let's see if we can go
+ * rewrite the thing that made this GRF to write into the MRF.
+ */
+ bool found = false;
+ fs_inst *scan_inst;
+ for (scan_inst = (fs_inst *)inst->prev;
+ scan_inst->prev != NULL;
+ scan_inst = (fs_inst *)scan_inst->prev) {
+ /* We don't handle flow control here. Most computation of
+ * values that end up in MRFs are shortly before the MRF
+ * write anyway.
+ */
+ if (scan_inst->opcode == BRW_OPCODE_DO ||
+ scan_inst->opcode == BRW_OPCODE_WHILE ||
+ scan_inst->opcode == BRW_OPCODE_ENDIF) {
+ break;
+ }
+
+ /* You can't read from an MRF, so if someone else reads our
+ * MRF's source GRF that we wanted to rewrite, that stops us.
+ */
+ bool interfered = false;
+ for (int i = 0; i < 3; i++) {
+ if (scan_inst->src[i].file == GRF &&
+ scan_inst->src[i].reg == inst->src[0].reg &&
+ scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
+ interfered = true;
+ }
+ }
+ if (interfered)
+ break;
+
+ if (scan_inst->dst.file == MRF &&
+ scan_inst->dst.hw_reg == inst->dst.hw_reg) {
+ /* Somebody else wrote our MRF here, so we can't can't
+ * compute-to-MRF before that.
+ */
+ break;
+ }
+
+ if (scan_inst->mlen > 0) {
+ /* Found a SEND instruction, which will do some amount of
+ * implied write that may overwrite our MRF that we were
+ * hoping to compute-to-MRF somewhere above it. Nothing
+ * we have implied-writes more than 2 MRFs from base_mrf,
+ * though.
+ */
+ int implied_write_len = MIN2(scan_inst->mlen, 2);
+ if (inst->dst.hw_reg >= scan_inst->base_mrf &&
+ inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
+ break;
+ }
+ }
+
+ if (scan_inst->dst.file == GRF &&
+ scan_inst->dst.reg == inst->src[0].reg) {
+ /* Found the last thing to write our reg we want to turn
+ * into a compute-to-MRF.
+ */
+
+ if (scan_inst->opcode == FS_OPCODE_TEX) {
+ /* texturing writes several continuous regs, so we can't
+ * compute-to-mrf that.
+ */
+ break;
+ }
+
+ /* If it's predicated, it (probably) didn't populate all
+ * the channels.
+ */
+ if (scan_inst->predicated)
+ break;
+
+ /* SEND instructions can't have MRF as a destination. */
+ if (scan_inst->mlen)
+ break;
+
+ if (intel->gen >= 6) {
+ /* gen6 math instructions must have the destination be
+ * GRF, so no compute-to-MRF for them.
+ */
+ if (scan_inst->opcode == FS_OPCODE_RCP ||
+ scan_inst->opcode == FS_OPCODE_RSQ ||
+ scan_inst->opcode == FS_OPCODE_SQRT ||
+ scan_inst->opcode == FS_OPCODE_EXP2 ||
+ scan_inst->opcode == FS_OPCODE_LOG2 ||
+ scan_inst->opcode == FS_OPCODE_SIN ||
+ scan_inst->opcode == FS_OPCODE_COS ||
+ scan_inst->opcode == FS_OPCODE_POW) {
+ break;
+ }
+ }
+
+ if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
+ /* Found the creator of our MRF's source value. */
+ found = true;
+ break;
+ }
+ }
+ }
+ if (found) {
+ scan_inst->dst.file = MRF;
+ scan_inst->dst.hw_reg = inst->dst.hw_reg;
+ scan_inst->saturate |= inst->saturate;
+ inst->remove();
+ progress = true;
+ }
+ }
+
+ return progress;
+}
+
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &brw->intel;
- GLcontext *ctx = &intel->ctx;
+ struct gl_context *ctx = &intel->ctx;
struct brw_shader *shader = NULL;
struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
v.calculate_live_intervals();
progress = v.propagate_constants() || progress;
progress = v.register_coalesce() || progress;
+ progress = v.compute_to_mrf() || progress;
progress = v.dead_code_eliminate() || progress;
} while (progress);