}
#include "brw_shader.h"
#include "brw_fs.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_print_visitor.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_print_visitor.h"
#define MAX_INSTRUCTION (1 << 30)
return 0;
switch (inst->opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
return 1 * c->dispatch_width / 8;
- case FS_OPCODE_POW:
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
return 2 * c->dispatch_width / 8;
- case FS_OPCODE_TEX:
+ case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
- case FS_OPCODE_TXD:
- case FS_OPCODE_TXL:
+ case SHADER_OPCODE_TXD:
+ case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXL:
+ case SHADER_OPCODE_TXS:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
virtual_grf_array_size *= 2;
virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
virtual_grf_array_size);
-
- /* This slot is always unused. */
- virtual_grf_sizes[0] = 0;
}
virtual_grf_sizes[virtual_grf_next] = size;
return virtual_grf_next++;
}
/**
* Fixed HW reg constructor.
*
* Resets the register with init(), then records the register file and
* register number it was given. The register type is always set to
* BRW_REGISTER_TYPE_F; use the three-argument overload to choose a
* different type.
*/
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = BRW_REGISTER_TYPE_F;
}
/**
* Fixed HW reg constructor with an explicit type.
*
* Resets the register with init(), then records the register file,
* register number and register type exactly as passed by the caller.
*/
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = type;
}
* This brings in those uniform definitions
*/
void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
{
- hash_table_call_foreach(src_variable_ht,
+ hash_table_call_foreach(v->variable_ht,
import_uniforms_callback,
variable_ht);
+ this->params_remap = v->params_remap; /* alias (not copy) v's remap table */
}
/* Our support for uniforms is piggy-backed on the struct
assert(param < ARRAY_SIZE(c->prog_data.param));
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
- c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
- case GLSL_TYPE_UINT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
- break;
- case GLSL_TYPE_INT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
- break;
- case GLSL_TYPE_BOOL:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
- break;
- default:
- assert(!"not reached");
+ if (ctx->Const.NativeIntegers) {
c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
+ } else {
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ }
}
this->param_index[param] = loc;
this->param_offset[param] = i;
emit(BRW_OPCODE_MOV, wpos,
fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
} else {
- emit(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 2));
+ emit(FS_OPCODE_LINTERP, wpos,
+ this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ interp_reg(FRAG_ATTRIB_WPOS, 2));
}
wpos.reg_offset++;
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
- /* Interpolation is always in floating point regs. */
- reg->type = BRW_REGISTER_TYPE_F;
+ reg->type = brw_type_for_base_type(ir->type->get_scalar_type());
fs_reg attr = *reg;
unsigned int array_elements;
type = ir->type;
}
+ glsl_interp_qualifier interpolation_mode =
+ ir->determine_interpolation_mode(c->key.flat_shade);
+
int location = ir->location;
for (unsigned int i = 0; i < array_elements; i++) {
for (unsigned int j = 0; j < type->matrix_columns; j++) {
continue;
}
- bool is_gl_Color =
- location == FRAG_ATTRIB_COL0 || location == FRAG_ATTRIB_COL1;
-
- if (c->key.flat_shade && is_gl_Color) {
+ if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
/* Constant interpolation (flat shading) case. The SF has
* handed us defined values in only the constant offset
* field of the setup reg.
for (unsigned int k = 0; k < type->vector_elements; k++) {
struct brw_reg interp = interp_reg(location, k);
interp = suboffset(interp, 3);
+ interp.type = reg->type;
emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
attr.reg_offset++;
}
} else {
- /* Perspective interpolation case. */
+ /* Smooth/noperspective interpolation case. */
for (unsigned int k = 0; k < type->vector_elements; k++) {
- struct brw_reg interp = interp_reg(location, k);
- emit(FS_OPCODE_LINTERP, attr,
- this->delta_x, this->delta_y, fs_reg(interp));
+ /* FINISHME: At some point we probably want to push
+ * this farther by giving similar treatment to the
+ * other potentially constant components of the
+ * attribute, as well as making brw_vs_constval.c
+ * handle varyings other than gl_TexCoord.
+ */
+ if (location >= FRAG_ATTRIB_TEX0 &&
+ location <= FRAG_ATTRIB_TEX7 &&
+ k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+ emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+ } else {
+ struct brw_reg interp = interp_reg(location, k);
+ brw_wm_barycentric_interp_mode barycoord_mode;
+ if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+ barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+ else
+ barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ emit(FS_OPCODE_LINTERP, attr,
+ this->delta_x[barycoord_mode],
+ this->delta_y[barycoord_mode], fs_reg(interp));
+ }
attr.reg_offset++;
}
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
{
switch (opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
break;
default:
assert(!"not reached: bad math opcode");
* expanding that result out, but we would need to be careful with
* masking.
*
- * The hardware ignores source modifiers (negate and abs) on math
+ * Gen 6 hardware ignores source modifiers (negate and abs) on math
* instructions, so we also move to a temp to set those up.
*/
- if (intel->gen >= 6 && (src.file == UNIFORM ||
+ if (intel->gen == 6 && (src.file == UNIFORM ||
src.abs ||
src.negate)) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
int base_mrf = 2;
fs_inst *inst;
- assert(opcode == FS_OPCODE_POW);
+ switch (opcode) {
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ break;
+ default:
+ assert(!"not reached: unsupported binary math opcode.");
+ return NULL;
+ }
- if (intel->gen >= 6) {
+ if (intel->gen >= 7) {
+ inst = emit(opcode, dst, src0, src1);
+ } else if (intel->gen == 6) {
/* Can't do hstride == 0 args to gen6 math, so expand it out.
*
* The hardware ignores source modifiers (negate and abs) on math
*/
if (src0.file == UNIFORM || src0.abs || src0.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ expanded.type = src0.type;
emit(BRW_OPCODE_MOV, expanded, src0);
src0 = expanded;
}
if (src1.file == UNIFORM || src1.abs || src1.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ expanded.type = src1.type;
emit(BRW_OPCODE_MOV, expanded, src1);
src1 = expanded;
}
inst = emit(opcode, dst, src0, src1);
} else {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1);
- inst = emit(opcode, dst, src0, reg_null_f);
+ /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
+ * "Message Payload":
+ *
+ * "Operand0[7]. For the INT DIV functions, this operand is the
+ * denominator."
+ * ...
+ * "Operand1[7]. For the INT DIV functions, this operand is the
+ * numerator."
+ */
+ bool is_int_div = opcode != SHADER_OPCODE_POW;
+ fs_reg &op0 = is_int_div ? src1 : src0;
+ fs_reg &op1 = is_int_div ? src0 : src1;
+
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, op1.type), op1);
+ inst = emit(opcode, dst, op0, reg_null_f);
inst->base_mrf = base_mrf;
inst->mlen = 2 * c->dispatch_width / 8;
/* Set up the pointers to ParamValues now that that array is finalized. */
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
c->prog_data.param[i] =
- fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+ (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
this->param_offset[i];
}
}
}
/* Map the offsets in the UNIFORM file to fixed HW regs. */
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
- int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
/* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
- int fp_index;
-
- if (i >= VERT_RESULT_VAR0)
- fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
- else if (i <= VERT_RESULT_TEX7)
- fp_index = i;
- else
- fp_index = -1;
+ int fp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i);
if (fp_index >= 0)
urb_setup[fp_index] = urb_next++;
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode == FS_OPCODE_LINTERP) {
assert(inst->src[2].file == FIXED_HW_REG);
split_grf[i] = false;
}
- if (brw->has_pln) {
- /* PLN opcodes rely on the delta_xy being contiguous. */
- split_grf[this->delta_x.reg] = false;
+ if (brw->has_pln &&
+ this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF) {
+ /* PLN opcodes rely on the delta_xy being contiguous. We only have to
+ * check this for BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC, because prior to
+ * Gen6, that was the only supported interpolation mode, and since Gen6,
+ * delta_x and delta_y are in fixed hardware registers.
+ */
+ split_grf[this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg] =
+ false;
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
/* Texturing produces 4 contiguous registers, so no splitting. */
if (inst->is_tex()) {
}
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->dst.file == GRF &&
split_grf[inst->dst.reg] &&
this->live_intervals_valid = false;
}
+bool
+fs_visitor::remove_dead_constants()
+{
+ if (c->dispatch_width == 8) {
+ this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+ this->params_remap[i] = -1;
+
+ /* params_remap now has one entry per param, all set to -1, which
+ * means "not referenced by any instruction". Find which params
+ * are still in use by walking every source of every instruction.
+ */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(constant_nr < (int)c->prog_data.nr_params);
+
+ /* For now, set this to non-negative. We'll give it the
+ * actual new number in a moment, in order to keep the
+ * register numbers nicely ordered. (Numbering here, in
+ * instruction order, would instead order the params by
+ * first use.)
+ */
+ this->params_remap[constant_nr] = 0;
+ }
+ }
+
+ /* Figure out what the new numbers for the params will be: the
+ * surviving params are numbered 0, 1, 2, ... in their original
+ * order, and dead ones keep -1. At some point when we're doing
+ * uniform array access, we're going to want to keep the
+ * distinction between .reg and .reg_offset, but for now we
+ * don't care.
+ */
+ unsigned int new_nr_params = 0;
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ if (this->params_remap[i] != -1) {
+ this->params_remap[i] = new_nr_params++;
+ }
+ }
+
+ /* Update the list of params to be uploaded to match our new
+ * numbering. The arrays are compacted in place; that is safe
+ * because a param's new index is never larger than its old one,
+ * so a copy never overwrites an entry that is still to be moved.
+ */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ int remapped = this->params_remap[i];
+
+ if (remapped == -1)
+ continue;
+
+ /* We've already done setup_paramvalues_refs() so no need to worry
+ * about param_index and param_offset.
+ */
+ c->prog_data.param[remapped] = c->prog_data.param[i];
+ c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+ }
+
+ c->prog_data.nr_params = new_nr_params;
+ } else {
+ /* This should have been generated in the 8-wide pass already,
+ * and handed to this visitor by import_uniforms(). The param
+ * arrays and nr_params were compacted by that pass too, so only
+ * the instruction rewrite below is left to do here.
+ */
+ assert(this->params_remap);
+ }
+
+ /* Now do the renumbering of the shader to remove unused params.
+ * This runs for every dispatch width. Each UNIFORM source has its
+ * reg + reg_offset pair replaced by the single remapped number in
+ * .reg, with .reg_offset cleared to 0.
+ */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(this->params_remap[constant_nr] != -1);
+ inst->src[i].reg = this->params_remap[constant_nr];
+ inst->src[i].reg_offset = 0;
+ }
+ }
+
+ return true;
+}
+
/**
* Choose accesses from the UNIFORM file to demote to using the pull
* constant buffer.
int pull_uniform_base = max_uniform_components;
int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM)
continue;
- int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (uniform_nr < pull_uniform_base)
continue;
}
int ip = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode == BRW_OPCODE_DO) {
if (loop_depth++ == 0)
}
} else {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+ if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
if (!loop_depth) {
}
}
}
- if (inst->dst.file == GRF && inst->dst.reg != 0) {
+ if (inst->dst.file == GRF) {
int reg = inst->dst.reg;
if (!loop_depth) {
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
/* Found a move of a constant to a GRF. Find anything else using the GRF
* before it's written, and replace it with the constant if we can.
*/
- exec_list_iterator scan_iter = iter;
- scan_iter.next();
- for (; scan_iter.has_next(); scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
scan_inst->opcode == BRW_OPCODE_ELSE ||
scan_inst->src[i] = inst->src[0];
progress = true;
} else if (i == 0 && scan_inst->src[1].file != IMM) {
- /* Fit this constant in by commuting the operands */
+ /* Fit this constant in by commuting the operands.
+ * Exception: we can't do this for 32-bit integer MUL
+ * because it's asymmetric.
+ */
+ if (scan_inst->opcode == BRW_OPCODE_MUL &&
+ (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
+ scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
+ break;
scan_inst->src[0] = scan_inst->src[1];
scan_inst->src[1] = inst->src[0];
progress = true;
break;
case BRW_OPCODE_CMP:
+ case BRW_OPCODE_IF:
if (i == 1) {
scan_inst->src[i] = inst->src[0];
progress = true;
progress = true;
}
break;
+
+ case SHADER_OPCODE_RCP:
+ /* The hardware doesn't do math on immediate values
+ * (because why are you doing that, seriously?), but
+ * the correct answer is to just constant fold it
+ * anyway.
+ */
+ assert(i == 0);
+ if (inst->src[0].imm.f != 0.0f) {
+ scan_inst->opcode = BRW_OPCODE_MOV;
+ scan_inst->src[0] = inst->src[0];
+ scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
}
}
return progress;
}
+
+
+/**
+ * Performs simple algebraic simplifications on the instruction list.
+ *
+ * The only rewrite implemented so far is "a * 1.0 = a": a MUL whose
+ * second source is the float immediate 1.0 is turned into a MOV of its
+ * first source, and the second source is cleared to reg_undef.
+ *
+ * Returns true if any instruction was changed.
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+ bool progress = false;
+
+ calculate_live_intervals();
+
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MUL:
+ if (inst->src[1].file != IMM)
+ continue;
+
+ /* a * 1.0 = a */
+ if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+ inst->src[1].imm.f == 1.0) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
+ break;
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
/**
* Must be called after calculate_live_intervals() to remove unused
* writes to registers -- register allocation will fail otherwise
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
inst->remove();
int if_depth = 0;
int loop_depth = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
/* Make sure that we dominate the instructions we're going to
* scan for interfering with our coalescing, or we won't have
case BRW_OPCODE_ENDIF:
if_depth--;
break;
+ default:
+ break;
}
if (loop_depth || if_depth)
continue;
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
inst->saturate ||
- inst->dst.file != GRF || inst->src[0].file != GRF ||
+ inst->dst.file != GRF || (inst->src[0].file != GRF &&
+ inst->src[0].file != UNIFORM)||
inst->dst.type != inst->src[0].type)
continue;
* program.
*/
bool interfered = false;
- exec_list_iterator scan_iter = iter;
- scan_iter.next();
- for (; scan_iter.has_next(); scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->dst.file == GRF) {
if (scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
interfered = true;
break;
}
- if (scan_inst->dst.reg == inst->src[0].reg &&
+ if (inst->src[0].file == GRF &&
+ scan_inst->dst.reg == inst->src[0].reg &&
(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
scan_inst->is_tex())) {
interfered = true;
}
}
- /* The gen6 MATH instruction can't handle source modifiers, so avoid
- * coalescing those for now. We should do something more specific.
+ /* The gen6 MATH instruction can't handle source modifiers or
+ * unusual register regions, so avoid coalescing those for
+ * now. We should do something more specific.
+ */
+ if (intel->gen >= 6 &&
+ scan_inst->is_math() &&
+ (has_source_modifiers || inst->src[0].file == UNIFORM)) {
+ interfered = true;
+ break;
+ }
+
+ /* The accumulator result appears to get used for the
+ * conditional modifier generation. When negating a UD
+ * value, there is a 33rd bit generated for the sign in the
+ * accumulator value, so now you can't check, for example,
+ * equality with a 32-bit value. See piglit fs-op-neg-uint.
*/
- if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+ if (scan_inst->conditional_mod &&
+ inst->src[0].negate &&
+ inst->src[0].type == BRW_REGISTER_TYPE_UD) {
interfered = true;
break;
}
/* Rewrite the later usage to point at the source of the move to
* be removed.
*/
- for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
- scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+ for (fs_inst *scan_inst = inst;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
- scan_inst->src[i].reg = inst->src[0].reg;
- scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
- scan_inst->src[i].abs |= inst->src[0].abs;
- scan_inst->src[i].negate ^= inst->src[0].negate;
- scan_inst->src[i].smear = inst->src[0].smear;
+ fs_reg new_src = inst->src[0];
+ if (scan_inst->src[i].abs) {
+ new_src.negate = 0;
+ new_src.abs = 1;
+ }
+ new_src.negate ^= scan_inst->src[i].negate;
+ scan_inst->src[i] = new_src;
}
}
}
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
int ip = next_ip;
next_ip++;
/* Work out which hardware MRF registers are written by this
* instruction.
*/
- int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
int mrf_high;
- if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!inst->force_uncompressed && !inst->force_sechalf)) {
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
- scan_inst->dst.hw_reg = inst->dst.hw_reg;
+ scan_inst->dst.reg = inst->dst.reg;
scan_inst->saturate |= inst->saturate;
inst->remove();
progress = true;
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
int scan_mrf_high;
- if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!scan_inst->force_uncompressed &&
memset(last_mrf_move, 0, sizeof(last_mrf_move));
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
switch (inst->opcode) {
case BRW_OPCODE_DO:
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
- fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+ fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove();
progress = true;
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
- last_mrf_move[inst->dst.hw_reg] = NULL;
+ last_mrf_move[inst->dst.reg] = NULL;
}
if (inst->mlen > 0) {
inst->dst.file == MRF &&
inst->src[0].file == GRF &&
!inst->predicated) {
- last_mrf_move[inst->dst.hw_reg] = inst;
+ last_mrf_move[inst->dst.reg] = inst;
}
}
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
- foreach_iter(exec_list_iterator, iter, *shader->ir) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &*shader->ir) {
+ ir_instruction *ir = (ir_instruction *)node;
base_ir = ir;
this->result = reg_undef;
ir->accept(this);
progress = remove_duplicate_mrf_writes() || progress;
progress = propagate_constants() || progress;
+ progress = opt_algebraic() || progress;
progress = register_coalesce() || progress;
progress = compute_to_mrf() || progress;
progress = dead_code_eliminate() || progress;
} while (progress);
+ remove_dead_constants();
+
schedule_instructions();
assign_curb_setup();
if (0) {
/* Debug of register spilling: Go spill everything. */
int virtual_grf_count = virtual_grf_next;
- for (int i = 1; i < virtual_grf_count; i++) {
+ for (int i = 0; i < virtual_grf_count; i++) {
spill_reg(i);
}
}
/* Make sure we didn't try to sneak in an extra uniform */
assert(orig_nr_params == c->prog_data.nr_params);
+ (void) orig_nr_params;
}
return !failed;
fs_visitor v(c, prog, shader);
if (!v.run()) {
- prog->LinkStatus = GL_FALSE;
- prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
return false;
}
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
c->dispatch_width = 16;
fs_visitor v2(c, prog, shader);
- v2.import_uniforms(v.variable_ht);
+ v2.import_uniforms(&v);
v2.run();
}
{
struct brw_context *brw = brw_context(ctx);
struct brw_wm_prog_key key;
- struct gl_fragment_program *fp = prog->FragmentProgram;
- struct brw_fragment_program *bfp = brw_fragment_program(fp);
- if (!fp)
+ if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
return true;
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)
+ prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+
memset(&key, 0, sizeof(key));
if (fp->UsesKill)
key.vp_outputs_written |= BITFIELD64_BIT(FRAG_ATTRIB_WPOS);
for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
- int vp_index = -1;
-
if (!(fp->Base.InputsRead & BITFIELD64_BIT(i)))
continue;
key.proj_attrib_mask |= 1 << i;
- if (i <= FRAG_ATTRIB_TEX7)
- vp_index = i;
- else if (i >= FRAG_ATTRIB_VAR0)
- vp_index = i - FRAG_ATTRIB_VAR0 + VERT_RESULT_VAR0;
+ int vp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i);
if (vp_index >= 0)
key.vp_outputs_written |= BITFIELD64_BIT(vp_index);
for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
if (fp->Base.ShadowSamplers & (1 << i))
- key.compare_funcs[i] = GL_LESS;
+ key.tex.compare_funcs[i] = GL_LESS;
/* FINISHME: depth compares might use (0,0,0,W) for example */
- key.tex_swizzles[i] = SWIZZLE_XYZW;
+ key.tex.swizzles[i] = SWIZZLE_XYZW;
}
if (fp->Base.InputsRead & FRAG_BIT_WPOS) {