GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
+ struct intel_context *intel = intel_context(ctx);
if (using_new_fs == -1)
using_new_fs = getenv("INTEL_NEW_FS") != NULL;
do_sub_to_add_neg(shader->ir);
do_explog_to_explog2(shader->ir);
do_lower_texture_projection(shader->ir);
+ brw_do_cubemap_normalize(shader->ir);
do {
progress = false;
GL_TRUE, /* temp */
GL_TRUE /* uniform */
) || progress;
+ if (intel->gen == 6) {
+ progress = do_if_to_cond_assign(shader->ir) || progress;
+ }
} while (progress);
validate_ir_tree(shader->ir);
this->sampler = 0;
this->target = 0;
this->eot = false;
+ this->header_present = false;
this->shadow_compare = false;
}
int sampler;
int target; /**< MRT target. */
bool eot;
+ bool header_present;
bool shadow_compare;
/** @{
this->virtual_grf_array_size = 0;
this->virtual_grf_def = NULL;
this->virtual_grf_use = NULL;
+
+ this->kill_emitted = false;
}
+
~fs_visitor()
{
talloc_free(this->mem_ctx);
void assign_regs();
void assign_regs_trivial();
void calculate_live_intervals();
+ bool propagate_constants();
+ bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
void generate_code();
void generate_fb_write(fs_inst *inst);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void emit_dummy_fs();
- void emit_fragcoord_interpolation(ir_variable *ir);
- void emit_general_interpolation(ir_variable *ir);
+ fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
+ fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
+ fs_reg *emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
ir_variable *frag_color, *frag_data, *frag_depth;
int first_non_payload_grf;
int urb_setup[FRAG_ATTRIB_MAX];
+ bool kill_emitted;
/** @{ debug annotation info */
const char *current_annotation;
}
}
-void
+fs_reg *
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
/* gl_FragCoord.w: Already set up in emit_interpolation */
emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
- hash_table_insert(this->variable_ht, reg, ir);
+ return reg;
}
-
-void
+fs_reg *
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
fs_reg(interp)));
attr.reg_offset++;
}
- attr.reg_offset -= type->vector_elements;
- for (unsigned int c = 0; c < type->vector_elements; c++) {
- emit(fs_inst(BRW_OPCODE_MUL,
- attr,
- attr,
- this->pixel_w));
- attr.reg_offset++;
+ if (intel->gen < 6) {
+ attr.reg_offset -= type->vector_elements;
+ for (unsigned int c = 0; c < type->vector_elements; c++) {
+ emit(fs_inst(BRW_OPCODE_MUL,
+ attr,
+ attr,
+ this->pixel_w));
+ attr.reg_offset++;
+ }
}
location++;
}
}
- hash_table_insert(this->variable_ht, reg, ir);
+ return reg;
+}
+
+fs_reg *
+fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
+{
+ fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
+
+ /* The frontfacing comes in as a bit in the thread payload. */
+ if (intel->gen >= 6) {
+ emit(fs_inst(BRW_OPCODE_ASR,
+ *reg,
+ fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
+ fs_reg(15)));
+ emit(fs_inst(BRW_OPCODE_NOT,
+ *reg,
+ *reg));
+ emit(fs_inst(BRW_OPCODE_AND,
+ *reg,
+ *reg,
+ fs_reg(1)));
+ } else {
+ fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
+ struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+ /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+ * us front face
+ */
+ fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
+ *reg,
+ fs_reg(r1_6ud),
+ fs_reg(1u << 31)));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
+ }
+
+ return reg;
}
void
if (ir->mode == ir_var_in) {
if (!strcmp(ir->name, "gl_FragCoord")) {
- emit_fragcoord_interpolation(ir);
- return;
+ reg = emit_fragcoord_interpolation(ir);
} else if (!strcmp(ir->name, "gl_FrontFacing")) {
- reg = new(this->mem_ctx) fs_reg(this, ir->type);
- struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
- /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
- * us front face
- */
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
- *reg,
- fs_reg(r1_6ud),
- fs_reg(1u << 31)));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
+ reg = emit_frontfacing_interpolation(ir);
} else {
- emit_general_interpolation(ir);
- return;
+ reg = emit_general_interpolation(ir);
}
+ assert(reg);
+ hash_table_insert(this->variable_ht, reg, ir);
+ return;
}
if (ir->mode == ir_var_uniform) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
coordinate));
coordinate.reg_offset++;
- mlen++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen = 3;
for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
coordinate.reg_offset++;
- mlen++;
}
if (ir->shadow_comparitor) {
if (ir->shadow_comparitor)
inst->shadow_compare = true;
+
+ if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
+ fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
+
+ for (int i = 0; i < 4; i++) {
+ int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
+ fs_reg l = swizzle_dst;
+ l.reg_offset += i;
+
+ if (swiz == SWIZZLE_ZERO) {
+ emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
+ } else if (swiz == SWIZZLE_ONE) {
+ emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
+ } else {
+ fs_reg r = dst;
+ r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
+ emit(fs_inst(BRW_OPCODE_MOV, l, r));
+ }
+ }
+ this->result = swizzle_dst;
+ }
}
void
ir->val->accept(this);
fs_reg val = this->result;
+ if (ir->type->vector_elements == 1) {
+ this->result.reg_offset += ir->mask.x;
+ return;
+ }
+
fs_reg result = fs_reg(this, ir->type);
this->result = result;
assert(ir->condition == NULL); /* FINISHME */
emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
+ kill_emitted = true;
}
void
fs_reg(brw_imm_v(0x11001100))));
this->current_annotation = "compute pixel deltas from v0";
- this->delta_x = fs_reg(this, glsl_type::float_type);
- this->delta_y = fs_reg(this, glsl_type::float_type);
+ if (brw->has_pln) {
+ this->delta_x = fs_reg(this, glsl_type::vec2_type);
+ this->delta_y = this->delta_x;
+ this->delta_y.reg_offset++;
+ } else {
+ this->delta_x = fs_reg(this, glsl_type::float_type);
+ this->delta_y = fs_reg(this, glsl_type::float_type);
+ }
emit(fs_inst(BRW_OPCODE_ADD,
this->delta_x,
this->pixel_x,
fs_visitor::emit_fb_writes()
{
this->current_annotation = "FB write header";
+ GLboolean header_present = GL_TRUE;
int nr = 0;
- /* m0, m1 header */
- nr += 2;
+ if (intel->gen >= 6 &&
+ !this->kill_emitted &&
+ c->key.nr_color_regions == 1) {
+ header_present = false;
+ }
+
+ if (header_present) {
+ /* m0, m1 header */
+ nr += 2;
+ }
if (c->key.aa_dest_stencil_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
inst->mlen = nr;
if (target == c->key.nr_color_regions - 1)
inst->eot = true;
+ inst->header_present = header_present;
}
if (c->key.nr_color_regions == 0) {
reg_undef, reg_undef));
inst->mlen = nr;
inst->eot = true;
+ inst->header_present = header_present;
}
this->current_annotation = NULL;
fs_visitor::generate_fb_write(fs_inst *inst)
{
GLboolean eot = inst->eot;
+ struct brw_reg implied_header;
/* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
* move, here's g1.
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p,
- brw_message_reg(1),
- brw_vec8_grf(1, 0));
+
+ if (inst->header_present) {
+ if (intel->gen >= 6) {
+ brw_MOV(p,
+ brw_message_reg(0),
+ brw_vec8_grf(0, 0));
+ implied_header = brw_null_reg();
+ } else {
+ implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ }
+
+ brw_MOV(p,
+ brw_message_reg(1),
+ brw_vec8_grf(1, 0));
+ } else {
+ implied_header = brw_null_reg();
+ }
+
brw_pop_insn_state(p);
brw_fb_WRITE(p,
8, /* dispatch_width */
retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
0, /* base MRF */
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ implied_header,
inst->target,
inst->mlen,
0,
int rlen = 4;
uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
- if (intel->gen == 5) {
+ if (intel->gen >= 5) {
switch (inst->opcode) {
case FS_OPCODE_TEX:
if (inst->shadow_compare) {
c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
- if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
- c->prog_data.curb_read_length) & 1) {
- /* Align the start of the interpolation coefficients so that we can use
- * the PLN instruction.
- */
- c->prog_data.first_curbe_grf++;
- }
-
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
/* Figure out where each of the incoming setup attributes lands. */
if (intel->gen >= 6) {
for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (i == FRAG_ATTRIB_WPOS ||
- (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) {
+ if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
urb_setup[i] = urb_next++;
}
}
int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
int class_sizes[base_reg_count];
int class_count = 0;
-
- calculate_live_intervals();
+ int aligned_pair_class = -1;
/* Set up the register classes.
*
* time.
*/
class_sizes[class_count++] = 1;
+ if (brw->has_pln && intel->gen < 6) {
+ /* Always set up the (unaligned) pairs for gen5, so we can find
+ * them for making the aligned pair class.
+ */
+ class_sizes[class_count++] = 2;
+ }
for (int r = 1; r < this->virtual_grf_next; r++) {
int i;
break;
}
if (i == class_count) {
+ if (this->virtual_grf_sizes[r] >= base_reg_count) {
+ fprintf(stderr, "Object too large to register allocate.\n");
+ this->fail = true;
+ }
+
class_sizes[class_count++] = this->virtual_grf_sizes[r];
}
}
int ra_reg_count = 0;
int class_base_reg[class_count];
int class_reg_count[class_count];
- int classes[class_count];
+ int classes[class_count + 1];
for (int i = 0; i < class_count; i++) {
class_base_reg[i] = ra_reg_count;
* that alias base regs, or the base regs themselves for classes[0].
*/
for (int c = 0; c <= i; c++) {
- for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) {
+ for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
- c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1);
+ c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
c_r++) {
if (0) {
printf("%d/%d conflicts %d/%d\n",
- class_sizes[i], i_r,
- class_sizes[c], c_r);
+ class_sizes[i], this->first_non_payload_grf + i_r,
+ class_sizes[c], this->first_non_payload_grf + c_r);
}
ra_add_reg_conflict(regs,
}
}
+ /* Add a special class for aligned pairs, which we'll put delta_x/y
+ * in on gen5 so that we can do PLN.
+ */
+ if (brw->has_pln && intel->gen < 6) {
+ int reg_count = (base_reg_count - 1) / 2;
+ int unaligned_pair_class = 1;
+ assert(class_sizes[unaligned_pair_class] == 2);
+
+ aligned_pair_class = class_count;
+ classes[aligned_pair_class] = ra_alloc_reg_class(regs);
+ class_base_reg[aligned_pair_class] = 0;
+ class_reg_count[aligned_pair_class] = 0;
+ int start = (this->first_non_payload_grf & 1) ? 1 : 0;
+
+ for (int i = 0; i < reg_count; i++) {
+ ra_class_add_reg(regs, classes[aligned_pair_class],
+ class_base_reg[unaligned_pair_class] + i * 2 + start);
+ }
+ class_count++;
+ }
+
ra_set_finalize(regs);
struct ra_graph *g = ra_alloc_interference_graph(regs,
for (int i = 1; i < this->virtual_grf_next; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
- ra_set_node_class(g, i, classes[c]);
+ if (aligned_pair_class >= 0 &&
+ this->delta_x.reg == i) {
+ ra_set_node_class(g, i, classes[aligned_pair_class]);
+ } else {
+ ra_set_node_class(g, i, classes[c]);
+ }
break;
}
}
for (int c = 0; c < class_count; c++) {
if (reg >= class_base_reg[c] &&
- reg < class_base_reg[c] + class_reg_count[c] - 1) {
+ reg < class_base_reg[c] + class_reg_count[c]) {
hw_reg = reg - class_base_reg[c];
break;
}
for (int i = 0; i < num_vars; i++) {
def[i] = 1 << 30;
- use[i] = 0;
+ use[i] = -1;
}
int ip = 0;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
- def[inst->src[i].reg] = MIN2(def[inst->src[i].reg], eip);
use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
}
}
if (inst->dst.file == GRF && inst->dst.reg != 0) {
def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
- use[inst->dst.reg] = MAX2(use[inst->dst.reg], eip);
}
}
ip++;
}
+ talloc_free(this->virtual_grf_def);
+ talloc_free(this->virtual_grf_use);
this->virtual_grf_def = def;
this->virtual_grf_use = use;
}
+/**
+ * Attempts to move immediate constants into the immediate
+ * constant slot of following instructions.
+ *
+ * Immediate constants are a bit tricky -- they have to be in the last
+ * operand slot, you can't do abs/negate on them,
+ */
+
+bool
+fs_visitor::propagate_constants()
+{
+ bool progress = false;
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ if (inst->opcode != BRW_OPCODE_MOV ||
+ inst->predicated ||
+ inst->dst.file != GRF || inst->src[0].file != IMM ||
+ inst->dst.type != inst->src[0].type)
+ continue;
+
+ /* Don't bother with cases where we should have had the
+ * operation on the constant folded in GLSL already.
+ */
+ if (inst->saturate)
+ continue;
+
+ /* Found a move of a constant to a GRF. Find anything else using the GRF
+ * before it's written, and replace it with the constant if we can.
+ */
+ exec_list_iterator scan_iter = iter;
+ scan_iter.next();
+ for (; scan_iter.has_next(); scan_iter.next()) {
+ fs_inst *scan_inst = (fs_inst *)scan_iter.get();
+
+ if (scan_inst->opcode == BRW_OPCODE_DO ||
+ scan_inst->opcode == BRW_OPCODE_WHILE ||
+ scan_inst->opcode == BRW_OPCODE_ELSE ||
+ scan_inst->opcode == BRW_OPCODE_ENDIF) {
+ break;
+ }
+
+ for (int i = 2; i >= 0; i--) {
+ if (scan_inst->src[i].file != GRF ||
+ scan_inst->src[i].reg != inst->dst.reg ||
+ scan_inst->src[i].reg_offset != inst->dst.reg_offset)
+ continue;
+
+ /* Don't bother with cases where we should have had the
+ * operation on the constant folded in GLSL already.
+ */
+ if (scan_inst->src[i].negate || scan_inst->src[i].abs)
+ continue;
+
+ switch (scan_inst->opcode) {
+ case BRW_OPCODE_MOV:
+ scan_inst->src[i] = inst->src[0];
+ progress = true;
+ break;
+
+ case BRW_OPCODE_MUL:
+ case BRW_OPCODE_ADD:
+ if (i == 1) {
+ scan_inst->src[i] = inst->src[0];
+ progress = true;
+ } else if (i == 0 && scan_inst->src[1].file != IMM) {
+ /* Fit this constant in by commuting the operands */
+ scan_inst->src[0] = scan_inst->src[1];
+ scan_inst->src[1] = inst->src[0];
+ }
+ break;
+ case BRW_OPCODE_CMP:
+ if (i == 1) {
+ scan_inst->src[i] = inst->src[0];
+ progress = true;
+ }
+ }
+ }
+
+ if (scan_inst->dst.file == GRF &&
+ scan_inst->dst.reg == inst->dst.reg &&
+ (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
+ scan_inst->opcode == FS_OPCODE_TEX)) {
+ break;
+ }
+ }
+ }
+
+ return progress;
+}
+/**
+ * Must be called after calculate_live_intervales() to remove unused
+ * writes to registers -- register allocation will fail otherwise
+ * because something deffed but not used won't be considered to
+ * interfere with other regs.
+ */
+bool
+fs_visitor::dead_code_eliminate()
+{
+ bool progress = false;
+ int num_vars = this->virtual_grf_next;
+ bool dead[num_vars];
+
+ for (int i = 0; i < num_vars; i++) {
+ /* This would be ">=", but FS_OPCODE_DISCARD has a src == dst where
+ * it writes dst then reads it as src.
+ */
+ dead[i] = this->virtual_grf_def[i] > this->virtual_grf_use[i];
+
+ if (dead[i]) {
+ /* Mark off its interval so it won't interfere with anything. */
+ this->virtual_grf_def[i] = -1;
+ this->virtual_grf_use[i] = -1;
+ }
+ }
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ if (inst->dst.file == GRF && dead[inst->dst.reg]) {
+ inst->remove();
+ progress = true;
+ }
+ }
+
+ return progress;
+}
+
/**
 * Reports whether the live intervals of virtual GRFs a and b overlap.
 *
 * A use index of -1 marks a register with no recorded use; for such a
 * register only its def point is tested against the other register's
 * [def, use) range.
 */
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
   const int a_def = this->virtual_grf_def[a];
   const int a_use = this->virtual_grf_use[a];
   const int b_def = this->virtual_grf_def[b];
   const int b_use = this->virtual_grf_use[b];

   /* For dead code, just check if the def interferes with the other range. */
   if (a_use == -1)
      return a_def >= b_def && a_def < b_use;

   if (b_use == -1)
      return b_def >= a_def && b_def < a_use;

   /* Otherwise the intervals interfere when the later def does not come
    * after the earlier last use.
    */
   const int latest_def = MAX2(a_def, b_def);
   const int earliest_use = MIN2(a_use, b_use);

   return latest_def <= earliest_use;
}
case BRW_OPCODE_XOR:
brw_XOR(p, dst, src[0], src[1]);
break;
+ case BRW_OPCODE_NOT:
+ brw_NOT(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ASR:
+ brw_ASR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHR:
+ brw_SHR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHL:
+ brw_SHL(p, dst, src[0], src[1]);
+ break;
case BRW_OPCODE_CMP:
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
v.emit_fb_writes();
v.assign_curb_setup();
v.assign_urb_setup();
+
+ bool progress;
+ do {
+ progress = false;
+
+ v.calculate_live_intervals();
+ progress = v.propagate_constants() || progress;
+ progress = v.dead_code_eliminate() || progress;
+ } while (progress);
+
if (0)
v.assign_regs_trivial();
else
v.assign_regs();
}
- v.generate_code();
+ if (!v.fail)
+ v.generate_code();
assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */