}
}
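+/* Instructions emitted inside a force_uncompressed region must run 8-wide
+ * even in a 16-wide program (message header setup, for example), while a
+ * force_sechalf region restricts instructions to the second 8 channels.
+ * Plain counters suffice because the regions only nest.
+ */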
+void
+fs_visitor::push_force_uncompressed()
+{
+ force_uncompressed_stack++;
+}
+
+void
+fs_visitor::pop_force_uncompressed()
+{
+ force_uncompressed_stack--;
+ assert(force_uncompressed_stack >= 0);
+}
+
+void
+fs_visitor::push_force_sechalf()
+{
+ force_sechalf_stack++;
+}
+
+void
+fs_visitor::pop_force_sechalf()
+{
+ force_sechalf_stack--;
+ assert(force_sechalf_stack >= 0);
+}
+
/**
* Returns how many MRFs an FS opcode will write over.
*
{
fs_inst *inst;
+ if (c->dispatch_width == 16) {
+ fail("Can't support (non-uniform) control flow on 16-wide\n");
+ }
+
/* Don't point the annotation at the if statement, because then it plus
* the then and else blocks get printed.
*/
{
fs_reg counter = reg_undef;
+ if (c->dispatch_width == 16) {
+ fail("Can't support (non-uniform) control flow on 16-wide\n");
+ }
+
if (ir->counter) {
this->base_ir = ir->counter;
ir->counter->accept(this);
fs_inst *list_inst = new(mem_ctx) fs_inst;
*list_inst = inst;
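+ /* Tag the instruction with the current compression state so that
+ * generate_code() can set the EU compression control to match.
+ */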
+ if (force_uncompressed_stack > 0)
+ list_inst->force_uncompressed = true;
+ else if (force_sechalf_stack > 0)
+ list_inst->force_sechalf = true;
+
list_inst->annotation = this->current_annotation;
list_inst->ir = this->base_ir;
this->current_annotation = "FB write header";
GLboolean header_present = GL_TRUE;
int nr = 0;
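+ /* Number of hardware registers each logical register occupies:
+ * 1 for 8-wide dispatch, 2 for 16-wide.
+ */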
+ int reg_width = c->dispatch_width / 8;
if (intel->gen >= 6 &&
!this->kill_emitted &&
}
if (c->aa_dest_stencil_reg) {
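+ /* The AA destination stencil value only occupies a single payload
+ * register, so the move to the message header must be done 8-wide.
+ */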
+ push_force_uncompressed();
emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
+ pop_force_uncompressed();
}
/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
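+ /* Four color components, each taking reg_width MRFs. */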
- nr += 4;
+ nr += 4 * reg_width;
if (c->source_depth_to_render_target) {
+ if (intel->gen == 6 && c->dispatch_width == 16) {
+ /* For outputting oDepth on gen6, SIMD8 writes have to be
+ * used. This would require 8-wide moves of each half to
+ * message regs, kind of like pre-gen5 SIMD16 FB writes.
+ * Just bail on doing so for now.
+ */
+ fail("Missing support for simd16 depth writes on gen6\n");
+ }
+
if (c->computes_depth) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
} else {
/* Pass through the payload depth. */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
}
+ nr += reg_width;
}
if (c->dest_depth_reg) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
+ nr += reg_width;
}
fs_reg color = reg_undef;
target);
if (this->frag_color || this->frag_data) {
for (int i = 0; i < 4; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i), color);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i * reg_width), color);
color.reg_offset++;
}
}
brw_pop_insn_state(p);
brw_fb_WRITE(p,
- 8, /* dispatch_width */
+ c->dispatch_width,
inst->base_mrf,
implied_header,
inst->target,
void
fs_visitor::assign_curb_setup()
{
- c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
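+ /* The 8-wide and 16-wide programs are compiled separately, so record
+ * where each one's push constants begin.
+ */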
+ if (c->dispatch_width == 8) {
+ c->prog_data.first_curbe_grf = c->nr_payload_regs;
+ } else {
+ c->prog_data.first_curbe_grf_16 = c->nr_payload_regs;
+ }
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_iter(exec_list_iterator, iter, this->instructions) {
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
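+ /* Constants start right after the payload for either dispatch
+ * width, so don't depend on which first_curbe_grf field was set.
+ */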
- struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
+ struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
void
fs_visitor::assign_urb_setup()
{
- int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
+ int urb_start = c->nr_payload_regs + c->prog_data.curb_read_length;
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
void
fs_visitor::generate_code()
{
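+ /* A 16-wide program is appended to the instruction store that
+ * already holds the 8-wide code, so annotation tracking starts at
+ * the current end of the store rather than at zero.
+ */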
- int last_native_inst = 0;
+ int last_native_inst = p->nr_insn;
const char *last_annotation_string = NULL;
ir_instruction *last_annotation_ir = NULL;
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("Native code for fragment shader %d:\n",
- ctx->Shader.CurrentFragmentProgram->Name);
+ printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+ ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width);
}
foreach_iter(exec_list_iterator, iter, this->instructions) {
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
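+ /* Choose EU compression control: 8-wide programs always run
+ * uncompressed, and 16-wide instructions are compressed unless an
+ * earlier pass flagged them to execute only one half.
+ */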
+ if (inst->force_uncompressed || c->dispatch_width == 8) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ } else if (inst->force_sechalf) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ } else {
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+
switch (inst->opcode) {
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
}
}
-GLboolean
-brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+bool
+fs_visitor::run()
{
- struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &intel->ctx;
- struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
+ uint32_t prog_offset_16 = 0;
- if (!prog)
- return GL_FALSE;
+ brw_wm_payload_setup(brw, c);
- struct brw_shader *shader =
- (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (!shader)
- return GL_FALSE;
+ if (c->dispatch_width == 16) {
+ if (c->prog_data.curb_read_length) {
+ /* Haven't hooked in support for uniforms through the 16-wide
+ * version yet.
+ */
+ return false;
+ }
- /* We always use 8-wide mode, at least for now. For one, flow
- * control only works in 8-wide. Also, when we're fragment shader
- * bound, we're almost always under register pressure as well, so
- * 8-wide would save us from the performance cliff of spilling
- * regs.
- */
- c->dispatch_width = 8;
+ /* Align to a 64-byte boundary, since kernel start pointers are
+ * 64-byte aligned.
+ */
+ while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) {
+ brw_NOP(p);
+ }
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("GLSL IR for native fragment shader %d:\n", prog->Name);
- _mesa_print_ir(shader->ir, NULL);
- printf("\n");
- }
+ /* Save off the start of this 16-wide program in case we succeed. */
+ prog_offset_16 = c->func.nr_insn * sizeof(struct brw_instruction);
- /* Now the main event: Visit the shader IR and generate our FS IR for it.
- */
- fs_visitor v(c, shader);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
if (0) {
- v.emit_dummy_fs();
+ emit_dummy_fs();
} else {
- v.calculate_urb_setup();
+ calculate_urb_setup();
if (intel->gen < 6)
- v.emit_interpolation_setup_gen4();
+ emit_interpolation_setup_gen4();
else
- v.emit_interpolation_setup_gen6();
+ emit_interpolation_setup_gen6();
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
foreach_iter(exec_list_iterator, iter, *shader->ir) {
ir_instruction *ir = (ir_instruction *)iter.get();
- v.base_ir = ir;
- ir->accept(&v);
+ base_ir = ir;
+ ir->accept(this);
}
- v.emit_fb_writes();
+ emit_fb_writes();
- v.split_virtual_grfs();
+ split_virtual_grfs();
- v.setup_paramvalues_refs();
- v.setup_pull_constants();
+ setup_paramvalues_refs();
+ setup_pull_constants();
bool progress;
do {
progress = false;
- progress = v.remove_duplicate_mrf_writes() || progress;
+ progress = remove_duplicate_mrf_writes() || progress;
- progress = v.propagate_constants() || progress;
- progress = v.register_coalesce() || progress;
- progress = v.compute_to_mrf() || progress;
- progress = v.dead_code_eliminate() || progress;
+ progress = propagate_constants() || progress;
+ progress = register_coalesce() || progress;
+ progress = compute_to_mrf() || progress;
+ progress = dead_code_eliminate() || progress;
} while (progress);
- v.schedule_instructions();
+ schedule_instructions();
- v.assign_curb_setup();
- v.assign_urb_setup();
+ assign_curb_setup();
+ assign_urb_setup();
if (0) {
/* Debug of register spilling: Go spill everything. */
- int virtual_grf_count = v.virtual_grf_next;
+ int virtual_grf_count = virtual_grf_next;
for (int i = 1; i < virtual_grf_count; i++) {
- v.spill_reg(i);
+ spill_reg(i);
}
}
if (0)
- v.assign_regs_trivial();
+ assign_regs_trivial();
else {
- while (!v.assign_regs()) {
- if (v.failed)
+ while (!assign_regs()) {
+ if (failed)
break;
}
}
}
+ assert(force_uncompressed_stack == 0);
+ assert(force_sechalf_stack == 0);
- if (!v.failed)
- v.generate_code();
-
- assert(!v.failed); /* FINISHME: Cleanly fail, tested at link time, etc. */
+ if (!failed)
+ generate_code();
- if (v.failed)
- return GL_FALSE;
+ if (failed)
+ return false;
- c->prog_data.total_grf = v.grf_used;
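+ /* Register use is recorded per dispatch width so that state upload
+ * can size each program's GRF allocation independently.
+ */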
+ if (c->dispatch_width == 8) {
+ c->prog_data.total_grf = grf_used;
+ } else {
+ c->prog_data.total_grf_16 = grf_used;
+ c->prog_data.prog_offset_16 = prog_offset_16;
+ }
+
+ return !failed;
+}
- return GL_TRUE;
+bool
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
+
+ if (!prog)
+ return false;
+
+ struct brw_shader *shader =
+ (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ if (!shader)
+ return false;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+ _mesa_print_ir(shader->ir, NULL);
+ printf("\n");
+ }
+
+ c->dispatch_width = 8;
+
+ /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ */
+ fs_visitor v(c, shader);
+ if (!v.run()) {
+ /* FINISHME: Cleanly fail, test at link time, etc. */
+ assert(!"not reached");
+ return false;
+ }
+
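+ /* Try a 16-wide compile on gen6+. It's allowed to fail (flow
+ * control, uniforms, and gen6 oDepth writes all bail), in which case
+ * only the 8-wide program is used.
+ */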
+ if (intel->gen >= 6) {
+ c->dispatch_width = 16;
+ fs_visitor v2(c, shader);
+ v2.run();
+ }
+
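+ /* The 8-wide program is always available, so it's the default. */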
+ c->prog_data.dispatch_width = 8;
+
+ return true;
}