this->current_annotation = NULL;
}
+void
+fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
+{
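+ /* Number of hardware registers each color component occupies:
+ * one at SIMD8, two at SIMD16.
+ */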
+ int reg_width = c->dispatch_width / 8;
+
+ if (c->dispatch_width == 8 || intel->gen == 6) {
+ /* SIMD8 write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ *
+ * gen6 SIMD16 DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ } else {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ *
+ * By setting the high bit of the MRF register number,
+ * we could indicate that we want COMPR4 mode: instead
+ * of the usual destination + 1 for the second half,
+ * the hardware would write to destination + 4. The
+ * optimizer would need to be taught about that, though.
+ */
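+ /* First half: an uncompressed SIMD8 write of channels 0-7 to
+ * m + index.
+ */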
+ push_force_uncompressed();
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ pop_force_uncompressed();
+
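+ /* Second half: channels 8-15 read the second register of the
+ * color's register pair and land at m + index + 4.
+ */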
+ push_force_sechalf();
+ color.sechalf = true;
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ pop_force_sechalf();
+ color.sechalf = false;
+ }
+}
+
void
fs_visitor::emit_fb_writes()
{
target);
if (this->frag_color || this->frag_data) {
for (int i = 0; i < 4; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i * reg_width), color);
+ emit_color_write(i, color_mrf, color);
color.reg_offset++;
}
}
* renderbuffer.
*/
color.reg_offset += 3;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color);
+ emit_color_write(3, color_mrf, color);
}
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
- } else {
+ } else /* gen <= 5 */ {
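+ /* Pre-gen6 math is a send to the shared math unit and executes
+ * at most SIMD8 per message, so a 16-wide math op is emitted as
+ * two SIMD8 sends, the second reading the second halves of its
+ * sources.
+ */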
assert(inst->mlen >= 1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
inst->base_mrf + 1, sechalf(src[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
reg->hw_reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
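+ /* A register flagged as holding the second half of a SIMD16
+ * value maps to the upper register of its hardware register
+ * pair.
+ */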
+ if (reg->sechalf)
+ brw_reg = sechalf(brw_reg);
break;
case IMM:
switch (reg->type) {
/* We haven't hooked up support for uniforms in the 16-wide
* version yet.
*/
- return GL_FALSE;
+ return false;
}
/* align to 64 byte boundary. */
assert(force_uncompressed_stack == 0);
assert(force_sechalf_stack == 0);
- if (!failed)
- generate_code();
-
if (failed)
- return GL_FALSE;
+ return false;
+
+ generate_code();
if (c->dispatch_width == 8) {
c->prog_data.total_grf = grf_used;
return false;
}
- if (intel->gen >= 6) {
+ if (intel->gen >= 5) {
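+ /* Now also try to compile a 16-wide version of the program. */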
c->dispatch_width = 16;
fs_visitor v2(c, shader);
v2.run();
int type;
bool negate;
bool abs;
+ bool sechalf; /* second half of a SIMD16 register pair */
struct brw_reg fixed_hw_reg;
int smear; /* -1, or a channel of the reg to smear to all channels. */
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_color_write(int index, int first_color_mrf, fs_reg color);
void emit_fb_writes();
void emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated);
*/
struct brw_wm_unit_key {
- unsigned int total_grf, total_scratch;
+ unsigned int total_grf, total_grf_16, total_scratch;
unsigned int urb_entry_read_length;
unsigned int curb_entry_read_length;
unsigned int dispatch_grf_start_reg;
+ uint32_t prog_offset_16; /* byte offset of the 16-wide program in prog_bo */
unsigned int curbe_offset;
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
+ key->total_grf_16 = brw->wm.prog_data->total_grf_16;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
key->total_scratch = brw->wm.prog_data->total_scratch;
+ key->prog_offset_16 = brw->wm.prog_data->prog_offset_16;
+
+ if (key->prog_offset_16) {
+ /* These two fields should be the same pre-gen6, which is why we
+ * only have one hardware field to program for both dispatch
+ * widths.
+ */
+ assert(brw->wm.prog_data->first_curbe_grf ==
+ brw->wm.prog_data->first_curbe_grf_16);
+ }
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
memset(&wm, 0, sizeof(wm));
wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
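+ /* wm9 holds the GRF count and kernel start pointer for the
+ * 16-wide program.
+ */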
+ wm.wm9.grf_reg_count_2 = ALIGN(key->total_grf_16, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+ wm.wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
+ key->prog_offset_16) >> 6; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
wm.wm5.program_computes_depth = key->computes_depth;
wm.wm5.program_uses_killpixel = key->uses_kill;
- if (key->is_glsl)
+ if (key->is_glsl) {
wm.wm5.enable_8_pix = 1;
- else
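+ /* A GLSL program may now also have a 16-wide variant; enable
+ * it when one was compiled.
+ */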
+ if (key->prog_offset_16)
+ wm.wm5.enable_16_pix = 1;
+ } else
wm.wm5.enable_16_pix = 1;
wm.wm5.max_threads = brw->wm_max_threads - 1;
brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
+ if (key->prog_offset_16) {
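+ /* The kernel pointer shares its dword with grf_reg_count_2, so
+ * both are folded into the relocation delta.
+ */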
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm9),
+ brw->wm.prog_bo, ((wm.wm9.grf_reg_count_2 << 1) +
+ key->prog_offset_16),
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+
/* Emit scratch space relocation */
if (key->total_scratch != 0) {
drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),