X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm_emit.c;h=96fecc97ee2bf1c03eae446d3b169336db7e3be8;hb=b2ddb93ff3b8c88682634ccdef247967e31fab84;hp=f316e0cda49b205a36318057d16d3d7a0619cc43;hpb=b01ffb12ed82dcf435b186e2aff35e344a8a6ebe;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f316e0cda49..96fecc97ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,6 +34,23 @@ #include "brw_context.h" #include "brw_wm.h" +static GLboolean can_do_pln(struct intel_context *intel, + const struct brw_reg *deltas) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->has_pln) + return GL_FALSE; + + if (deltas[1].nr != deltas[0].nr + 1) + return GL_FALSE; + + if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) + return GL_FALSE; + + return GL_TRUE; +} + /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -44,8 +61,53 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } +/* Return the SrcReg index of the channels that can be immediate float operands + * instead of usage of PROGRAM_CONSTANT values through push/pull. + */ +GLboolean +brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg) +{ + int opcode_array[] = { + [OPCODE_ADD] = 2, + [OPCODE_CMP] = 3, + [OPCODE_DP3] = 2, + [OPCODE_DP4] = 2, + [OPCODE_DPH] = 2, + [OPCODE_MAX] = 2, + [OPCODE_MIN] = 2, + [OPCODE_MOV] = 1, + [OPCODE_MUL] = 2, + [OPCODE_SEQ] = 2, + [OPCODE_SGE] = 2, + [OPCODE_SGT] = 2, + [OPCODE_SLE] = 2, + [OPCODE_SLT] = 2, + [OPCODE_SNE] = 2, + [OPCODE_SWZ] = 1, + [OPCODE_XPD] = 2, + }; + + /* These opcodes get broken down in a way that allow two + * args to be immediates. + */ + if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) { + if (arg == 1 || arg == 2) + return GL_TRUE; + } + + if (opcode > ARRAY_SIZE(opcode_array)) + return GL_FALSE; + + return arg == opcode_array[opcode] - 1; +} -/* Payload R0: +/** + * Computes the screen-space x,y position of the pixels. + * + * This will be used by emit_delta_xy() or emit_wpos_xy() for + * interpolation of attributes.. + * + * Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, * corresponding to each of the 16 execution channels. @@ -60,7 +122,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.7 -- ? * R1.8 -- ? */ - void emit_pixel_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask) @@ -100,33 +161,58 @@ void emit_pixel_xy(struct brw_wm_compile *c, brw_pop_insn_state(p); } - +/** + * Computes the screen-space x,y distance of the pixels from the start + * vertex. + * + * This will be used in linterp or pinterp with the start vertex value + * and the Cx, Cy, and C0 coefficients passed in from the setup engine + * to produce interpolated attribute values. + */ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0) { + struct intel_context *intel = &p->brw->intel; struct brw_reg r1 = brw_vec1_grf(1, 0); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); + if (mask == 0) + return; + assert(mask == WRITEMASK_XY); + + if (intel->gen >= 6) { + /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1. + Just add them with 0.0 for dst reg.. */ + r1 = brw_imm_v(0x00000000); + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + r1); + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + r1); + return; } + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers produced by emit_pixel_xy(). + */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); } +/** + * Computes the pixel offset from the window origin for gl_FragCoord(). + */ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, @@ -134,23 +220,44 @@ void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { - /* X' = X - origin */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + if (c->fp->program.PixelCenterInteger) { + /* X' = X */ + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); + } else { + /* X' = X + 0.5 */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_f(0.5)); + } } if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + if (c->fp->program.OriginUpperLeft) { + if (c->fp->program.PixelCenterInteger) { + /* Y' = Y */ + brw_MOV(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_W)); + } else { + /* Y' = Y + 0.5 */ + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_W), + brw_imm_f(0.5)); + } + } else { + float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; + + /* Y' = (height - 1) - Y + center */ + brw_ADD(p, + dst[1], + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_f(c->key.drawable_height - 1 + center_offset)); + } } } @@ -162,6 +269,16 @@ void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + struct brw_reg src; + struct brw_reg temp_dst; + + if (intel->gen >= 6) + temp_dst = dst[3]; + else + temp_dst = brw_message_reg(2); + + assert(intel->gen < 6); /* Don't need this if all you are doing is interpolating color, for * instance. @@ -172,34 +289,43 @@ void emit_pixel_w(struct brw_wm_compile *c, /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, temp_dst, interp3, deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]); + } /* Calc w */ + if (intel->gen >= 6) + src = temp_dst; + else + src = brw_null_reg(); + if (c->dispatch_width == 16) { brw_math_16(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), + 2, src, BRW_MATH_PRECISION_FULL); } else { brw_math(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), + 2, src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } } } - void emit_linterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *deltas) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -211,8 +337,14 @@ void emit_linterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<gen >= 6) { + brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0)); + } else if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } } @@ -225,10 +357,16 @@ void emit_pinterp(struct brw_compile *p, const struct brw_reg *deltas, const struct brw_reg *w) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; + if (intel->gen >= 6) { + emit_linterp(p, dst, mask, arg0, interp); + return; + } + interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); @@ -236,8 +374,12 @@ void emit_pinterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<func; + struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE); + struct brw_reg src; + + if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + /* Gen6 math requires that source and dst horizontal stride be 1, + * and that the argument be in the GRF. + */ + src = dst[dst_chan]; + brw_MOV(p, src, arg0[0]); + } else { + src = arg0[0]; + } if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); - /* If compressed, this will write message reg 2,3 from arg0.x's 16 - * channels. - */ - brw_MOV(p, brw_message_reg(2), arg0[0]); - /* Send two messages to perform all 16 operations: */ brw_push_insn_state(p); @@ -740,7 +921,7 @@ void emit_math1(struct brw_wm_compile *c, function, saturate, 2, - brw_null_reg(), + src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); @@ -751,7 +932,7 @@ void emit_math1(struct brw_wm_compile *c, function, saturate, 3, - brw_null_reg(), + sechalf(src), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } @@ -767,10 +948,8 @@ void emit_math2(struct brw_wm_compile *c, const struct brw_reg *arg1) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - GLuint saturate = ((mask & SATURATE) ? - BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -779,42 +958,103 @@ void emit_math2(struct brw_wm_compile *c, brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(2), arg0[0]); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); - } + /* math can only operate on up to a vec8 at a time, so in + * dispatch_width==16 we have to do the second half manually. + */ + if (intel->gen >= 6) { + struct brw_reg src0 = arg0[0]; + struct brw_reg src1 = arg1[0]; + struct brw_reg temp_dst = dst[dst_chan]; + + if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* Both scalar arguments. Do scalar calc. */ + src0.hstride = BRW_HORIZONTAL_STRIDE_1; + src1.hstride = BRW_HORIZONTAL_STRIDE_1; + temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1; + temp_dst.width = BRW_WIDTH_1; + + if (arg0[0].subnr != 0) { + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + + /* Ouch. We've used the temp as a dst, and we still + * need a temp to store arg1 in, because src and dst + * offsets have to be equal. Leaving this up to + * glsl2-965 to handle correctly. + */ + assert(arg1[0].subnr == 0); + } else if (arg1[0].subnr != 0) { + brw_MOV(p, temp_dst, src1); + src1 = temp_dst; + } + } else { + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + } + } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + brw_MOV(p, temp_dst, src1); + src1 = temp_dst; + } - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(3), arg1[0]); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); - } + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math2(p, + temp_dst, + function, + src0, + src1); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math2(p, + sechalf(temp_dst), + function, + sechalf(src0), + sechalf(src1)); + } - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, - dst[dst_chan], - function, - saturate, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); + /* Splat a scalar result into all the channels. */ + if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 && + arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0; + temp_dst.vstride = BRW_VERTICAL_STRIDE_0; + brw_MOV(p, dst[dst_chan], temp_dst); + } + } else { + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); - /* Send two messages to perform all 16 operations: - */ - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(3), arg1[0]); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, - offset(dst[dst_chan],1), + dst[dst_chan], function, saturate, - 4, - brw_null_reg(), + 2, + arg0[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + sechalf(arg0[0]), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } brw_pop_insn_state(p); } @@ -874,7 +1114,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ - if (!intel->is_ironlake && c->dispatch_width == 8) + if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; /* For shadow comparisons, we have to supply u,v,r. */ @@ -892,7 +1132,7 @@ void emit_tex(struct brw_wm_compile *c, /* Fill in the shadow comparison reference value. */ if (shadow) { - if (intel->is_ironlake) { + if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); cur_mrf += mrf_per_channel; @@ -905,11 +1145,11 @@ void emit_tex(struct brw_wm_compile *c, cur_mrf += mrf_per_channel; } - if (intel->is_ironlake) { + if (intel->gen >= 5) { if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; else - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; } else { /* Note that G45 and older determines shadow compare and dispatch width * from message length for most messages. @@ -957,16 +1197,16 @@ void emit_txb(struct brw_wm_compile *c, * undefined, and trust the execution mask to keep the undefined pixels * from mattering. */ - if (c->dispatch_width == 16 || !intel->is_ironlake) { - if (intel->is_ironlake) - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + if (c->dispatch_width == 16 || intel->gen < 5) { + if (intel->gen >= 5) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; mrf_per_channel = 2; dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); response_length = 8; } else { - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; mrf_per_channel = 1; dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); response_length = 4; @@ -1062,11 +1302,19 @@ static void emit_kil( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - GLuint i; - - /* XXX - usually won't need 4 compares! - */ + GLuint i, j; + for (i = 0; i < 4; i++) { + /* Check if we've already done the comparison for this reg + * -- common when someone does KIL TEMP.wwww. + */ + for (j = 0; j < i; j++) { + if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) + break; + } + if (j != i) + continue; + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); @@ -1079,7 +1327,7 @@ static void emit_kil( struct brw_wm_compile *c, /* KIL_NV kills the pixels that are currently executing, not based on a test * of the arguments. */ -static void emit_kil_nv( struct brw_wm_compile *c ) +void emit_kil_nv( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1098,6 +1346,7 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg dst; if (c->dispatch_width == 16) @@ -1108,6 +1357,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + if (intel->gen < 6) /* gen6, use headerless for fb write */ { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ @@ -1121,6 +1371,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, + c->dispatch_width, dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), @@ -1162,8 +1413,10 @@ void emit_fb_write(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; + int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ @@ -1175,14 +1428,45 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); + if (intel->gen >= 6) + base_reg = nr; + else + base_reg = 0; + for (channel = 0; channel < 4; channel++) { - if (c->dispatch_width == 16 && brw->has_compr4) { - /* By setting the high bit of the MRF register number, we indicate + if (intel->gen >= 6) { + /* gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * m + 4: b0 + * m + 5: b1 + * m + 6: a0 + * m + 7: a1 + */ + if (c->dispatch_width == 16) { + brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]); + } else { + brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); + } + } else if (c->dispatch_width == 16 && brw->has_compr4) { + /* pre-gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: g0 + * m + 2: b0 + * m + 3: a0 + * m + 4: r1 + * m + 5: g1 + * m + 6: b1 + * m + 7: a1 + * + * By setting the high bit of the MRF register number, we indicate * that we want COMPR4 mode - instead of doing the usual destination * + 1 for the second half we get destination + 4. */ brw_MOV(p, - brw_message_reg(nr + channel + (1 << 7)), + brw_message_reg(nr + channel + BRW_MRF_COMPR4), arg0[channel]); } else { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ @@ -1202,7 +1486,11 @@ void emit_fb_write(struct brw_wm_compile *c, } /* skip over the regs populated above: */ - nr += 8; + if (c->dispatch_width == 16) + nr += 8; + else + nr += 4; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) @@ -1235,11 +1523,16 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } + if (intel->gen >= 6) { + /* Subtract off the message header, since we send headerless. */ + nr -= 2; + } + if (!c->key.runtime_check_aads_emit) { if (c->key.aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr, target, eot); + fire_fb_write(c, base_reg, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1285,9 +1578,7 @@ static void emit_spill( struct brw_wm_compile *c, mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask } send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 } */ - brw_dp_WRITE_16(p, - retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - slot); + brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot); } @@ -1312,9 +1603,7 @@ static void emit_unspill( struct brw_wm_compile *c, send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 } */ - brw_dp_READ_16(p, - retype(vec16(reg), BRW_REGISTER_TYPE_UW), - slot); + brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot); } @@ -1364,9 +1653,12 @@ static void spill_values( struct brw_wm_compile *c, void brw_wm_emit( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; GLuint insn; brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + if (intel->gen >= 6) + brw_set_acc_write_control(p, 1); /* Check if any of the payload regs need to be spilled: */ @@ -1461,6 +1753,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); break; + case OPCODE_DP2: + emit_dp2(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1474,7 +1770,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_TRUNC: - emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); + for (i = 0; i < 4; i++) { + if (dst_flags & (1<opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); + printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, inst->opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->opcode) : + "unknown"); } for (i = 0; i < 4; i++) @@ -1611,12 +1915,20 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->spill_slot); } - if (INTEL_DEBUG & DEBUG_WM) { + /* Only properly tested on ILK */ + if (p->brw->intel.gen == 5) { + brw_remove_duplicate_mrf_moves(p); + if (c->dispatch_width == 16) + brw_remove_grf_to_mrf_moves(p); + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { int i; - _mesa_printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stdout, &p->store[i], p->brw->intel.gen); + printf("\n"); } } +