return GL_TRUE;
}
-/* Not quite sure how correct this is - need to understand horiz
- * vs. vertical strides a little better.
- */
-static INLINE struct brw_reg sechalf( struct brw_reg reg )
-{
- if (reg.vstride)
- reg.nr++;
- return reg;
-}
-
/* Return the SrcReg index of the channels that can be immediate float operands
* instead of usage of PROGRAM_CONSTANT values through push/pull.
*/
[OPCODE_SLE] = 2,
[OPCODE_SLT] = 2,
[OPCODE_SNE] = 2,
+ [OPCODE_SWZ] = 1,
[OPCODE_XPD] = 2,
};
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W);
+ struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W);
if (mask & WRITEMASK_X) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_x_f, delta_x);
+ delta_x = delta_x_f;
+ }
+
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
- brw_MOV(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[0], delta_x);
} else {
/* X' = X + 0.5 */
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5));
}
}
if (mask & WRITEMASK_Y) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_y_f, delta_y);
+ delta_y = delta_y_f;
+ }
+
if (c->fp->program.OriginUpperLeft) {
if (c->fp->program.PixelCenterInteger) {
/* Y' = Y */
- brw_MOV(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[1], delta_y);
} else {
- /* Y' = Y + 0.5 */
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5));
}
} else {
float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
/* Y' = (height - 1) - Y + center */
- brw_ADD(p,
- dst[1],
- negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_ADD(p, dst[1], negate(delta_y),
brw_imm_f(c->key.drawable_height - 1 + center_offset));
}
}
BRW_MATH_SATURATE_NONE);
struct brw_reg src;
- if (intel->gen >= 6 && arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- /* Gen6 math requires that source and dst horizontal stride be 1.
+ if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
+ arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
+ arg0[0].negate || arg0[0].abs)) {
+ /* Gen6 math requires that source and dst horizontal stride be 1,
+ * and that the argument be in the GRF.
*
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
- src = *dst;
+ src = dst[dst_chan];
brw_MOV(p, src, arg0[0]);
} else {
src = arg0[0];
struct brw_reg temp_dst = dst[dst_chan];
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- /* Both scalar arguments. Do scalar calc. */
- src0.hstride = BRW_HORIZONTAL_STRIDE_1;
- src1.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.width = BRW_WIDTH_1;
-
- if (arg0[0].subnr != 0) {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
-
- /* Ouch. We've used the temp as a dst, and we still
- * need a temp to store arg1 in, because src and dst
- * offsets have to be equal. Leaving this up to
- * glsl2-965 to handle correctly.
- */
- assert(arg1[0].subnr == 0);
- } else if (arg1[0].subnr != 0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
- }
- } else {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
- }
- } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
+
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* This is a heinous hack to get a temporary register for use
+ * in case both arg0 and arg1 are constants. Why you're
+ * doing exponentiation on constant values in the shader, we
+ * don't know.
+ *
+ * max_wm_grf is almost surely less than the maximum GRF, and
+ * gen6 doesn't care about the number of GRFs used in a
+ * shader like pre-gen6 did.
+ */
+ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
+ brw_MOV(p, temp, src1);
+ src1 = temp;
}
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
sechalf(src0),
sechalf(src1));
}
-
- /* Splat a scalar result into all the channels. */
- if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
- arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
- temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
- brw_MOV(p, dst[dst_chan], temp_dst);
- }
} else {
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
if (intel->gen < 5 && c->dispatch_width == 8)
nr_texcoords = 3;
- /* For shadow comparisons, we have to supply u,v,r. */
- if (shadow)
- nr_texcoords = 3;
+ if (shadow) {
+ if (intel->gen < 7) {
+ /* For shadow comparisons, we have to supply u,v,r. */
+ nr_texcoords = 3;
+ } else {
+ /* On Ivybridge, the shadow comparitor comes first. Just load it. */
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
+ }
+ }
/* Emit the texcoords. */
for (i = 0; i < nr_texcoords; i++) {
+ if (c->key.gl_clamp_mask[i] & (1 << sampler))
+ brw_set_saturate(p, true);
+
if (emit & (1<<i))
brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
+
+ brw_set_saturate(p, false);
}
/* Fill in the shadow comparison reference value. */
- if (shadow) {
+ if (shadow && intel->gen < 7) {
if (intel->gen >= 5) {
/* Fill in the cube map array index value. */
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
if (intel->gen >= 5) {
if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
else
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
} else {
/* Note that G45 and older determines shadow compare and dispatch width
* from message length for most messages.
*/
if (c->dispatch_width == 16 || intel->gen < 5) {
if (intel->gen >= 5)
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
mrf_per_channel = 2;
dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
response_length = 8;
} else {
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
mrf_per_channel = 1;
dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
response_length = 4;
struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg pixelmask;
GLuint i, j;
+ if (intel->gen >= 6)
+ pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
for (i = 0; i < 4; i++) {
/* Check if we've already done the comparison for this reg
* -- common when someone does KIL TEMP.wwww.
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
brw_pop_insn_state(p);
}
}
-/* KIL_NV kills the pixels that are currently executing, not based on a test
- * of the arguments.
- */
-void emit_kil_nv( struct brw_wm_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
- brw_pop_insn_state(p);
-}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
- struct brw_reg dst;
-
- if (c->dispatch_width == 16)
- dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
- else
- dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
/* Pass through control information:
+ *
+ * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
- if (intel->gen < 6) /* gen6, use headerless for fb write */
+ if (intel->gen < 6)
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
c->dispatch_width,
- dst,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
target,
nr,
0,
- eot);
+ eot,
+ GL_TRUE);
}
GLuint reg )
{
struct brw_compile *p = &c->func;
- GLuint comp = c->key.aa_dest_stencil_reg / 2;
- GLuint off = c->key.aa_dest_stencil_reg % 2;
+ GLuint comp = c->aa_dest_stencil_reg / 2;
+ GLuint off = c->aa_dest_stencil_reg % 2;
struct brw_reg aa = offset(arg1[comp], off);
brw_push_insn_state(p);
struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
- int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */
/* Reserve a space for AA - may not be needed:
*/
- if (c->key.aa_dest_stencil_reg)
+ if (c->aa_dest_stencil_reg)
nr += 1;
/* I don't really understand how this achieves the color interleave
*/
brw_push_insn_state(p);
- if (intel->gen >= 6)
- base_reg = nr;
- else
- base_reg = 0;
+ if (c->key.clamp_fragment_color)
+ brw_set_saturate(p, 1);
for (channel = 0; channel < 4; channel++) {
if (intel->gen >= 6) {
}
}
}
+
+ brw_set_saturate(p, 0);
+
/* skip over the regs populated above:
*/
if (c->dispatch_width == 16)
brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
+ if (c->source_depth_to_render_target)
{
- if (c->key.computes_depth)
+ if (c->computes_depth)
brw_MOV(p, brw_message_reg(nr), arg2[2]);
else
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
nr += 2;
}
- if (c->key.dest_depth_reg)
+ if (c->dest_depth_reg)
{
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ GLuint comp = c->dest_depth_reg / 2;
+ GLuint off = c->dest_depth_reg % 2;
if (off != 0) {
brw_push_insn_state(p);
}
if (intel->gen >= 6) {
- /* Subtract off the message header, since we send headerless. */
- nr -= 2;
+ /* Load the message header. There's no implied move from src0
+ * to the base mrf on gen6.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+
+ if (target != 0) {
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ 0,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(target));
+ }
}
- if (!c->key.runtime_check_aads_emit) {
- if (c->key.aa_dest_stencil_reg)
+ if (!c->runtime_check_aads_emit) {
+ if (c->aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
- fire_fb_write(c, base_reg, nr, target, eot);
+ fire_fb_write(c, 0, nr, target, eot);
}
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
emit_kil(c, args[0]);
break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
-
default:
printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
brw_remove_grf_to_mrf_moves(p);
}
- if (INTEL_DEBUG & DEBUG_WM) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
int i;
printf("wm-native:\n");