#include "brw_context.h"
#include "brw_wm.h"
-static GLboolean can_do_pln(struct intel_context *intel,
- const struct brw_reg *deltas)
+static bool /* Reports whether brw_PLN can be emitted with these delta registers. */
+can_do_pln(struct intel_context *intel, const struct brw_reg *deltas)
{
struct brw_context *brw = brw_context(&intel->ctx);
if (!brw->has_pln)
- return GL_FALSE;
+ return false; /* brw->has_pln is false for this context */
if (deltas[1].nr != deltas[0].nr + 1)
- return GL_FALSE;
+ return false; /* deltas[1] must sit in the register right after deltas[0] */
if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
- return GL_FALSE;
+ return false; /* before gen6, deltas[0] must be in an even-numbered register */
- return GL_TRUE;
-}
-
-/* Not quite sure how correct this is - need to understand horiz
- * vs. vertical strides a little better.
- */
-static INLINE struct brw_reg sechalf( struct brw_reg reg )
-{
- if (reg.vstride)
- reg.nr++;
- return reg;
+ return true; /* every check above passed */
}
/* Return the SrcReg index of the channels that can be immediate float operands
* instead of usage of PROGRAM_CONSTANT values through push/pull.
*/
-GLboolean
+bool
brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg)
{
int opcode_array[] = {
[OPCODE_SLE] = 2,
[OPCODE_SLT] = 2,
[OPCODE_SNE] = 2,
+ [OPCODE_SWZ] = 1,
[OPCODE_XPD] = 2,
};
*/
if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) {
if (arg == 1 || arg == 2)
- return GL_TRUE;
+ return true;
}
if (opcode > ARRAY_SIZE(opcode_array))
- return GL_FALSE;
+ return false;
return arg == opcode_array[opcode] - 1;
}
GLuint mask,
const struct brw_reg *arg0)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg r1 = brw_vec1_grf(1, 0);
if (mask == 0)
assert(mask == WRITEMASK_XY);
+ if (intel->gen >= 6) {
+ /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
+ Just add 0.0 to the inputs to produce the dst regs. */
+ r1 = brw_imm_v(0x00000000);
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ r1);
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ r1);
+ return;
+ }
+
/* Calc delta X,Y by subtracting origin in r1 from the pixel
* centers produced by emit_pixel_xy().
*/
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W);
+ struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W);
if (mask & WRITEMASK_X) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_x_f, delta_x);
+ delta_x = delta_x_f;
+ }
+
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
- brw_MOV(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[0], delta_x);
} else {
/* X' = X + 0.5 */
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5));
}
}
if (mask & WRITEMASK_Y) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_y_f, delta_y);
+ delta_y = delta_y_f;
+ }
+
if (c->fp->program.OriginUpperLeft) {
if (c->fp->program.PixelCenterInteger) {
/* Y' = Y */
- brw_MOV(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[1], delta_y);
} else {
- /* Y' = Y + 0.5 */
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5));
}
} else {
float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
/* Y' = (height - 1) - Y + center */
- brw_ADD(p,
- dst[1],
- negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_ADD(p, dst[1], negate(delta_y),
brw_imm_f(c->key.drawable_height - 1 + center_offset));
}
}
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
+ struct brw_reg src;
+ struct brw_reg temp_dst;
+
+ if (intel->gen >= 6)
+ temp_dst = dst[3];
+ else
+ temp_dst = brw_message_reg(2);
+
+ assert(intel->gen < 6);
/* Don't need this if all you are doing is interpolating color, for
* instance.
* result straight into a message reg.
*/
if (can_do_pln(intel, deltas)) {
- brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+ brw_PLN(p, temp_dst, interp3, deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
}
/* Calc w */
+ if (intel->gen >= 6)
+ src = temp_dst;
+ else
+ src = brw_null_reg();
+
if (c->dispatch_width == 16) {
brw_math_16(p, dst[3],
BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
+ 2, src,
BRW_MATH_PRECISION_FULL);
} else {
brw_math(p, dst[3],
BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
+ 2, src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
}
}
-
void emit_linterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- if (can_do_pln(intel, deltas)) {
+ if (intel->gen >= 6) {
+ brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0));
+ } else if (can_do_pln(intel, deltas)) {
brw_PLN(p, dst[i], interp[i], deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
GLuint nr = arg0[0].nr;
GLuint i;
+ if (intel->gen >= 6) {
+ emit_linterp(p, dst, mask, arg0, interp);
+ return;
+ }
+
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
interp[2] = brw_vec1_grf(nr+1, 0);
* between each other. We could probably do it like ddx and swizzle the right
* order later, but bail for now and just produce
* ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ *
+ * The negate_value boolean is used to negate the d/dy computation for FBOs,
+ * since they place the origin at the upper left instead of the lower left.
*/
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
- GLboolean is_ddx,
- const struct brw_reg *arg0)
+ bool is_ddx,
+ const struct brw_reg *arg0,
+ bool negate_value)
{
int i;
struct brw_reg src0, src1;
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
}
- brw_ADD(p, dst[i], src0, negate(src1));
+ if (negate_value)
+ brw_ADD(p, dst[i], src1, negate(src0));
+ else
+ brw_ADD(p, dst[i], src0, negate(src1));
}
}
if (mask & SATURATE)
}
}
+void emit_sign(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+ /* Emits a sign computation of arg0 into dst for each of the four
+ * channels whose bit is set in mask: the channel is first set to 0.0,
+ * then overwritten with -1.0 after an "arg0 < 0" compare and with 1.0
+ * after an "arg0 > 0" compare.
+ *
+ * NOTE(review): presumably brw_CMP with a null destination leaves
+ * predication enabled, so each following MOV only lands where the
+ * comparison held -- confirm in brw_CMP. Predication is explicitly
+ * reset to BRW_PREDICATE_NONE after each conditional MOV.
+ */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0.0));
+ /* Negative inputs: compare arg0[i] < 0, then write -1.0. */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
+ brw_MOV(p, dst[i], brw_imm_f(-1.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /* Positive inputs: compare arg0[i] > 0, then write 1.0. */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0[i], brw_imm_f(0));
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ }
+}
+
void emit_max(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
}
+void emit_dp2(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+{
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+ /* Emits a two-component dot product of arg0 and arg1 into the single
+ * destination channel selected by mask.
+ *
+ * dst_chan is the index of the lowest enabled channel bit (ffs is
+ * 1-based, hence the -1). It is -1 when no channel is enabled, but the
+ * check below returns before it is used in that case.
+ */
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+ /* Exactly one destination channel may be enabled. */
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+ /* x*x goes to a null destination; the MAC below then handles y*y.
+ * NOTE(review): presumably the MUL result is carried in the
+ * accumulator that MAC adds to -- confirm against the EU ISA docs.
+ */
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ /* Saturate only the final MAC, and only when SATURATE is set in mask;
+ * saturation is switched back off afterwards.
+ */
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]);
+ brw_set_saturate(p, 0);
+}
+
+
void emit_dp3(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0,
const struct brw_reg *arg1)
{
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
const struct brw_reg *arg0,
const struct brw_reg *arg1)
{
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
const struct brw_reg *arg0,
const struct brw_reg *arg1)
{
- const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ const int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
- GLuint saturate = ((mask & SATURATE) ?
- BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE);
+ struct intel_context *intel = &p->brw->intel;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+ struct brw_reg src;
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
assert(is_power_of_two(mask & WRITEMASK_XYZW));
- /* If compressed, this will write message reg 2,3 from arg0.x's 16
- * channels.
- */
- brw_MOV(p, brw_message_reg(2), arg0[0]);
+ if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
+ arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
+ arg0[0].negate || arg0[0].abs)) {
+ /* Gen6 math requires that source and dst horizontal stride be 1,
+ * and that the argument be in the GRF.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
+ */
+ src = dst[dst_chan];
+ brw_MOV(p, src, arg0[0]);
+ } else {
+ src = arg0[0];
+ }
/* Send two messages to perform all 16 operations:
*/
brw_push_insn_state(p);
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
dst[dst_chan],
function,
- saturate,
2,
- brw_null_reg(),
+ src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
brw_math(p,
offset(dst[dst_chan],1),
function,
- saturate,
3,
- brw_null_reg(),
+ sechalf(src),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
const struct brw_reg *arg1)
{
struct brw_compile *p = &c->func;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
- GLuint saturate = ((mask & SATURATE) ?
- BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE);
+ struct intel_context *intel = &p->brw->intel;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(2), arg0[0]);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
- }
+ /* math can only operate on up to a vec8 at a time, so in
+ * dispatch_width==16 we have to do the second half manually.
+ */
+ if (intel->gen >= 6) {
+ struct brw_reg src0 = arg0[0];
+ struct brw_reg src1 = arg1[0];
+ struct brw_reg temp_dst = dst[dst_chan];
+
+ if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(3), arg1[0]);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
- }
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* This is a heinous hack to get a temporary register for use
+ * in case both arg0 and arg1 are constants. Why you're
+ * doing exponentiation on constant values in the shader, we
+ * don't know.
+ *
+ * max_wm_grf is almost surely less than the maximum GRF, and
+ * gen6 doesn't care about the number of GRFs used in a
+ * shader like pre-gen6 did.
+ */
+ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
+ brw_MOV(p, temp, src1);
+ src1 = temp;
+ }
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p,
- dst[dst_chan],
- function,
- saturate,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math2(p,
+ temp_dst,
+ function,
+ src0,
+ src1);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p,
+ sechalf(temp_dst),
+ function,
+ sechalf(src0),
+ sechalf(src1));
+ }
+ } else {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(3), arg1[0]);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ }
- /* Send two messages to perform all 16 operations:
- */
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
- offset(dst[dst_chan],1),
+ dst[dst_chan],
function,
- saturate,
- 4,
- brw_null_reg(),
+ 2,
+ arg0[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
+ /* Send two messages to perform all 16 operations:
+ */
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ 4,
+ sechalf(arg0[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
brw_pop_insn_state(p);
}
struct brw_reg depth_payload,
GLuint tex_idx,
GLuint sampler,
- GLboolean shadow)
+ bool shadow)
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
nr_texcoords = 1;
break;
case TEXTURE_2D_INDEX:
+ case TEXTURE_1D_ARRAY_INDEX:
case TEXTURE_RECT_INDEX:
+ case TEXTURE_EXTERNAL_INDEX:
emit = WRITEMASK_XY;
nr_texcoords = 2;
break;
case TEXTURE_3D_INDEX:
+ case TEXTURE_2D_ARRAY_INDEX:
case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
nr_texcoords = 3;
if (intel->gen < 5 && c->dispatch_width == 8)
nr_texcoords = 3;
- /* For shadow comparisons, we have to supply u,v,r. */
- if (shadow)
- nr_texcoords = 3;
+ if (shadow) {
+ if (intel->gen < 7) {
+ /* For shadow comparisons, we have to supply u,v,r. */
+ nr_texcoords = 3;
+ } else {
+ /* On Ivybridge, the shadow comparator comes first. Just load it. */
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
+ }
+ }
/* Emit the texcoords. */
for (i = 0; i < nr_texcoords; i++) {
+ if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
+ brw_set_saturate(p, true);
+
if (emit & (1<<i))
brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
+
+ brw_set_saturate(p, false);
}
/* Fill in the shadow comparison reference value. */
- if (shadow) {
- if (intel->gen == 5) {
+ if (shadow && intel->gen < 7) {
+ if (intel->gen >= 5) {
/* Fill in the cube map array index value. */
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
cur_mrf += mrf_per_channel;
}
- if (intel->gen == 5) {
+ if (intel->gen >= 5) {
if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
else
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
} else {
/* Note that G45 and older determines shadow compare and dispatch width
* from message length for most messages.
msg_type,
response_length,
cur_mrf - 1,
- 0,
1,
- simd_mode);
+ simd_mode,
+ BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
* from mattering.
*/
if (c->dispatch_width == 16 || intel->gen < 5) {
- if (intel->gen == 5)
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+ if (intel->gen >= 5)
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
mrf_per_channel = 2;
dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
response_length = 8;
} else {
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
mrf_per_channel = 1;
dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
response_length = 4;
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
+ case TEXTURE_EXTERNAL_INDEX:
brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
msg_type,
response_length,
msgLength,
- 0,
1,
- BRW_SAMPLER_SIMD_MODE_SIMD16);
+ BRW_SAMPLER_SIMD_MODE_SIMD16,
+ BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg pixelmask;
GLuint i, j;
+ if (intel->gen >= 6)
+ pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
for (i = 0; i < 4; i++) {
/* Check if we've already done the comparison for this reg
* -- common when someone does KIL TEMP.wwww.
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
brw_pop_insn_state(p);
}
}
-/* KIL_NV kills the pixels that are currently executing, not based on a test
- * of the arguments.
- */
-static void emit_kil_nv( struct brw_wm_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
- brw_pop_insn_state(p);
-}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
GLuint eot )
{
struct brw_compile *p = &c->func;
- struct brw_reg dst;
-
- if (c->dispatch_width == 16)
- dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
- else
- dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ struct intel_context *intel = &p->brw->intel;
+ uint32_t msg_control;
/* Pass through control information:
+ *
+ * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ if (intel->gen < 6)
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
brw_pop_insn_state(p);
}
+ if (c->dispatch_width == 16)
+ msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+ else
+ msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+
/* Send framebuffer write message: */
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
- dst,
+ c->dispatch_width,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ msg_control,
target,
nr,
0,
- eot);
+ eot,
+ true);
}
GLuint reg )
{
struct brw_compile *p = &c->func;
- GLuint comp = c->key.aa_dest_stencil_reg / 2;
- GLuint off = c->key.aa_dest_stencil_reg % 2;
+ GLuint comp = c->aa_dest_stencil_reg / 2;
+ GLuint off = c->aa_dest_stencil_reg % 2;
struct brw_reg aa = offset(arg1[comp], off);
brw_push_insn_state(p);
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
/* Reserve a space for AA - may not be needed:
*/
- if (c->key.aa_dest_stencil_reg)
+ if (c->aa_dest_stencil_reg)
nr += 1;
/* I don't really understand how this achieves the color interleave
*/
brw_push_insn_state(p);
+ if (c->key.clamp_fragment_color)
+ brw_set_saturate(p, 1);
+
for (channel = 0; channel < 4; channel++) {
- if (c->dispatch_width == 16 && brw->has_compr4) {
- /* By setting the high bit of the MRF register number, we indicate
+ if (intel->gen >= 6) {
+ /* gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ if (c->dispatch_width == 16) {
+ brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]);
+ } else {
+ brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);
+ }
+ } else if (c->dispatch_width == 16 && brw->has_compr4) {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ *
+ * By setting the high bit of the MRF register number, we indicate
* that we want COMPR4 mode - instead of doing the usual destination
* + 1 for the second half we get destination + 4.
*/
brw_MOV(p,
- brw_message_reg(nr + channel + (1 << 7)),
+ brw_message_reg(nr + channel + BRW_MRF_COMPR4),
arg0[channel]);
} else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
}
}
}
+
+ brw_set_saturate(p, 0);
+
/* skip over the regs populated above:
*/
- nr += 8;
+ if (c->dispatch_width == 16)
+ nr += 8;
+ else
+ nr += 4;
+
brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
+ if (c->source_depth_to_render_target)
{
- if (c->key.computes_depth)
+ if (c->computes_depth)
brw_MOV(p, brw_message_reg(nr), arg2[2]);
else
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
nr += 2;
}
- if (c->key.dest_depth_reg)
+ if (c->dest_depth_reg)
{
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ GLuint comp = c->dest_depth_reg / 2;
+ GLuint off = c->dest_depth_reg % 2;
if (off != 0) {
brw_push_insn_state(p);
nr += 2;
}
- if (!c->key.runtime_check_aads_emit) {
- if (c->key.aa_dest_stencil_reg)
+ if (intel->gen >= 6) {
+ /* Load the message header. There's no implied move from src0
+ * to the base mrf on gen6.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+
+ if (target != 0) {
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ 0,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(target));
+ }
+ }
+
+ if (!c->runtime_check_aads_emit) {
+ if (c->aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
struct brw_reg ip = brw_ip_reg();
- struct brw_instruction *jmp;
+ int jmp;
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
get_element_ud(brw_vec8_grf(1,0), 6),
brw_imm_ud(1<<26));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)) - p->store;
{
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
*/
- brw_dp_WRITE_16(p,
- retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
- slot);
+ brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot);
}
send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
*/
- brw_dp_READ_16(p,
- retype(vec16(reg), BRW_REGISTER_TYPE_UW),
- slot);
+ brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot);
}
emit_spill(c, values[i].hw_reg, values[i].spill_slot);
}
-#define BRW_MRF_NUM 16
-#define BRW_SIZE_OF_REG 32
-
-static INLINE
-GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
-{
- switch (inst->header.opcode) {
- case BRW_OPCODE_MOV:
- case BRW_OPCODE_SEL:
- case BRW_OPCODE_NOT:
- case BRW_OPCODE_AND:
- case BRW_OPCODE_OR:
- case BRW_OPCODE_XOR:
- case BRW_OPCODE_SHR:
- case BRW_OPCODE_SHL:
- case BRW_OPCODE_RSR:
- case BRW_OPCODE_RSL:
- case BRW_OPCODE_ADD:
- case BRW_OPCODE_MUL:
- case BRW_OPCODE_AVG:
- case BRW_OPCODE_FRC:
- case BRW_OPCODE_RNDU:
- case BRW_OPCODE_RNDD:
- case BRW_OPCODE_RNDE:
- case BRW_OPCODE_RNDZ:
- case BRW_OPCODE_MAC:
- case BRW_OPCODE_MACH:
- case BRW_OPCODE_LINE:
- return GL_TRUE;
- default:
- return GL_FALSE;
- }
-}
-
-static const struct {
- char *name;
- int nsrc;
- int ndst;
-} inst_opcode[128] = {
- [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
-
- [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
-
- [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
-
- [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
- [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
- [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
- [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
- [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
- [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
- [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
-};
-
-static const GLuint inst_stride[7] = {
- [0] = 0,
- [1] = 1,
- [2] = 2,
- [3] = 4,
- [4] = 8,
- [5] = 16,
- [6] = 32
-};
-
-static const GLuint inst_type_size[8] = {
- [0] = 4,
- [1] = 4,
- [2] = 2,
- [3] = 2,
- [4] = 1,
- [5] = 1,
- [7] = 4
-};
-
-#define BRW_MAX_OFFSET(x0,x1) ((x0) > (x1) ? (x0) : (x1))
-#define BRW_MIN_OFFSET(x0,x1) ((x0) < (x1) ? (x0) : (x1));
-
-static INLINE GLboolean
-brw_is_grf_written(const struct brw_instruction *inst,
- int reg_index, int size,
- int gen)
-{
- if (inst_opcode[inst->header.opcode].ndst == 0)
- return GL_FALSE;
-
- if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
- if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE)
- return GL_TRUE;
-
- if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
- return GL_FALSE;
-
- const int reg_start = reg_index * BRW_SIZE_OF_REG;
- const int reg_end = reg_start + size;
-
- const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
- const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG
- + inst->bits1.da1.dest_subreg_nr;
- int length, write_end;
-
- /* SEND is specific */
- if (inst->header.opcode == BRW_OPCODE_SEND) {
- if (gen >= 5)
- length = inst->bits3.generic_gen5.response_length*BRW_SIZE_OF_REG;
- else
- length = inst->bits3.generic.response_length*BRW_SIZE_OF_REG;
- }
- else {
- length = 1 << inst->header.execution_size;
- length *= type_size;
- length *= inst->bits1.da1.dest_horiz_stride;
- }
-
- /* If the two intervals intersect, we overwrite the register */
- write_end = write_start + length;
- const int left = BRW_MAX_OFFSET(write_start, reg_start);
- const int right = BRW_MIN_OFFSET(write_end, reg_end);
-
- return left < right;
-}
-
-/* Specific path for message register since we need to handle the compr4 case */
-static INLINE GLboolean
-brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
-{
- if (inst_opcode[inst->header.opcode].ndst == 0)
- return GL_FALSE;
-
- if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
- if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
- return GL_TRUE;
-
- if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
- return GL_FALSE;
-
- const int reg_start = reg_index * BRW_SIZE_OF_REG;
- const int reg_end = reg_start + size;
-
- const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
- const int is_compr4 = inst->bits1.da1.dest_reg_nr & 0xf0;
- const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
-
- /* We use compr4 with a size != 16 elements. Strange, we conservatively
- * consider that we are writing the register.
- */
- if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
- return GL_TRUE;
-
- GLboolean is_written = GL_FALSE;
-
- /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
- if (is_compr4) {
- const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride;
-
- /* First 8-way register */
- const int write_start0 = mrf_index*BRW_SIZE_OF_REG
- + inst->bits1.da1.dest_subreg_nr;
- const int write_end0 = write_start0 + length;
-
- /* Second 8-way register */
- const int write_start1 = (mrf_index+4)*BRW_SIZE_OF_REG
- + inst->bits1.da1.dest_subreg_nr;
- const int write_end1 = write_start1 + length;
-
- /* If the two intervals intersect, we overwrite the register */
- const int left0 = BRW_MAX_OFFSET(write_start0, reg_start);
- const int right0 = BRW_MIN_OFFSET(write_end0, reg_end);
- const int left1 = BRW_MAX_OFFSET(write_start1, reg_start);
- const int right1 = BRW_MIN_OFFSET(write_end1, reg_end);
-
- is_written = left0 < right0 || left1 < right1;
- }
- else {
- int length;
- length = 1 << inst->header.execution_size;
- length *= type_size;
- length *= inst->bits1.da1.dest_horiz_stride;
-
- /* If the two intervals intersect, we write into the register */
- const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG
- + inst->bits1.da1.dest_subreg_nr;
- const int write_end = write_start + length;
- const int left = BRW_MAX_OFFSET(write_start, reg_start);
- const int right = BRW_MIN_OFFSET(write_end, reg_end);;
-
- is_written = left < right;
- }
-
- /* SEND may perform an implicit mov to a mrf register */
- if (is_written == GL_FALSE &&
- inst->header.opcode == BRW_OPCODE_SEND &&
- inst->bits1.da1.src0_reg_file != 0) {
-
- const int mrf_start = inst->header.destreg__conditionalmod;
- const int write_start = mrf_start * BRW_SIZE_OF_REG;
- const int write_end = write_start + BRW_SIZE_OF_REG;
- const int left = BRW_MAX_OFFSET(write_start, reg_start);
- const int right = BRW_MIN_OFFSET(write_end, reg_end);;
- is_written = left < right;
- }
-
- return is_written;
-}
-
-static INLINE GLboolean
-brw_is_mrf_read(const struct brw_instruction *inst,
- int reg_index, int size, int gen)
-{
- if (inst->header.opcode != BRW_OPCODE_SEND)
- return GL_FALSE;
- if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
- return GL_TRUE;
-
- const int reg_start = reg_index*BRW_SIZE_OF_REG;
- const int reg_end = reg_start + size;
-
- int length, read_start, read_end;
- if (gen >= 5)
- length = inst->bits3.generic_gen5.msg_length*BRW_SIZE_OF_REG;
- else
- length = inst->bits3.generic.msg_length*BRW_SIZE_OF_REG;
-
- /* Look if SEND uses an implicit mov. In that case, we read one less register
- * (but we write it)
- */
- if (inst->bits1.da1.src0_reg_file != 0)
- read_start = inst->header.destreg__conditionalmod;
- else {
- length--;
- read_start = inst->header.destreg__conditionalmod + 1;
- }
- read_start *= BRW_SIZE_OF_REG;
- read_end = read_start + length;
-
- const int left = BRW_MAX_OFFSET(read_start, reg_start);
- const int right = BRW_MIN_OFFSET(read_end, reg_end);
-
- return left < right;
-}
-/* Report whether an instruction may read from the GRF range that starts at
- * register reg_index and spans `size` units (same unit as BRW_SIZE_OF_REG).
- *
- * inst:      instruction to inspect.
- * reg_index: number of the first general register of the queried range.
- * size:      extent of the queried range.
- *
- * The source count comes from the inst_opcode table. For src0 and then src1
- * the function walks every element of the source region (rows of `width`
- * elements, stepping by the horizontal stride inside a row and by the
- * vertical stride between rows) and returns GL_TRUE as soon as one element
- * overlaps the queried range. An indirectly addressed source whose register
- * file is the GRF is reported as a read without further analysis.
- *
- * The locals named write_start/write_end hold offsets of elements being
- * read; only the names say "write".
- */
-static INLINE GLboolean
-brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
-{
- int i, j;
- if (inst_opcode[inst->header.opcode].nsrc == 0)
- return GL_FALSE;
-
- /* Look at first source. We must take into account register regions to
- * monitor carefully the read. Note that we are a bit too conservative here
- * since we do not take into account the fact that some complete registers
- * may be skipped
- */
- if (inst_opcode[inst->header.opcode].nsrc >= 1) {
-
- if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
- if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
- return GL_TRUE;
- if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE)
- return GL_FALSE; /* NOTE(review): returns before src1 is examined -- confirm intended */
-
- const int reg_start = reg_index*BRW_SIZE_OF_REG;
- const int reg_end = reg_start + size;
-
- /* See if at least one of this element intersects the interval */
- const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type];
- const int elem_num = 1 << inst->header.execution_size; /* field is a log2 count */
- const int width = 1 << inst->bits2.da1.src0_width; /* elements per row */
- const int row_num = elem_num >> inst->bits2.da1.src0_width; /* elem_num / width */
- const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride];
- const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride];
- int row_start = inst->bits2.da1.src0_reg_nr*BRW_SIZE_OF_REG
- + inst->bits2.da1.src0_subreg_nr;
- for (j = 0; j < row_num; ++j) {
- int write_start = row_start;
- for (i = 0; i < width; ++i) {
- const int write_end = write_start + type_size;
- const int left = write_start > reg_start ? write_start : reg_start;
- const int right = write_end < reg_end ? write_end : reg_end;
- if (left < right)
- return GL_TRUE;
- write_start += hs;
- }
- row_start += vs;
- }
- }
-
- /* Second src register: same region walk as above, using the src1 fields */
- if (inst_opcode[inst->header.opcode].nsrc >= 2) {
-
- if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
- if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
- return GL_TRUE;
- if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE)
- return GL_FALSE;
-
- const int reg_start = reg_index*BRW_SIZE_OF_REG;
- const int reg_end = reg_start + size;
-
- /* See if at least one of this element intersects the interval */
- const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type];
- const int elem_num = 1 << inst->header.execution_size;
- const int width = 1 << inst->bits3.da1.src1_width;
- const int row_num = elem_num >> inst->bits3.da1.src1_width;
- const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride];
- const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride];
- int row_start = inst->bits3.da1.src1_reg_nr*BRW_SIZE_OF_REG
- + inst->bits3.da1.src1_subreg_nr;
- for (j = 0; j < row_num; ++j) {
- int write_start = row_start;
- for (i = 0; i < width; ++i) {
- const int write_end = write_start + type_size;
- const int left = write_start > reg_start ? write_start : reg_start;
- const int right = write_end < reg_end ? write_end : reg_end;
- if (left < right)
- return GL_TRUE;
- write_start += hs;
- }
- row_start += vs;
- }
- }
-
- return GL_FALSE;
-}
-/* Return non-zero when any of the header fields dependency_control,
- * thread_control, mask_control, saturate or debug_control is set on the
- * given instruction. The parameter is named `mov`, but callers in this file
- * also pass instructions that are not moves.
- */
-static INLINE GLboolean
-brw_is_control_done(const struct brw_instruction *mov) {
- return
- mov->header.dependency_control != 0 ||
- mov->header.thread_control != 0 ||
- mov->header.mask_control != 0 ||
- mov->header.saturate != 0 ||
- mov->header.debug_control != 0;
-}
-/* Return non-zero when the instruction's header.predicate_control field is
- * non-zero.
- */
-static INLINE GLboolean
-brw_is_predicated(const struct brw_instruction *mov) {
- return mov->header.predicate_control != 0;
-}
-/* Test whether `mov` has the operand layout of a plain GRF -> MRF copy and,
- * if it does, hand back the registers involved.
- *
- * mov:       instruction to inspect.
- * mrf_index: out; low four bits of the destination register number.
- * grf_index: out; src0 register number.
- * is_compr4: out; GL_TRUE when any of bits 4-7 of the destination register
- *            number is set.
- *
- * Returns GL_TRUE and fills the three outputs when every check passes;
- * returns GL_FALSE and leaves the outputs untouched otherwise.
- *
- * NOTE(review): no check on header.opcode is made here, so the "mov" part of
- * the name rests on the operand layout alone -- confirm callers are fine
- * with that. The register type value 7 is presumably the float type --
- * TODO confirm against the register type enum.
- */
-static INLINE GLboolean
-brw_is_grf_to_mrf_mov(const struct brw_instruction *mov,
- int *mrf_index,
- int *grf_index,
- GLboolean *is_compr4)
-{
- if (brw_is_predicated(mov) ||
- brw_is_control_done(mov) ||
- mov->header.debug_control != 0) /* already covered by brw_is_control_done() */
- return GL_FALSE;
-
- if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
- mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
- mov->bits1.da1.dest_reg_type != 7 ||
- mov->bits1.da1.dest_horiz_stride != 1 ||
- mov->bits1.da1.dest_subreg_nr != 0)
- return GL_FALSE;
-
- if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
- mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
- mov->bits1.da1.src0_reg_type != 7 ||
- mov->bits2.da1.src0_width != 3 ||
- mov->bits2.da1.src0_horiz_stride != 1 ||
- mov->bits2.da1.src0_vert_stride != 4 ||
- mov->bits2.da1.src0_subreg_nr != 0 ||
- mov->bits2.da1.src0_abs != 0 ||
- mov->bits2.da1.src0_negate != 0)
- return GL_FALSE;
-
- *grf_index = mov->bits2.da1.src0_reg_nr;
- *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
- *is_compr4 = (mov->bits1.da1.dest_reg_nr & 0xf0) != 0;
- return GL_TRUE;
-}
-/* Return GL_TRUE when `inst` is an arithmetic instruction (as decided by
- * brw_is_arithmetic_inst(), defined elsewhere) that writes register
- * grf_index of the GRF in the simple form checked below: direct addressing,
- * register type 7, horizontal stride field 1, sub-register 0, align1 access
- * mode, execution_size field 4, and no control bits set. Predication is
- * tolerated only when the opcode is SEL.
- */
-static INLINE GLboolean
-brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index)
-{
- /* remark: no problem to predicate a SEL instruction */
- if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) &&
- brw_is_control_done(inst) == GL_FALSE &&
- inst->header.execution_size == 4 &&
- inst->header.access_mode == BRW_ALIGN_1 &&
- inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT &&
- inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE &&
- inst->bits1.da1.dest_reg_type == 7 &&
- inst->bits1.da1.dest_horiz_stride == 1 &&
- inst->bits1.da1.dest_reg_nr == grf_index &&
- inst->bits1.da1.dest_subreg_nr == 0 &&
- brw_is_arithmetic_inst(inst))
- return GL_TRUE;
-
- return GL_FALSE;
-}
-/* Compare two instructions by viewing each as an array of GLuint words and
- * testing the first four words for equality. Returns non-zero when all four
- * match.
- *
- * NOTE(review): assumes struct brw_instruction is exactly four GLuint words
- * wide -- confirm against the struct definition.
- */
-static INLINE GLboolean
-brw_inst_are_equal(const struct brw_instruction *src0,
- const struct brw_instruction *src1)
-{
- const GLuint *field0 = (GLuint *) src0;
- const GLuint *field1 = (GLuint *) src1;
- return field0[0] == field1[0] &&
- field0[1] == field1[1] &&
- field0[2] == field1[2] &&
- field0[3] == field1[3];
-}
-/* Copy one instruction over another by viewing both as arrays of GLuint
- * words and assigning the first four words of src to dst.
- *
- * NOTE(review): assumes struct brw_instruction is exactly four GLuint words
- * wide -- confirm against the struct definition.
- */
-static INLINE void
-brw_inst_copy(struct brw_instruction *dst,
- const struct brw_instruction *src)
-{
- GLuint *field_dst = (GLuint *) dst;
- const GLuint *field_src = (GLuint *) src;
- field_dst[0] = field_src[0];
- field_dst[1] = field_src[1];
- field_dst[2] = field_src[2];
- field_dst[3] = field_src[3];
-}
-/* Compact p->store in place, dropping every instruction whose entry in
- * removeInst is non-zero, and update p->nr_insn to the number kept.
- *
- * p:          compile state whose store/nr_insn are rewritten.
- * removeInst: one flag per instruction; indexed from 0 to p->nr_insn - 1.
- */
-static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst)
-{
- int i, nr_insn = 0, to = 0, from = 0;
-
- for (from = 0; from < p->nr_insn; ++from) {
- if (removeInst[from])
- continue;
- if(to != from) /* nothing to move while no instruction has been dropped yet */
- brw_inst_copy(p->store + to, p->store + from);
- to++;
- }
-
- for (i = 0; i < p->nr_insn; ++i) /* recount survivors; same value `to` holds */
- if (removeInst[i] == GL_FALSE)
- nr_insn++;
- p->nr_insn = nr_insn;
-}
-
-/* The gen code emitter generates a lot of duplicated grf-to-mrf moves.
- * Here, for each straight grf-to-mrf mov, we scan forward and flag later
- * bit-identical instructions for removal, stopping as soon as an instruction
- * writes the source grf or one of the two destination mrf registers.
- * Flagged instructions are then dropped with brw_remove_inst().
- */
-static void brw_remove_duplicate_mrf_moves(struct brw_wm_compile *c)
-{
- struct brw_compile *p = &c->func;
- const int gen = p->brw->intel.gen;
- int i, j;
-
- GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); /* NOTE(review): result is used unchecked below */
- for (i = 0; i < p->nr_insn; i++) {
- if (removeInst[i])
- continue;
-
- const struct brw_instruction *mov = p->store + i;
- int mrf_index, grf_index;
- GLboolean is_compr4;
-
- /* Only consider _straight_ grf-to-mrf moves */
- if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
- continue;
-
- const int mrf_index0 = mrf_index;
- const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1; /* second mrf written by the mov */
- const int simd16_size = 2 * BRW_SIZE_OF_REG; /* source spans two registers */
-
- for (j = i + 1; j < p->nr_insn; j++) {
- const struct brw_instruction *inst = p->store + j;
-
- if (brw_inst_are_equal(mov, inst)) {
- removeInst[j] = GL_TRUE;
- continue;
- }
-
- if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
- brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) ||
- brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG))
- break;
- }
- }
-
- brw_remove_inst(p, removeInst);
- free(removeInst);
-}
-/* Fold straight grf-to-mrf moves into the instruction that produced the grf.
- *
- * Does nothing unless c->dispatch_width is 16 and the context reports
- * has_compr4. For every straight grf-to-mrf mov the function:
- *   1. searches backwards for the nearest straight write to the source grf;
- *   2. gives up on the mov if any instruction between that write and the mov
- *      reads or writes the grf or either destination mrf;
- *   3. gives up on the mov if, after it, the grf is read before the next
- *      straight write to it;
- *   4. otherwise rewrites the producing instruction's destination register
- *      file and number to those of the mov, and flags the mov for removal.
- * Flagged moves are dropped at the end with brw_remove_inst().
- */
-static void brw_remove_mrf_to_grf_moves(struct brw_wm_compile *c)
-{
- int i, j, prev;
- struct brw_compile *p = &c->func;
- struct brw_context *brw = p->brw;
- const int gen = brw->intel.gen;
- const int simd16_size = 2*BRW_SIZE_OF_REG;
-
- if (c->dispatch_width != 16 || brw->has_compr4 == GL_FALSE)
- return;
-
- GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn);
- assert(removeInst);
-
- for (i = 0; i < p->nr_insn; i++) {
- if (removeInst[i])
- continue;
-
- struct brw_instruction *grf_inst = NULL;
- const struct brw_instruction *mov = p->store + i;
- int mrf_index, grf_index;
- GLboolean is_compr4;
-
- /* Only consider _straight_ grf-to-mrf moves */
- if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
- continue;
-
- /* Using comp4 enables a stride of 4 for this instruction */
- const int mrf_index0 = mrf_index;
- const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1;
-
- /* Look where the register has been set */
- prev = i;
- GLboolean potential_remove = GL_FALSE;
- while (prev--) {
-
- /* If _one_ instruction writes the grf, we try to remove the mov */
- struct brw_instruction *inst = p->store + prev;
- if (brw_is_grf_straight_write(inst, grf_index)) {
- potential_remove = GL_TRUE;
- grf_inst = inst;
- break;
- }
-
- }
-
- if (potential_remove == GL_FALSE)
- continue;
- removeInst[i] = GL_TRUE; /* tentative; the two scans below may clear it */
-
- /* Monitor first the section of code between the grf computation and the
- * mov. Here we cannot read or write both mrf and grf register
- */
- for (j = prev + 1; j < i; ++j) {
- struct brw_instruction *inst = p->store + j;
- if (removeInst[j])
- continue;
- if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
- brw_is_grf_read(inst, grf_index, simd16_size) ||
- brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) ||
- brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG) ||
- brw_is_mrf_read(inst, mrf_index0, BRW_SIZE_OF_REG, gen) ||
- brw_is_mrf_read(inst, mrf_index1, BRW_SIZE_OF_REG, gen)) {
- removeInst[i] = GL_FALSE;
- break;
- }
- }
-
- /* After the mov, we can read or write the mrf. If the grf is overwritten,
- * we are done
- */
- for (j = i + 1; j < p->nr_insn; ++j) {
- struct brw_instruction *inst = p->store + j;
- if (removeInst[j])
- continue;
-
- if (brw_is_grf_read(inst, grf_index, simd16_size)) {
- removeInst[i] = GL_FALSE;
- break;
- }
-
- if (brw_is_grf_straight_write(inst, grf_index))
- break;
- }
-
- /* Note that with the top down traversal, we can safely patch the
- * instruction that computed the grf so that it writes the mrf directly
- */
- if (removeInst[i]) {
- grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file;
- grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr;
- }
- }
-
- brw_remove_inst(p, removeInst);
- free(removeInst);
-}
/* Emit the fragment program instructions here.
*/
void brw_wm_emit( struct brw_wm_compile *c )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
GLuint insn;
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ if (intel->gen >= 6)
+ brw_set_acc_write_control(p, 1);
/* Check if any of the payload regs need to be spilled:
*/
break;
case OPCODE_DDX:
- emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ emit_ddxy(p, dst, dst_flags, true, args[0], false);
break;
case OPCODE_DDY:
- emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ /* Make sure fp->program.UsesDFdy flag got set (otherwise there's no
+ * guarantee that c->key.render_to_fbo is set).
+ */
+ assert(c->fp->program.UsesDFdy);
+ emit_ddxy(p, dst, dst_flags, false, args[0], c->key.render_to_fbo);
+ break;
+
+ case OPCODE_DP2:
+ emit_dp2(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DP3:
break;
case OPCODE_TRUNC:
- emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
+ for (i = 0; i < 4; i++) {
+ if (dst_flags & (1<<i)) {
+ brw_RNDZ(p, dst[i], args[0][i]);
+ }
+ }
break;
case OPCODE_LRP:
emit_sne(p, dst, dst_flags, args[0], args[1]);
break;
+ case OPCODE_SSG:
+ emit_sign(p, dst, dst_flags, args[0]);
+ break;
+
case OPCODE_LIT:
emit_lit(c, dst, dst_flags, args[0]);
break;
emit_kil(c, args[0]);
break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
-
default:
printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
/* Only properly tested on ILK */
if (p->brw->intel.gen == 5) {
- brw_remove_duplicate_mrf_moves(c);
- brw_remove_mrf_to_grf_moves(c);
+ brw_remove_duplicate_mrf_moves(p);
+ if (c->dispatch_width == 16)
+ brw_remove_grf_to_mrf_moves(p);
}
- if (INTEL_DEBUG & DEBUG_WM) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
int i;
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
+ brw_disasm(stdout, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}