#include "main/macros.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_optimize.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
{
int i;
+ if (INTEL_DEBUG & DEBUG_GLSL_FORCE)
+ return GL_TRUE;
+
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
*/
static void prealloc_reg(struct brw_wm_compile *c)
{
+ struct intel_context *intel = &c->func.brw->intel;
int i, j;
struct brw_reg reg;
int urb_read_length = 0;
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
}
- if (c->key.vp_outputs_written & (1 << i)) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
reg_index += 2;
}
}
}
}
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+
+ switch (inst->Opcode) {
+ case WM_DELTAXY:
+ /* Allocate WM_DELTAXY destination on G45/GM45 to an
+ * even-numbered GRF if possible so that we can use the PLN
+ * instruction.
+ */
+ if (inst->DstReg.WriteMask == WRITEMASK_XY &&
+ !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
+ !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
+ (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
+ int grf;
+
+ for (grf = c->first_free_grf & ~1;
+ grf < BRW_WM_MAX_GRF;
+ grf += 2)
+ {
+ if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
+ c->used_grf[grf] = GL_TRUE;
+ c->used_grf[grf + 1] = GL_TRUE;
+ c->first_free_grf = grf + 2; /* a guess */
+
+ set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
+ brw_vec8_grf(grf, 0));
+ set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
+ brw_vec8_grf(grf + 1, 0));
+ break;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
/* An instruction may reference up to three constants.
* They'll be found in these registers.
* XXX alloc these on demand!
const GLuint nr = 1;
const GLuint component = GET_SWZ(src->Swizzle, channel);
- /* Extended swizzle terms */
- if (component == SWIZZLE_ZERO) {
- return brw_imm_f(0.0F);
- }
- else if (component == SWIZZLE_ONE) {
- return brw_imm_f(1.0F);
+ /* Only one immediate value can be used per native opcode, and it
+ * has be in the src1 slot, so not all Mesa instructions will get
+ * to take advantage of immediate constants.
+ */
+ if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) {
+ const struct gl_program_parameter_list *params;
+
+ params = c->fp->program.Base.Parameters;
+
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ } else if (component == SWIZZLE_ONE) {
+ if (src->Negate)
+ return brw_imm_f(-1.0F);
+ else
+ return brw_imm_f(1.0F);
+ }
+
+ if (src->File == PROGRAM_CONSTANT) {
+ float f = params->ParameterValues[src->Index][component];
+
+ if (src->Abs)
+ f = fabs(f);
+ if (src->Negate)
+ f = -f;
+
+ return brw_imm_f(f);
+ }
}
if (c->fp->use_const_buffer &&
}
}
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written. Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
- void (*func)(struct brw_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
- int i, j;
- int mark = mark_tmps(c);
-
- for (j = 0; j < 4; j++) {
- tmp_arg0[j] = arg0[j];
- tmp_arg1[j] = arg1[j];
- tmp_arg2[j] = arg2[j];
- }
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- for (j = 0; j < 4; j++) {
- if (arg0[j].file == dst[i].file &&
- dst[i].nr == arg0[j].nr) {
- tmp_arg0[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg0[j], arg0[j]);
- }
- if (arg1[j].file == dst[i].file &&
- dst[i].nr == arg1[j].nr) {
- tmp_arg1[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg1[j], arg1[j]);
- }
- if (arg2[j].file == dst[i].file &&
- dst[i].nr == arg2[j].nr) {
- tmp_arg2[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg2[j], arg2[j]);
- }
- }
- }
- }
-
- func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
- release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written. Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
- void (*func)(struct brw_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp_arg0[4], tmp_arg1[4];
- int i, j;
- int mark = mark_tmps(c);
-
- for (j = 0; j < 4; j++) {
- tmp_arg0[j] = arg0[j];
- tmp_arg1[j] = arg1[j];
- }
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- for (j = 0; j < 4; j++) {
- if (arg0[j].file == dst[i].file &&
- dst[i].nr == arg0[j].nr) {
- tmp_arg0[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg0[j], arg0[j]);
- }
- if (arg1[j].file == dst[i].file &&
- dst[i].nr == arg1[j].nr) {
- tmp_arg1[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg1[j], arg1[j]);
- }
- }
- }
- }
-
- func(p, dst, mask, tmp_arg0, tmp_arg1);
-
- release_tmps(c, mark);
-}
-
static void emit_arl(struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
brw_AND(p, depth, c->emit_mask_reg, depth);
brw_pop_insn_state(p);
}
release_tmps( c, mark );
}
-
-/* TODO
- BIAS on SIMD8 not working yet...
- */
-static void emit_txb(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
- const GLuint unit = inst->TexSrcUnit;
- GLuint i;
- GLuint msg_type;
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, inst, 0, i);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
- brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- break;
- case TEXTURE_3D_INDEX:
- case TEXTURE_CUBE_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), src[2]);
- break;
- default:
- /* invalid target */
- abort();
- }
- brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
-
- if (BRW_IS_IGDNG(p->brw)) {
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
- } else {
- /* Does it work well on SIMD8? */
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
- 1, /* msg_reg_nr */
- retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- SURF_INDEX_TEXTURE(unit),
- unit, /* sampler */
- inst->DstReg.WriteMask, /* writemask */
- msg_type, /* msg_type */
- 4, /* response_length */
- 4, /* msg_length */
- 0, /* eot */
- 1,
- BRW_SAMPLER_SIMD_MODE_SIMD8);
-}
-
-
-static void emit_tex(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
- const GLuint unit = inst->TexSrcUnit;
- GLuint msg_len;
- GLuint i, nr;
- GLuint emit;
- GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
- GLuint msg_type;
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, inst, 0, i);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- emit = WRITEMASK_X;
- nr = 1;
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- emit = WRITEMASK_XY;
- nr = 2;
- break;
- case TEXTURE_3D_INDEX:
- case TEXTURE_CUBE_INDEX:
- emit = WRITEMASK_XYZ;
- nr = 3;
- break;
- default:
- /* invalid target */
- abort();
- }
- msg_len = 1;
-
- /* move/load S, T, R coords */
- for (i = 0; i < nr; i++) {
- static const GLuint swz[4] = {0,1,2,2};
- if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
- else
- brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
- msg_len += 1;
- }
-
- if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
- brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
- }
-
- if (BRW_IS_IGDNG(p->brw)) {
- if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
- } else {
- /* Does it work for shadow on SIMD8 ? */
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
- 1, /* msg_reg_nr */
- retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- SURF_INDEX_TEXTURE(unit),
- unit, /* sampler */
- inst->DstReg.WriteMask, /* writemask */
- msg_type, /* msg_type */
- 4, /* response_length */
- shadow ? 6 : 4, /* msg_length */
- 0, /* eot */
- 1,
- BRW_SAMPLER_SIMD_MODE_SIMD8);
-
- if (shadow)
- brw_MOV(p, dst[3], brw_imm_f(1.0));
-}
-
-
/**
* Resolve subroutine calls after code emit is done.
*/
get_argument_regs(struct brw_wm_compile *c,
const struct prog_instruction *inst,
int index,
+ struct brw_reg *dst,
struct brw_reg *regs,
int mask)
{
- int i;
+ struct brw_compile *p = &c->func;
+ int i, j;
for (i = 0; i < 4; i++) {
- if (mask & (1 << i))
+ if (mask & (1 << i)) {
regs[i] = get_src_reg(c, inst, index, i);
+
+ /* Unalias destination registers from our sources. */
+ if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+ for (j = 0; j < 4; j++) {
+ if (memcmp(®s[i], &dst[j], sizeof(regs[0])) == 0) {
+ struct brw_reg tmp = alloc_tmp(c);
+ brw_MOV(p, tmp, regs[i]);
+ regs[i] = tmp;
+ break;
+ }
+ }
+ }
+ }
}
}
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
+ struct intel_context *intel = &brw->intel;
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
int dst_flags;
struct brw_reg args[3][4], dst[4];
int j;
+ int mark = mark_tmps( c );
c->cur_inst = i;
#if 0
- _mesa_printf("Inst %d: ", i);
+ printf("Inst %d: ", i);
_mesa_print_instruction(inst);
#endif
}
}
for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
- get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+ get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
dst_flags = inst->DstReg.WriteMask;
if (inst->SaturateMode == SATURATE_ZERO_ONE)
else
brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
- dst_flags = inst->DstReg.WriteMask;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- dst_flags |= SATURATE;
-
switch (inst->Opcode) {
case WM_PIXELXY:
emit_pixel_xy(c, dst, dst_flags);
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
- unalias3(c, emit_lrp,
- dst, dst_flags, args[0], args[1], args[2]);
+ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_TRUNC:
emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
case OPCODE_LG2:
emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
+ case OPCODE_CMP:
+ emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
case OPCODE_MIN:
- unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+ emit_min(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_MAX:
- unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+ emit_max(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DDX:
case OPCODE_DDY:
emit_noise4(c, inst);
break;
case OPCODE_TEX:
- emit_tex(c, inst);
+ emit_tex(c, dst, dst_flags, args[0],
+ get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+ 0, 1, 0, 0),
+ inst->TexSrcTarget,
+ inst->TexSrcUnit,
+ (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
break;
case OPCODE_TXB:
- emit_txb(c, inst);
+ emit_txb(c, dst, dst_flags, args[0],
+ get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+ 0, 1, 0, 0),
+ inst->TexSrcTarget,
+ c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
break;
case OPCODE_KIL_NV:
emit_kil(c);
if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
+ assert(if_depth > 0);
if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
- if (BRW_IS_IGDNG(brw))
+ if (intel->gen == 5)
br = 2;
-
+
+ assert(loop_depth > 0);
loop_depth--;
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
/* patch all the BREAK/CONT instructions from last BGNLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
}
break;
default:
- _mesa_printf("unsupported IR in fragment shader %d\n",
- inst->Opcode);
+ printf("unsupported opcode %d (%s) in fragment shader\n",
+ inst->Opcode, inst->Opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->Opcode) : "unknown");
}
+ /* Release temporaries containing any unaliased source regs. */
+ release_tmps( c, mark );
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
post_wm_emit(c);
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("wm-native:\n");
+ printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
- _mesa_printf("\n");
+ brw_disasm(stderr, &p->store[i], intel->gen);
+ printf("\n");
}
}
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("brw_wm_glsl_emit:\n");
+ printf("brw_wm_glsl_emit:\n");
}
/* initial instruction translation/simplification */