for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_IF:
- case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
for (r = 0; r < BRW_WM_MAX_GRF; r++) {
assert(c->used_grf[r]);
}
- /*printf("Really out of temp regs!\n");*/
- return 60;
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
}
/* if we need to allocate another temp, grow the tmp_regs[] array */
if (c->tmp_index == c->tmp_max) {
- c->tmp_regs[ c->tmp_max++ ] = alloc_grf(c);
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
}
/* form the GRF register */
}
assert(index < 256);
+ assert(component < 4);
+
/* see if we've already allocated a HW register for this Mesa register */
if (c->wm_regs[file][index][component].inited) {
/* yes, re-use */
else {
/* no, allocate new register */
int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
if (grf < 0) {
/* totally out of temps */
- grf = 70; /* XXX !!!! */
+ grf = 51; /* XXX random register! */
}
reg = brw_vec8_grf(grf, 0);
{
int i, j;
struct brw_reg reg;
- int nr_interp_regs = 0;
+ int urb_read_length = 0;
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
GLuint reg_index = 0;
}
/* fragment shader inputs */
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (inputs & (1<<i)) {
- nr_interp_regs++;
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- reg_index += 2;
- }
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.vp_outputs_written & (1 << i)) {
+ reg_index += 2;
+ }
}
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
- c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
reg_index++;
for (i = 0; i < reg_index; i++)
prealloc_grf(c, i);
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
+
/* An instruction may reference up to three constants.
* They'll be found in these registers.
* XXX alloc these on demand!
}
#if 0
printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
#endif
}
/* need to fetch the constant now */
brw_dp_READ_4(p,
c->current_const[i].reg, /* writeback dest */
- 1, /* msg_reg */
src->RelAddr, /* relative indexing? */
16 * src->Index, /* byte offset */
SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
const GLuint nr = 1;
const GLuint component = GET_SWZ(src->Swizzle, channel);
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
if (c->fp->use_const_buffer &&
(src->File == PROGRAM_STATE_VAR ||
src->File == PROGRAM_CONSTANT ||
}
if (c->key.dest_depth_reg) {
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
- assert(comp == 1);
- assert(off == 0);
-#if 0
- /* XXX do we need this code? comp always 1, off always 0, it seems */
if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
/* 2nd half? */
- brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
brw_pop_insn_state(p);
}
else
-#endif
{
- struct brw_reg src = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src);
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
}
nr += 2;
}
struct brw_reg dst[4], src[4], payload_reg;
GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
GLuint i;
+ GLuint msg_type;
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
}
brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+ } else {
+ /* Does it work well on SIMD8? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ }
+
brw_SAMPLE(p,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */
+ msg_type, /* msg_type */
4, /* response_length */
4, /* msg_length */
- 0); /* eot */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
}
GLuint i, nr;
GLuint emit;
GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
brw_SAMPLE(p,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
- BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */
+ msg_type, /* msg_type */
4, /* response_length */
shadow ? 6 : 4, /* msg_length */
- 0); /* eot */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
if (shadow)
brw_MOV(p, dst[3], brw_imm_f(1.0));
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
- struct brw_instruction *inst0, *inst1;
- int i, if_insn = 0, loop_insn = 0;
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
+ c->out_of_regs = GL_FALSE;
+
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
emit_trunc(c, inst);
break;
case OPCODE_MOV:
+ case OPCODE_SWZ:
emit_mov(c, inst);
break;
case OPCODE_DP3:
emit_kil(c);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
case OPCODE_BGNSUB:
brw_save_label(p, inst->Comment, p->nr_insn);
break;
case OPCODE_BGNLOOP:
/* XXX may need to invalidate the current_constant regs */
- loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
brw_BREAK(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
- loop_insn--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
- /* patch all the BREAK instructions from
- last BEGINLOOP */
- while (inst0 > loop_inst[loop_insn]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
- } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
- }
- }
- break;
+ }
+ }
+ }
+ break;
default:
_mesa_printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
}