/* See emit_vertex_write() for where the VUE's overhead on top of the
* attributes comes from.
*/
- if (intel->gen >= 6) {
+ if (intel->gen >= 7) {
+ int header_regs = 2;
+ if (c->key.nr_userclip)
+ header_regs += 2;
+
+ /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the
+ * number of 64-byte (512-bit) units.
+ */
+ c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 3) / 4;
+ } else if (intel->gen == 6) {
int header_regs = 2;
if (c->key.nr_userclip)
header_regs += 2;
struct brw_reg arg0 )
{
struct brw_compile *p = &c->func;
- struct brw_instruction *if_insn;
struct brw_reg tmp = dst;
GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
* BRW_EXECUTE_1 for all comparisions.
*/
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
- if_insn = brw_IF(p, BRW_EXECUTE_8);
+ brw_IF(p, BRW_EXECUTE_8);
{
brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
brw_swizzle1(arg0, 3),
BRW_MATH_PRECISION_PARTIAL);
}
-
- brw_ENDIF(p, if_insn);
+ brw_ENDIF(p);
release_tmp(c, tmp);
}
if (component >= 0) {
params = c->vp->program.Base.Parameters;
- f = params->ParameterValues[src->Index][component];
+ f = params->ParameterValues[src->Index][component].f;
if (src->Abs)
f = fabs(f);
}
}
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (intel->gen >= 6) {
+ /* URB data written (does not include the message header reg) must
+ * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
+ * section 5.4.3.2.2: URB_INTERLEAVED.
+ *
+ * URB entries are allocated on a multiple of 1024 bits, so an
+ * extra 128 bits written here to make the end align to 256 is
+ * no problem.
+ */
+ if ((mlen % 2) != 1)
+ mlen++;
+ }
+
+ return mlen;
+}
/**
* Post-vertex-program processing. Send the results to the URB.
else
m = brw_message_reg(4);
- brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]);
+ brw_DP4(p, brw_writemask(m, (1 << (i & 3))),pos, c->userplane[i]);
}
}
} else if ((c->prog_data.outputs_written &
eot = (c->first_overflow_output == 0);
- msg_len = c->nr_outputs + 2 + len_vertex_header;
- if (intel->gen >= 6) {
- /* interleaved urb write message length for gen6 should be multiple of 2 */
- if ((msg_len % 2) != 0)
- msg_len++;
- }
+ /* Message header, plus VUE header, plus the (first set of) outputs. */
+ msg_len = 1 + len_vertex_header + c->nr_outputs;
+ msg_len = align_interleaved_urb_mlen(brw, msg_len);
+ /* Any outputs beyond BRW_MAX_MRF should be past first_overflow_output */
+ msg_len = MIN2(msg_len, (BRW_MAX_MRF - 1)),
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
c->r0, /* src */
0, /* allocate */
1, /* used */
- MIN2(msg_len - 1, (BRW_MAX_MRF - 1)), /* msg len */
+ msg_len,
0, /* response len */
eot, /* eot */
eot, /* writes complete */
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf, /* msg len */
+ align_interleaved_urb_mlen(brw, mrf),
0, /* response len */
1, /* eot */
1, /* writes complete */
if (val.address_mode != BRW_ADDRESS_DIRECT)
return GL_FALSE;
+ if (val.negate || val.abs)
+ return GL_FALSE;
+
switch (prev_insn->header.opcode) {
case BRW_OPCODE_MOV:
case BRW_OPCODE_MAC:
}
}
+static void
+brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ int i;
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (!(c->prog_data.inputs_read & (1 << i)))
+ continue;
+
+ if (c->key.gl_fixed_input_size[i] != 0) {
+ struct brw_reg reg = c->regs[PROGRAM_INPUT][i];
+
+ brw_MUL(p,
+ brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
+ reg, brw_imm_f(1.0 / 65536.0));
+ }
+ }
+}
+
/* Emit the vertex program instructions here.
*/
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
{
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
const GLuint nr_insns = c->vp->program.Base.NumInstructions;
- GLuint insn, if_depth = 0, loop_depth = 0;
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 };
+ GLuint insn, loop_depth = 0;
+ struct brw_instruction *loop_inst[MAX_LOOP_DEPTH] = { 0 };
int if_depth_in_loop[MAX_LOOP_DEPTH];
const struct brw_indirect stack_index = brw_indirect(0, 0);
GLuint index;
*/
brw_vs_alloc_regs(c);
+ brw_vs_rescale_gl_fixed(c);
+
if (c->needs_stack)
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- args[i] = c->output_regs[index].reg;
- else
+ if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) {
+ /* Can't just make get_arg "do the right thing" here because
+ * other callers of get_arg and get_src_reg don't expect any
+ * special behavior for the c->output_regs[index].used_in_src
+ * case.
+ */
+ args[i] = c->output_regs[index].reg;
+ args[i].dw1.bits.swizzle =
+ BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+ GET_SWZ(src->Swizzle, 1),
+ GET_SWZ(src->Swizzle, 2),
+ GET_SWZ(src->Swizzle, 3));
+
+ /* Note this is ok for non-swizzle ARB_vp instructions */
+ args[i].negate = src->Negate ? 1 : 0;
+ } else
args[i] = get_arg(c, inst, i);
}
index = inst->DstReg.Index;
file = inst->DstReg.File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- dst = c->output_regs[index].reg;
+ /* Can't just make get_dst "do the right thing" here because other
+ * callers of get_dst don't expect any special behavior for the
+ * c->output_regs[index].used_in_src case.
+ */
+ dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask);
else
dst = get_dst(c, inst->DstReg);
case OPCODE_XPD:
emit_xpd(p, dst, args[0], args[1]);
break;
- case OPCODE_IF:
- assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ case OPCODE_IF: {
+ struct brw_instruction *if_inst = brw_IF(p, BRW_EXECUTE_8);
/* Note that brw_IF smashes the predicate_control field. */
- if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+ if_inst->header.predicate_control = get_predicate(inst);
if_depth_in_loop[loop_depth]++;
- if_depth++;
break;
+ }
case OPCODE_ELSE:
clear_current_const(c);
- assert(if_depth > 0);
- if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+ brw_ELSE(p);
break;
case OPCODE_ENDIF:
clear_current_const(c);
- assert(if_depth > 0);
- brw_ENDIF(p, if_inst[--if_depth]);
+ brw_ENDIF(p);
if_depth_in_loop[loop_depth]--;
break;
case OPCODE_BGNLOOP:
* instructions. Instead, we directly modify the header
* of the last (already stored) instruction.
*/
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ if (inst->DstReg.File == PROGRAM_OUTPUT &&
+ c->key.clamp_vertex_color) {
if ((inst->DstReg.Index == VERT_RESULT_COL0)
|| (inst->DstReg.Index == VERT_RESULT_COL1)
|| (inst->DstReg.Index == VERT_RESULT_BFC0)