* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
-#include "main/macros.h"
-#include "shader/program.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
#include "brw_context.h"
#include "brw_vs.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+/* Choose one of the 4 vec4's which can be packed into each 16-wide reg.
+ */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+ int nr = reg + slot/2;
+ int subnr = (slot%2) * 4;
+
+ return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
+}
static struct brw_reg get_tmp( struct brw_vs_compile *c )
}
+static boolean is_position_output( struct brw_vs_compile *c,
+ unsigned vs_output )
+{
+ const struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+
+ return (semantic == TGSI_SEMANTIC_POSITION &&
+ index == 0);
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+ unsigned vs_output,
+ unsigned *fs_input_slot )
+{
+ const struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+ unsigned i;
+
+ for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+ if (c->key.fs_signature.input[i].semantic == semantic &&
+ c->key.fs_signature.input[i].semantic_index == index) {
+ *fs_input_slot = i;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
/**
* Preallocate GRF register before code emit.
* Do things as simply as possible. Allocate and populate all regs
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
- GLuint i, reg = 0, mrf;
+ GLuint i, reg = 0, subreg = 0, mrf;
int attributes_in_vue;
/* Determine whether to use a real constant buffer or use a block
* works if everything fits in the GRF.
* XXX this heuristic/check may need some fine tuning...
*/
- if (c->vp->program.Base.Parameters->NumParameters +
- c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+ c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF)
c->vp->use_const_buffer = GL_TRUE;
- else
+ else {
+ /* XXX: immediates can go elsewhere if necessary:
+ */
+ assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF);
+
c->vp->use_const_buffer = GL_FALSE;
+ }
/*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* User clip planes from curbe:
*/
if (c->key.nr_userclip) {
- for (i = 0; i < c->key.nr_userclip; i++) {
- c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+ /* Skip over fixed planes: Or never read them into vs unit?
+ */
+ subreg += 6;
+
+ for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+ c->userplane[i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
}
/* Deal with curbe alignment:
*/
- reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+ subreg = align(subreg, 2);
+ /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
}
- /* Vertex program parameters from curbe:
+
+ /* Immediates: always in the curbe.
+ *
+ * XXX: Can try to encode some immediates as brw immediates
+ * XXX: Make sure ureg sets minimal immediate size and respect it
+ * here.
*/
- if (c->vp->use_const_buffer) {
- /* get constants from a real constant buffer */
- c->prog_data.curb_read_length = 0;
- c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+ c->regs[TGSI_FILE_IMMEDIATE][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
}
- else {
- /* use a section of the GRF for constants */
- GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+ /* Vertex constant buffer.
+ *
+ * Constants from the buffer can be either cached in the curbe or
+ * loaded as needed from the actual constant buffer.
+ */
+ if (!c->vp->use_const_buffer) {
+ GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ for (i = 0; i < nr_params; i++, subreg++) {
+ c->regs[TGSI_FILE_CONSTANT][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
}
- reg += (nr_params + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = nr_params * 4;
+ c->prog_data.nr_params += nr_params * 4;
}
+ /* All regs allocated
+ */
+ reg += (subreg + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+
+
/* Allocate input regs:
*/
- c->nr_inputs = 0;
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (c->prog_data.inputs_read & (1 << i)) {
- c->nr_inputs++;
- c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
+ c->nr_inputs = c->vp->info.num_inputs;
+ for (i = 0; i < c->nr_inputs; i++) {
+ c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
}
+
/* If there are no inputs, we'll still be reading one attribute's worth
* because it's required -- see urb_read_length setting.
*/
if (c->nr_inputs == 0)
reg++;
+
+
/* Allocate outputs. The non-position outputs go straight into message regs.
*/
- c->nr_outputs = 0;
- c->first_output = reg;
- c->first_overflow_output = 0;
+ c->nr_outputs = c->prog_data.nr_outputs;
- if (BRW_IS_IGDNG(c->func.brw))
- mrf = 8;
+ if (c->chipset.is_igdng)
+ mrf = 8;
else
- mrf = 4;
-
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1 << i)) {
- c->nr_outputs++;
- assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
- if (i == VERT_RESULT_HPOS) {
- c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
- else if (i == VERT_RESULT_PSIZ) {
- c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
- }
- else {
- if (mrf < 16) {
- c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
- }
- else {
- /* too many vertex results to fit in MRF, use GRF for overflow */
- if (!c->first_overflow_output)
- c->first_overflow_output = i;
- c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
- }
+ mrf = 4;
+
+
+ if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+ c->overflow_grf_start = reg;
+ c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+ reg += c->overflow_count;
+ }
+
+ /* XXX: need to access vertex output semantics here:
+ */
+ for (i = 0; i < c->nr_outputs; i++) {
+ unsigned slot;
+
+ /* XXX: Put output position in slot zero always. Clipper, etc,
+ * need access to this reg.
+ */
+ if (is_position_output(c, i)) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
+ reg++;
+ }
+ else if (find_output_slot(c, i, &slot)) {
+
+ if (0 /* is_psize_output(c, i) */ ) {
+ /* c->psize_out.grf = reg; */
+ /* c->psize_out.mrf = i; */
+ }
+
+ /* The first (16-4) outputs can go straight into the message regs.
+ */
+ if (slot + mrf < BRW_MAX_MRF) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+ }
+ else {
+ int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+ }
+ }
+ else {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
}
}
/* Allocate program temporaries:
*/
- for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
- c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) {
+ c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
reg++;
}
/* Address reg(s). Don't try to use the internal address reg until
* deref time.
*/
- for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
- c->regs[PROGRAM_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) {
+ c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
reg,
0,
BRW_REGISTER_TYPE_D,
BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XXXX,
- WRITEMASK_X);
+ BRW_WRITEMASK_X);
reg++;
}
}
}
+#if 0
for (i = 0; i < 128; i++) {
if (c->output_regs[i].used_in_src) {
c->output_regs[i].reg = brw_vec8_grf(reg, 0);
reg++;
}
}
+#endif
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
- reg += 2;
+ if (c->vp->has_flow_control) {
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+ }
/* Some opcodes need an internal temporary:
*/
* vertex urb, so is half the amount:
*/
c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+
/* Setting this field to 0 leads to undefined behavior according to the
* the VS_STATE docs. Our VUEs will always have at least one attribute
* sitting in them, even if it's padding.
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
- if (BRW_IS_IGDNG(c->func.brw))
- c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+ if (c->chipset.is_igdng)
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
- c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
c->prog_data.total_grf = reg;
- if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
- _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
- _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+ if (BRW_DEBUG & DEBUG_VS) {
+ debug_printf("%s NumAddrRegs %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_ADDRESS]+1);
+ debug_printf("%s NumTemps %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1);
+ debug_printf("%s reg = %d\n", __FUNCTION__, reg);
}
}
struct brw_compile *p = &c->func;
- if (dst.dw1.bits.writemask & WRITEMASK_X) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) {
struct brw_reg tmp = get_tmp(c);
struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
/* Adjust exponent for floating point:
* exp += 127
*/
- brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127));
+ brw_ADD(p, brw_writemask(tmp_d, BRW_WRITEMASK_X), tmp_d, brw_imm_d(127));
/* Install exponent and sign.
* Excess drops off the edge:
*/
- brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X),
+ brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X),
tmp_d, brw_imm_d(23));
release_tmp(c, tmp);
}
- if (dst.dw1.bits.writemask & WRITEMASK_Y) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) {
/* result[1] = arg0.x - floor(arg0.x) */
- brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+ brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0));
}
- if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
/* As with the LOG instruction, we might be better off just
* doing a taylor expansion here, seeing as we have to do all
* the prep work.
*/
emit_math1(c,
BRW_MATH_FUNCTION_EXP,
- brw_writemask(dst, WRITEMASK_Z),
+ brw_writemask(dst, BRW_WRITEMASK_Z),
brw_swizzle1(arg0, 0),
BRW_MATH_PRECISION_FULL);
}
- if (dst.dw1.bits.writemask & WRITEMASK_W) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
/* result[3] = 1.0; */
- brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1));
}
}
* result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
* result[1].i = (x.i & ((1<<23)-1) + (127<<23)
*/
- if (dst.dw1.bits.writemask & WRITEMASK_XZ) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) {
brw_AND(p,
- brw_writemask(tmp_ud, WRITEMASK_X),
+ brw_writemask(tmp_ud, BRW_WRITEMASK_X),
brw_swizzle1(arg0_ud, 0),
brw_imm_ud((1U<<31)-1));
brw_SHR(p,
- brw_writemask(tmp_ud, WRITEMASK_X),
+ brw_writemask(tmp_ud, BRW_WRITEMASK_X),
tmp_ud,
brw_imm_ud(23));
brw_ADD(p,
- brw_writemask(tmp, WRITEMASK_X),
+ brw_writemask(tmp, BRW_WRITEMASK_X),
retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
brw_imm_d(-127));
}
- if (dst.dw1.bits.writemask & WRITEMASK_YZ) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) {
brw_AND(p,
- brw_writemask(tmp_ud, WRITEMASK_Y),
+ brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
brw_swizzle1(arg0_ud, 0),
brw_imm_ud((1<<23)-1));
brw_OR(p,
- brw_writemask(tmp_ud, WRITEMASK_Y),
+ brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
tmp_ud,
brw_imm_ud(127<<23));
}
- if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
/* result[2] = result[0] + LOG2(result[1]); */
/* Why bother? The above is just a hint how to do this with a
*/
emit_math1(c,
BRW_MATH_FUNCTION_LOG,
- brw_writemask(tmp, WRITEMASK_Z),
+ brw_writemask(tmp, BRW_WRITEMASK_Z),
brw_swizzle1(tmp, 1),
BRW_MATH_PRECISION_FULL);
brw_ADD(p,
- brw_writemask(tmp, WRITEMASK_Z),
+ brw_writemask(tmp, BRW_WRITEMASK_Z),
brw_swizzle1(tmp, 2),
brw_swizzle1(tmp, 0));
}
- if (dst.dw1.bits.writemask & WRITEMASK_W) {
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
/* result[3] = 1.0; */
- brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+ brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1));
}
if (need_tmp) {
/* There must be a better way to do this:
*/
- if (dst.dw1.bits.writemask & WRITEMASK_X)
- brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0));
- if (dst.dw1.bits.writemask & WRITEMASK_Y)
- brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1);
- if (dst.dw1.bits.writemask & WRITEMASK_Z)
- brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0);
- if (dst.dw1.bits.writemask & WRITEMASK_W)
- brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_X)
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0));
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y)
+ brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1);
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z)
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0);
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W)
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1);
}
if (need_tmp)
tmp = get_tmp(c);
- brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0));
- brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1));
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1));
/* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
* to get all channels active inside the IF. In the clipping code
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
if_insn = brw_IF(p, BRW_EXECUTE_8);
{
- brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0));
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
- brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1));
+ brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(arg0,1));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
emit_math2(c,
BRW_MATH_FUNCTION_POW,
- brw_writemask(dst, WRITEMASK_Z),
+ brw_writemask(dst, BRW_WRITEMASK_Z),
brw_swizzle1(tmp, 2),
brw_swizzle1(arg0, 3),
BRW_MATH_PRECISION_PARTIAL);
static struct brw_reg
get_constant(struct brw_vs_compile *c,
- const struct prog_instruction *inst,
- GLuint argIndex)
+ GLuint argIndex,
+ GLuint index,
+ GLboolean relAddr)
{
- const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
struct brw_reg const_reg;
struct brw_reg const2_reg;
- const GLboolean relAddr = src->RelAddr;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index || relAddr) {
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+ if (c->current_const[argIndex].index != index || relAddr) {
+ struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
- c->current_const[argIndex].index = src->Index;
+ c->current_const[argIndex].index = index;
#if 0
printf(" fetch const[%d] for arg %d into reg %d\n",
- src->Index, argIndex, c->current_const[argIndex].reg.nr);
+ src.Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
/* need to fetch the constant now */
brw_dp_READ_4_vs(p,
0, /* oword */
relAddr, /* relative indexing? */
addrReg, /* address register */
- 16 * src->Index, /* byte offset */
+ 16 * index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
1, /* oword */
relAddr, /* relative indexing? */
addrReg, /* address register */
- 16 * src->Index, /* byte offset */
+ 16 * index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER
);
}
}
+#if 0
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
- gl_register_file file,
+ enum tgsi_file_type file,
GLuint index )
{
switch (file) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_CONSTANT:
assert(c->regs[file][index].nr != 0);
return c->regs[file][index];
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
- case PROGRAM_ADDRESS:
+
+ case TGSI_FILE_ADDRESS:
assert(index == 0);
return c->regs[file][index];
- case PROGRAM_UNDEFINED: /* undef values */
+ case TGSI_FILE_NULL: /* undef values */
return brw_null_reg();
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_WRITE_ONLY:
default:
assert(0);
return brw_null_reg();
}
}
+#endif
+
/**
* Indirect addressing: get reg[[arg] + offset].
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0];
struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
*/
static struct brw_reg
get_src_reg( struct brw_vs_compile *c,
- const struct prog_instruction *inst,
- GLuint argIndex )
+ GLuint argIndex,
+ GLuint file,
+ GLint index,
+ GLboolean relAddr )
{
- const GLuint file = inst->SrcReg[argIndex].File;
- const GLint index = inst->SrcReg[argIndex].Index;
- const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
switch (file) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
if (relAddr) {
return deref(c, c->regs[file][0], index);
}
return c->regs[file][index];
}
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- case PROGRAM_ENV_PARAM:
+ case TGSI_FILE_IMMEDIATE:
+ return c->regs[file][index];
+
+ case TGSI_FILE_CONSTANT:
if (c->vp->use_const_buffer) {
- return get_constant(c, inst, argIndex);
+ return get_constant(c, argIndex, index, relAddr);
}
else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
}
else {
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
+ assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+ return c->regs[TGSI_FILE_CONSTANT][index];
}
- case PROGRAM_ADDRESS:
+ case TGSI_FILE_ADDRESS:
assert(index == 0);
return c->regs[file][index];
- case PROGRAM_UNDEFINED:
+ case TGSI_FILE_NULL:
/* this is a normal case since we loop over all three src args */
return brw_null_reg();
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_WRITE_ONLY:
default:
assert(0);
return brw_null_reg();
/**
* Return the brw reg for the given instruction's src argument.
- * Will return mangled results for SWZ op. The emit_swz() function
- * ignores this result and recalculates taking extended swizzles into
- * account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- const struct prog_instruction *inst,
+ const struct tgsi_full_src_register *src,
GLuint argIndex )
{
- const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
- if (src->File == PROGRAM_UNDEFINED)
+ if (src->Register.File == TGSI_FILE_NULL)
return brw_null_reg();
- reg = get_src_reg(c, inst, argIndex);
+ reg = get_src_reg(c, argIndex,
+ src->Register.File,
+ src->Register.Index,
+ src->Register.Indirect);
/* Convert 3-bit swizzle to 2-bit.
*/
- reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
- GET_SWZ(src->Swizzle, 1),
- GET_SWZ(src->Swizzle, 2),
- GET_SWZ(src->Swizzle, 3));
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->Register.SwizzleX,
+ src->Register.SwizzleY,
+ src->Register.SwizzleZ,
+ src->Register.SwizzleW);
+
+ reg.negate = src->Register.Negate ? 1 : 0;
- /* Note this is ok for non-swizzle instructions:
+ /* XXX: abs, absneg
*/
- reg.negate = src->Negate ? 1 : 0;
return reg;
}
* Get brw register for the given program dest register.
*/
static struct brw_reg get_dst( struct brw_vs_compile *c,
- struct prog_dst_register dst )
+ unsigned file,
+ unsigned index,
+ unsigned writemask )
{
struct brw_reg reg;
- switch (dst.File) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_OUTPUT:
- assert(c->regs[dst.File][dst.Index].nr != 0);
- reg = c->regs[dst.File][dst.Index];
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ reg = c->regs[file][index];
break;
- case PROGRAM_ADDRESS:
- assert(dst.Index == 0);
- reg = c->regs[dst.File][dst.Index];
+ case TGSI_FILE_ADDRESS:
+ assert(index == 0);
+ reg = c->regs[file][index];
break;
- case PROGRAM_UNDEFINED:
+ case TGSI_FILE_NULL:
/* we may hit this for OPCODE_END, OPCODE_KIL, etc */
reg = brw_null_reg();
break;
reg = brw_null_reg();
}
- reg.dw1.bits.writemask = dst.WriteMask;
+ reg.dw1.bits.writemask = writemask;
return reg;
}
-static void emit_swz( struct brw_vs_compile *c,
- struct brw_reg dst,
- const struct prog_instruction *inst)
-{
- const GLuint argIndex = 0;
- const struct prog_src_register src = inst->SrcReg[argIndex];
- struct brw_compile *p = &c->func;
- GLuint zeros_mask = 0;
- GLuint ones_mask = 0;
- GLuint src_mask = 0;
- GLubyte src_swz[4];
- GLboolean need_tmp = (src.Negate &&
- dst.file != BRW_GENERAL_REGISTER_FILE);
- struct brw_reg tmp = dst;
- GLuint i;
-
- if (need_tmp)
- tmp = get_tmp(c);
-
- for (i = 0; i < 4; i++) {
- if (dst.dw1.bits.writemask & (1<<i)) {
- GLubyte s = GET_SWZ(src.Swizzle, i);
- switch (s) {
- case SWIZZLE_X:
- case SWIZZLE_Y:
- case SWIZZLE_Z:
- case SWIZZLE_W:
- src_mask |= 1<<i;
- src_swz[i] = s;
- break;
- case SWIZZLE_ZERO:
- zeros_mask |= 1<<i;
- break;
- case SWIZZLE_ONE:
- ones_mask |= 1<<i;
- break;
- }
- }
- }
-
- /* Do src first, in case dst aliases src:
- */
- if (src_mask) {
- struct brw_reg arg0;
-
- arg0 = get_src_reg(c, inst, argIndex);
-
- arg0 = brw_swizzle(arg0,
- src_swz[0], src_swz[1],
- src_swz[2], src_swz[3]);
-
- brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
- }
-
- if (zeros_mask)
- brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
-
- if (ones_mask)
- brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
-
- if (src.Negate)
- brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
-
- if (need_tmp) {
- brw_MOV(p, dst, tmp);
- release_tmp(c, tmp);
- }
-}
/**
{
struct brw_compile *p = &c->func;
struct brw_reg m0 = brw_message_reg(0);
- struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+ struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
+ int i;
GLuint len_vertext_header = 2;
- if (c->key.copy_edgeflag) {
- brw_MOV(p,
- get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
- get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
- }
-
/* Build ndc coords */
ndc = get_tmp(c);
/* ndc = 1.0 / pos.w */
emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
/* ndc.xyz = pos * ndc */
- brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+ brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc);
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
- if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
- c->key.nr_userclip || BRW_IS_965(p->brw))
+ if (c->prog_data.writes_psiz ||
+ c->key.nr_userclip ||
+ c->chipset.is_965)
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
brw_set_access_mode(p, BRW_ALIGN_16);
- if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
- struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
- brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
- brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+ if (c->prog_data.writes_psiz) {
+ struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ];
+ brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
}
for (i = 0; i < c->key.nr_userclip; i++) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
- brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
+ brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<i));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (BRW_IS_965(p->brw)) {
+ if (c->chipset.is_965) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
brw_swizzle1(ndc, 3),
brw_imm_f(0));
- brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<6));
brw_MOV(p, ndc, brw_imm_f(0));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
- if (BRW_IS_IGDNG(p->brw)) {
+ if (c->chipset.is_igdng) {
/* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
/* m4, m5 contain the distances from vertex to the user clip planeXXX.
len_vertext_header = 2;
}
- eot = (c->first_overflow_output == 0);
+ eot = (c->overflow_count == 0);
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
- if (c->first_overflow_output > 0) {
- /* Not all of the vertex outputs/results fit into the MRF.
- * Move the overflowed attributes from the GRF to the MRF and
- * issue another brw_urb_WRITE().
- */
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+ unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+ GLuint j;
+
+ eot = (i + nr >= c->overflow_count);
+
/* XXX I'm not 100% sure about which MRF regs to use here. Starting
* at mrf[4] atm...
*/
- GLuint i, mrf = 0;
- for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1 << i)) {
- /* move from GRF to MRF */
- brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
- mrf++;
- }
+ for (j = 0; j < nr; j++) {
+ brw_MOV(p, brw_message_reg(4+j),
+ brw_vec8_grf(c->overflow_grf_start + i + j, 0));
}
brw_urb_WRITE(p,
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf+1, /* msg len */
+ nr+1, /* msg len */
0, /* response len */
- 1, /* eot */
- 1, /* writes complete */
- BRW_MAX_MRF-1, /* urb destination offset */
+ eot, /* eot */
+ eot, /* writes complete */
+ i-1, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
}
}
}
static uint32_t
-get_predicate(const struct prog_instruction *inst)
+get_predicate(const struct tgsi_full_instruction *inst)
{
- if (inst->DstReg.CondMask == COND_TR)
+ /* XXX: disabling for now
+ */
+#if 0
+ if (inst->dst.CondMask == COND_TR)
return BRW_PREDICATE_NONE;
/* All of GLSL only produces predicates for COND_NE and one channel per
* predicate on that. We can probably support this, but it won't
* necessarily be easy.
*/
- assert(inst->DstReg.CondMask == COND_NE);
+/* assert(inst->dst.CondMask == COND_NE); */
- switch (inst->DstReg.CondSwizzle) {
+ switch (inst->dst.CondSwizzle) {
case SWIZZLE_XXXX:
return BRW_PREDICATE_ALIGN16_REPLICATE_X;
case SWIZZLE_YYYY:
case SWIZZLE_WWWW:
return BRW_PREDICATE_ALIGN16_REPLICATE_W;
default:
- _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
- inst->DstReg.CondMask);
+ debug_printf("Unexpected predicate: 0x%08x\n",
+ inst->dst.CondMask);
return BRW_PREDICATE_NORMAL;
}
+#else
+ return BRW_PREDICATE_NORMAL;
+#endif
}
+static void emit_insn(struct brw_vs_compile *c,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned opcode = inst->Instruction.Opcode;
+ unsigned label = inst->Label.Label;
+ struct brw_compile *p = &c->func;
+ struct brw_reg args[3], dst;
+ GLuint i;
+
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* Get argument regs.
+ */
+ for (i = 0; i < 3; i++) {
+ args[i] = get_arg(c, &inst->Src[i], i);
+ }
+
+ /* Get dest regs. Note that it is possible for a reg to be both
+ * dst and arg, given the static allocation of registers. So
+ * care needs to be taken emitting multi-operation instructions.
+ */
+ dst = get_dst(c,
+ inst->Dst[0].Register.File,
+ inst->Dst[0].Register.Index,
+ inst->Dst[0].Register.WriteMask);
+
+ /* XXX: saturate
+ */
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
+ debug_printf("Unsupported saturate in vertex shader");
+ }
+
+ switch (opcode) {
+ case TGSI_OPCODE_ABS:
+ brw_MOV(p, dst, brw_abs(args[0]));
+ break;
+ case TGSI_OPCODE_ADD:
+ brw_ADD(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_COS:
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_DP3:
+ brw_DP3(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP4:
+ brw_DP4(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DPH:
+ brw_DPH(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_NRM:
+ emit_nrm(c, dst, args[0], 3);
+ break;
+ case TGSI_OPCODE_NRM4:
+ emit_nrm(c, dst, args[0], 4);
+ break;
+ case TGSI_OPCODE_DST:
+ unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+ break;
+ case TGSI_OPCODE_EXP:
+ unalias1(c, dst, args[0], emit_exp_noalias);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_ARL:
+ emit_arl(c, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ brw_RNDD(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FRC:
+ brw_FRC(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ unalias1(c, dst, args[0], emit_log_noalias);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_LIT:
+ unalias1(c, dst, args[0], emit_lit_noalias);
+ break;
+ case TGSI_OPCODE_LRP:
+ unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+ break;
+ case TGSI_OPCODE_MAD:
+ brw_MOV(p, brw_acc_reg(), args[2]);
+ brw_MAC(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MOV:
+ brw_MOV(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_MUL:
+ brw_MUL(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst,
+ brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SIN:
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SUB:
+ brw_ADD(p, dst, args[0], negate(args[1]));
+ break;
+ case TGSI_OPCODE_TRUNC:
+ /* round toward zero */
+ brw_RNDZ(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_IF:
+ assert(c->if_depth < MAX_IF_DEPTH);
+ c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ /* Note that brw_IF smashes the predicate_control field. */
+ c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst);
+ c->if_depth++;
+ break;
+ case TGSI_OPCODE_ELSE:
+ c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(c->if_depth > 0);
+ brw_ENDIF(p, c->if_inst[--c->if_depth]);
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ c->loop_depth--;
+
+ if (c->chipset.is_igdng)
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > c->loop_inst[c->loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == TGSI_OPCODE_BRK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == TGSI_OPCODE_CONT) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ case TGSI_OPCODE_BRA:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(4));
+ brw_save_call(p, label, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_RET:
+ brw_ADD(p, get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ break;
+ case TGSI_OPCODE_END:
+ c->end_offset = p->nr_insn;
+ /* this instruction will get patched later to jump past subroutine
+ * code, etc.
+ */
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ brw_save_label(p, p->nr_insn, p->nr_insn);
+ break;
+ case TGSI_OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ default:
+ debug_printf("Unsupported opcode %i (%s) in vertex shader",
+ opcode,
+ tgsi_get_opcode_name(opcode));
+ }
+
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+#if 0
+ /* XXX: disabled
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+#endif
+
+ release_tmps(c);
+}
+
+
/* Emit the vertex program instructions here.
*/
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_vs_emit(struct brw_vs_compile *c)
{
-#define MAX_IF_DEPTH 32
-#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
- struct brw_context *brw = p->brw;
- const GLuint nr_insns = c->vp->program.Base.NumInstructions;
- GLuint insn, if_depth = 0, loop_depth = 0;
- GLuint end_offset = 0;
+ const struct tgsi_token *tokens = c->vp->tokens;
struct brw_instruction *end_inst, *last_inst;
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
- const struct brw_indirect stack_index = brw_indirect(0, 0);
- GLuint index;
- GLuint file;
-
- if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("vs-mesa:\n");
- _mesa_print_program(&c->vp->program.Base);
- _mesa_printf("\n");
- }
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction *inst;
+
+ if (BRW_DEBUG & DEBUG_VS)
+ tgsi_dump(c->vp->tokens, 0);
+
+ c->stack_index = brw_indirect(0, 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
- /* Message registers can't be read, so copy the output into GRF register
- if they are used in source registers */
- for (insn = 0; insn < nr_insns; insn++) {
- GLuint i;
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
- for (i = 0; i < 3; i++) {
- struct prog_src_register *src = &inst->SrcReg[i];
- GLuint index = src->Index;
- GLuint file = src->File;
- if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
- c->output_regs[index].used_in_src = GL_TRUE;
- }
- }
/* Static register allocation
*/
brw_vs_alloc_regs(c);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
- for (insn = 0; insn < nr_insns; insn++) {
-
- const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
- struct brw_reg args[3], dst;
- GLuint i;
-
-#if 0
- printf("%d: ", insn);
- _mesa_print_instruction(inst);
-#endif
+ if (c->vp->has_flow_control) {
+ brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+ }
- /* Get argument regs. SWZ is special and does this itself.
- */
- if (inst->Opcode != OPCODE_SWZ)
- for (i = 0; i < 3; i++) {
- const struct prog_src_register *src = &inst->SrcReg[i];
- index = src->Index;
- file = src->File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- args[i] = c->output_regs[index].reg;
- else
- args[i] = get_arg(c, inst, i);
- }
-
- /* Get dest regs. Note that it is possible for a reg to be both
- * dst and arg, given the static allocation of registers. So
- * care needs to be taken emitting multi-operation instructions.
- */
- index = inst->DstReg.Index;
- file = inst->DstReg.File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- dst = c->output_regs[index].reg;
- else
- dst = get_dst(c, inst->DstReg);
-
- if (inst->SaturateMode != SATURATE_OFF) {
- _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
- inst->SaturateMode);
- }
+ /* Instructions
+ */
+ tgsi_parse_init( &parse, tokens );
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
- switch (inst->Opcode) {
- case OPCODE_ABS:
- brw_MOV(p, dst, brw_abs(args[0]));
- break;
- case OPCODE_ADD:
- brw_ADD(p, dst, args[0], args[1]);
- break;
- case OPCODE_COS:
- emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case OPCODE_DP3:
- brw_DP3(p, dst, args[0], args[1]);
- break;
- case OPCODE_DP4:
- brw_DP4(p, dst, args[0], args[1]);
- break;
- case OPCODE_DPH:
- brw_DPH(p, dst, args[0], args[1]);
- break;
- case OPCODE_NRM3:
- emit_nrm(c, dst, args[0], 3);
- break;
- case OPCODE_NRM4:
- emit_nrm(c, dst, args[0], 4);
- break;
- case OPCODE_DST:
- unalias2(c, dst, args[0], args[1], emit_dst_noalias);
- break;
- case OPCODE_EXP:
- unalias1(c, dst, args[0], emit_exp_noalias);
- break;
- case OPCODE_EX2:
- emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case OPCODE_ARL:
- emit_arl(c, dst, args[0]);
- break;
- case OPCODE_FLR:
- brw_RNDD(p, dst, args[0]);
- break;
- case OPCODE_FRC:
- brw_FRC(p, dst, args[0]);
- break;
- case OPCODE_LOG:
- unalias1(c, dst, args[0], emit_log_noalias);
- break;
- case OPCODE_LG2:
- emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case OPCODE_LIT:
- unalias1(c, dst, args[0], emit_lit_noalias);
- break;
- case OPCODE_LRP:
- unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
- break;
- case OPCODE_MAD:
- brw_MOV(p, brw_acc_reg(), args[2]);
- brw_MAC(p, dst, args[0], args[1]);
- break;
- case OPCODE_MAX:
- emit_max(p, dst, args[0], args[1]);
- break;
- case OPCODE_MIN:
- emit_min(p, dst, args[0], args[1]);
- break;
- case OPCODE_MOV:
- brw_MOV(p, dst, args[0]);
- break;
- case OPCODE_MUL:
- brw_MUL(p, dst, args[0], args[1]);
- break;
- case OPCODE_POW:
- emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
- break;
- case OPCODE_RCP:
- emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
break;
- case OPCODE_SEQ:
- emit_seq(p, dst, args[0], args[1]);
- break;
- case OPCODE_SIN:
- emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case OPCODE_SNE:
- emit_sne(p, dst, args[0], args[1]);
- break;
- case OPCODE_SGE:
- emit_sge(p, dst, args[0], args[1]);
- break;
- case OPCODE_SGT:
- emit_sgt(p, dst, args[0], args[1]);
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ inst = &parse.FullToken.FullInstruction;
+ emit_insn( c, inst );
break;
- case OPCODE_SLT:
- emit_slt(p, dst, args[0], args[1]);
- break;
- case OPCODE_SLE:
- emit_sle(p, dst, args[0], args[1]);
- break;
- case OPCODE_SUB:
- brw_ADD(p, dst, args[0], negate(args[1]));
- break;
- case OPCODE_SWZ:
- /* The args[0] value can't be used here as it won't have
- * correctly encoded the full swizzle:
- */
- emit_swz(c, dst, inst);
- break;
- case OPCODE_TRUNC:
- /* round toward zero */
- brw_RNDZ(p, dst, args[0]);
- break;
- case OPCODE_XPD:
- emit_xpd(p, dst, args[0], args[1]);
- break;
- case OPCODE_IF:
- assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
- /* Note that brw_IF smashes the predicate_control field. */
- if_inst[if_depth]->header.predicate_control = get_predicate(inst);
- if_depth++;
- break;
- case OPCODE_ELSE:
- if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
- break;
- case OPCODE_ENDIF:
- assert(if_depth > 0);
- brw_ENDIF(p, if_inst[--if_depth]);
- break;
- case OPCODE_BGNLOOP:
- loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
- break;
- case OPCODE_BRK:
- brw_set_predicate_control(p, get_predicate(inst));
- brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_CONT:
- brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_ENDLOOP:
- {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- loop_depth--;
-
- if (BRW_IS_IGDNG(brw))
- br = 2;
-
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- inst0->bits3.if_else.pop_count = 0;
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- inst0->bits3.if_else.pop_count = 0;
- }
- }
- }
- break;
- case OPCODE_BRA:
- brw_set_predicate_control(p, get_predicate(inst));
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_CAL:
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(4));
- brw_save_call(p, inst->Comment, p->nr_insn);
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
- case OPCODE_RET:
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(-4));
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
- break;
- case OPCODE_END:
- end_offset = p->nr_insn;
- /* this instruction will get patched later to jump past subroutine
- * code, etc.
- */
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
- case OPCODE_PRINT:
- /* no-op */
- break;
- case OPCODE_BGNSUB:
- brw_save_label(p, inst->Comment, p->nr_insn);
- break;
- case OPCODE_ENDSUB:
- /* no-op */
- break;
- default:
- _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
- inst->Opcode, inst->Opcode < MAX_OPCODE ?
- _mesa_opcode_string(inst->Opcode) :
- "unknown");
- }
- /* Set the predication update on the last instruction of the native
- * instruction sequence.
- *
- * This would be problematic if it was set on a math instruction,
- * but that shouldn't be the case with the current GLSL compiler.
- */
- if (inst->CondUpdate) {
- struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
-
- assert(hw_insn->header.destreg__conditionalmod == 0);
- hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
- }
-
- if ((inst->DstReg.File == PROGRAM_OUTPUT)
- && (inst->DstReg.Index != VERT_RESULT_HPOS)
- && c->output_regs[inst->DstReg.Index].used_in_src) {
- brw_MOV(p, get_dst(c, inst->DstReg), dst);
- }
-
- /* Result color clamping.
- *
- * When destination register is an output register and
- * it's primary/secondary front/back color, we have to clamp
- * the result to [0,1]. This is done by enabling the
- * saturation bit for the last instruction.
- *
- * We don't use brw_set_saturate() as it modifies
- * p->current->header.saturate, which affects all the subsequent
- * instructions. Instead, we directly modify the header
- * of the last (already stored) instruction.
- */
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if ((inst->DstReg.Index == VERT_RESULT_COL0)
- || (inst->DstReg.Index == VERT_RESULT_COL1)
- || (inst->DstReg.Index == VERT_RESULT_BFC0)
- || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
- p->store[p->nr_insn-1].header.saturate = 1;
- }
+ default:
+ assert( 0 );
}
-
- release_tmps(c);
}
+ tgsi_parse_free( &parse );
- end_inst = &p->store[end_offset];
+ end_inst = &p->store[c->end_offset];
last_inst = &p->store[p->nr_insn];
/* The END instruction will be patched to jump to this code */
post_vs_emit(c, end_inst, last_inst);
- if (INTEL_DEBUG & DEBUG_VS) {
- int i;
-
- _mesa_printf("vs-native:\n");
- for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
- _mesa_printf("\n");
+ if (BRW_DEBUG & DEBUG_VS) {
+ debug_printf("vs-native:\n");
+ brw_disasm(stderr, p->store, p->nr_insn);
}
}