#include "util/u_memory.h"
#include "util/u_math.h"
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_ureg_parse.h"
+#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_debug.h"
+#include "brw_disasm.h"
+/* Choose one of the 4 vec4's which can be packed into each 16-wide reg.
+ *
+ * reg is the first GRF register number and slot selects a vec4 counted
+ * from it: the result addresses register reg + slot/2 with the
+ * sub-register value (slot%2) * 4, both handed to brw_vec4_grf(). The
+ * region is then passed through stride(..., 0, 4, 1) and returned.
+ *
+ * NOTE(review): the arithmetic below places two slots in each register,
+ * while the sentence above speaks of four - confirm which is intended.
+ */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+ int nr = reg + slot/2; /* register that holds this slot */
+ int subnr = (slot%2) * 4; /* 0 for even slots, 4 for odd slots */
+
+ return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
+}
static struct brw_reg get_tmp( struct brw_vs_compile *c )
}
+static boolean is_position_output( struct brw_vs_compile *c,
+ unsigned vs_output )
+{ /* Tell whether shader output vs_output is the vertex position.
+ *
+ * c: compile state; supplies the shader (c->vp) and
+ * c->prog_data.output_edgeflag.
+ * vs_output: index into the shader's output semantic arrays.
+ *
+ * Returns TRUE only when the output's semantic name is
+ * TGSI_SEMANTIC_POSITION and its semantic index is 0; FALSE otherwise.
+ */
+ struct brw_vertex_shader *vs = c->vp;
+
+ if (vs_output == c->prog_data.output_edgeflag) { /* edgeflag is never the position */
+ return FALSE; /* decided without reading the semantic arrays */
+ }
+ else {
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+
+ return (semantic == TGSI_SEMANTIC_POSITION &&
+ index == 0);
+ }
+}
+
+/* Map a vertex-shader output onto a slot of c->key.fs_signature
+ * (presumably the fragment shader's input signature - confirm).
+ *
+ * c: compile state; provides the shader, the edgeflag output
+ * index and the signature to match against.
+ * vs_output: index into the shader's output semantic arrays.
+ * fs_input_slot: written with the chosen slot when TRUE is returned,
+ * left untouched when FALSE is returned.
+ *
+ * The edgeflag output is given the slot just past the last signature
+ * entry (nr_inputs). Any other output gets the index of the first
+ * signature entry whose semantic name and semantic index both match.
+ *
+ * Returns TRUE when a slot was assigned, FALSE when nothing matched.
+ */
+static boolean find_output_slot( struct brw_vs_compile *c,
+ unsigned vs_output,
+ unsigned *fs_input_slot )
+{
+ struct brw_vertex_shader *vs = c->vp;
+
+ if (vs_output == c->prog_data.output_edgeflag) {
+ *fs_input_slot = c->key.fs_signature.nr_inputs; /* one past the regular inputs */
+ return TRUE;
+ }
+ else {
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+ unsigned i;
+
+ for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { /* linear scan, first match wins */
+ if (c->key.fs_signature.input[i].semantic == semantic &&
+ c->key.fs_signature.input[i].semantic_index == index) {
+ *fs_input_slot = i;
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE; /* no signature entry matches this output */
+}
+
+
/**
* Preallocate GRF register before code emit.
* Do things as simply as possible. Allocate and populate all regs
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
- GLuint i, reg = 0, mrf;
+ GLuint i, reg = 0, subreg = 0, mrf;
int attributes_in_vue;
/* Determine whether to use a real constant buffer or use a block
* works if everything fits in the GRF.
* XXX this heuristic/check may need some fine tuning...
*/
- if (c->vp->info.file_max[TGSI_FILE_CONSTANT] +
- c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF)
+ if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+ c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF)
c->vp->use_const_buffer = GL_TRUE;
- else
+ else {
+ /* XXX: immediates can go elsewhere if necessary:
+ */
+ assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF);
+
c->vp->use_const_buffer = GL_FALSE;
+ }
/*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* User clip planes from curbe:
*/
if (c->key.nr_userclip) {
- for (i = 0; i < c->key.nr_userclip; i++) {
- c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+ /* Skip over fixed planes: Or never read them into vs unit?
+ */
+ subreg += 6;
+
+ for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+ c->userplane[i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
}
/* Deal with curbe alignment:
*/
- reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+ subreg = align(subreg, 2);
+ /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
}
- /* Vertex program parameters from curbe:
+
+ /* Immediates: always in the curbe.
+ *
+ * XXX: Can try to encode some immediates as brw immediates
+ * XXX: Make sure ureg sets minimal immediate size and respect it
+ * here.
*/
- if (c->vp->use_const_buffer) {
- /* get constants from a real constant buffer */
- c->prog_data.curb_read_length = 0;
- c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+ c->regs[TGSI_FILE_IMMEDIATE][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
}
- else {
- /* use a section of the GRF for constants */
+ c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+ /* Vertex constant buffer.
+ *
+ * Constants from the buffer can be either cached in the curbe or
+ * loaded as needed from the actual constant buffer.
+ */
+ if (!c->vp->use_const_buffer) {
GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
- for (i = 0; i < nr_params; i++) {
- c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+
+ for (i = 0; i < nr_params; i++, subreg++) {
+ c->regs[TGSI_FILE_CONSTANT][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
}
- reg += (nr_params + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = nr_params * 4;
+
+ c->prog_data.nr_params += nr_params * 4;
}
+ /* All regs allocated
+ */
+ reg += (subreg + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+
+
/* Allocate input regs:
*/
c->nr_inputs = c->vp->info.num_inputs;
if (c->nr_inputs == 0)
reg++;
+
+
/* Allocate outputs. The non-position outputs go straight into message regs.
*/
- c->nr_outputs = 0;
- c->first_output = reg;
- c->first_overflow_output = 0;
+ c->nr_outputs = c->prog_data.nr_outputs;
if (c->chipset.is_igdng)
mrf = 8;
else
mrf = 4;
+
+ if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+ c->overflow_grf_start = reg;
+ c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+ reg += c->overflow_count;
+ }
+
/* XXX: need to access vertex output semantics here:
*/
- c->nr_outputs = c->prog_data.nr_outputs;
- for (i = 0; i < c->prog_data.nr_outputs; i++) {
- assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+ for (i = 0; i < c->nr_outputs; i++) {
+ unsigned slot;
- /* XXX: Hardwire position to zero:
+ /* XXX: Put output position in slot zero always. Clipper, etc,
+ * need access to this reg.
*/
- if (i == 0) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ if (is_position_output(c, i)) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
reg++;
}
- /* XXX: disable psiz:
- */
- else if (0) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
- }
- else if (mrf < 16) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
+ else if (find_output_slot(c, i, &slot)) {
+
+ if (0 /* is_psize_output(c, i) */ ) {
+ /* c->psize_out.grf = reg; */
+ /* c->psize_out.mrf = i; */
+ }
+
+ /* The first (16-4) outputs can go straight into the message regs.
+ */
+ if (slot + mrf < BRW_MAX_MRF) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+ }
+ else {
+ int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+ }
}
else {
- /* too many vertex results to fit in MRF, use GRF for overflow */
- if (!c->first_overflow_output)
- c->first_overflow_output = i;
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
}
}
}
}
+#if 0
for (i = 0; i < 128; i++) {
if (c->output_regs[i].used_in_src) {
c->output_regs[i].reg = brw_vec8_grf(reg, 0);
reg++;
}
}
+#endif
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
- reg += 2;
+ if (c->vp->has_flow_control) {
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+ }
/* Some opcodes need an internal temporary:
*/
static struct brw_reg
get_constant(struct brw_vs_compile *c,
- const struct ureg_instruction *inst,
- GLuint argIndex)
+ GLuint argIndex,
+ GLuint index,
+ GLboolean relAddr)
{
- const struct ureg_src src = inst->src[argIndex];
struct brw_compile *p = &c->func;
struct brw_reg const_reg;
struct brw_reg const2_reg;
- const GLboolean relAddr = src.Indirect;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src.Index || relAddr) {
+ if (c->current_const[argIndex].index != index || relAddr) {
struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
- c->current_const[argIndex].index = src.Index;
+ c->current_const[argIndex].index = index;
#if 0
printf(" fetch const[%d] for arg %d into reg %d\n",
0, /* oword */
relAddr, /* relative indexing? */
addrReg, /* address register */
- 16 * src.Index, /* byte offset */
+ 16 * index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
1, /* oword */
relAddr, /* relative indexing? */
addrReg, /* address register */
- 16 * src.Index, /* byte offset */
+ 16 * index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER
);
}
*/
static struct brw_reg
get_src_reg( struct brw_vs_compile *c,
- const struct ureg_instruction *inst,
- GLuint argIndex )
+ GLuint argIndex,
+ GLuint file,
+ GLint index,
+ GLboolean relAddr )
{
- const GLuint file = inst->src[argIndex].File;
- const GLint index = inst->src[argIndex].Index;
- const GLboolean relAddr = inst->src[argIndex].Indirect;
switch (file) {
case TGSI_FILE_TEMPORARY:
return c->regs[file][index];
}
+ case TGSI_FILE_IMMEDIATE:
+ return c->regs[file][index];
+
case TGSI_FILE_CONSTANT:
if (c->vp->use_const_buffer) {
- return get_constant(c, inst, argIndex);
+ return get_constant(c, argIndex, index, relAddr);
}
else if (relAddr) {
return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
* Return the brw reg for the given instruction's src argument.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- const struct ureg_instruction *inst,
+ const struct tgsi_full_src_register *src,
GLuint argIndex )
{ /* src: operand to translate; argIndex: its position, forwarded to get_src_reg() */
- const struct ureg_src src = inst->src[argIndex];
struct brw_reg reg;
- if (src.File == TGSI_FILE_NULL)
+ if (src->SrcRegister.File == TGSI_FILE_NULL)
return brw_null_reg(); /* NULL-file operand: no register lookup is done */
- reg = get_src_reg(c, inst, argIndex);
+ reg = get_src_reg(c, argIndex,
+ src->SrcRegister.File,
+ src->SrcRegister.Index,
+ src->SrcRegister.Indirect);
/* Convert 3-bit swizzle to 2-bit.
*/
- reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX,
- src.SwizzleY,
- src.SwizzleZ,
- src.SwizzleW);
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SrcRegister.SwizzleX,
+ src->SrcRegister.SwizzleY,
+ src->SrcRegister.SwizzleZ,
+ src->SrcRegister.SwizzleW);
- /* Note this is ok for non-swizzle instructions:
+ reg.negate = src->SrcRegister.Negate ? 1 : 0;
+
+ /* XXX: abs, absneg are not handled here; only swizzle and negate
+ * are copied from the source operand onto reg.
*/
- reg.negate = src.Negate ? 1 : 0;
return reg;
}
* Get brw register for the given program dest register.
*/
static struct brw_reg get_dst( struct brw_vs_compile *c,
- struct ureg_dst dst )
+ unsigned file,
+ unsigned index,
+ unsigned writemask )
{
struct brw_reg reg;
- switch (dst.File) {
+ switch (file) {
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_OUTPUT:
- assert(c->regs[dst.File][dst.Index].nr != 0);
- reg = c->regs[dst.File][dst.Index];
+ assert(c->regs[file][index].nr != 0);
+ reg = c->regs[file][index];
break;
case TGSI_FILE_ADDRESS:
- assert(dst.Index == 0);
- reg = c->regs[dst.File][dst.Index];
+ assert(index == 0);
+ reg = c->regs[file][index];
break;
case TGSI_FILE_NULL:
/* we may hit this for OPCODE_END, OPCODE_KIL, etc */
reg = brw_null_reg();
}
- reg.dw1.bits.writemask = dst.WriteMask;
+ reg.dw1.bits.writemask = writemask;
return reg;
}
struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
+ int i;
GLuint len_vertext_header = 2;
if (c->key.copy_edgeflag) {
- assert(0);
brw_MOV(p,
- get_reg(c, TGSI_FILE_OUTPUT, 0),
- get_reg(c, TGSI_FILE_INPUT, 0));
+ get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag),
+ brw_imm_f(1));
}
/* Build ndc coords */
len_vertext_header = 2;
}
- eot = (c->first_overflow_output == 0);
+ eot = (c->overflow_count == 0);
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
- if (c->first_overflow_output > 0) {
- /* Not all of the vertex outputs/results fit into the MRF.
- * Move the overflowed attributes from the GRF to the MRF and
- * issue another brw_urb_WRITE().
- */
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+ unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+ GLuint j;
+
+ eot = (i + nr >= c->overflow_count);
+
/* XXX I'm not 100% sure about which MRF regs to use here. Starting
* at mrf[4] atm...
*/
- GLuint i, mrf = 0;
- for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
- /* move from GRF to MRF */
- brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
- mrf++;
+ for (j = 0; j < nr; j++) {
+ brw_MOV(p, brw_message_reg(4+j),
+ brw_vec8_grf(c->overflow_grf_start + i + j, 0));
}
brw_urb_WRITE(p,
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf+1, /* msg len */
+ nr+1, /* msg len */
0, /* response len */
- 1, /* eot */
- 1, /* writes complete */
- BRW_MAX_MRF-1, /* urb destination offset */
+ eot, /* eot */
+ eot, /* writes complete */
+ i-1, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
}
}
}
static uint32_t
-get_predicate(const struct ureg_instruction *inst)
+get_predicate(const struct tgsi_full_instruction *inst)
{
/* XXX: disabling for now
*/
}
static void emit_insn(struct brw_vs_compile *c,
- const struct ureg_instruction *inst)
+ const struct tgsi_full_instruction *inst)
{
+ unsigned opcode = inst->Instruction.Opcode;
+ unsigned label = inst->InstructionExtLabel.Label;
struct brw_compile *p = &c->func;
struct brw_reg args[3], dst;
GLuint i;
/* Get argument regs.
*/
for (i = 0; i < 3; i++) {
- args[i] = get_arg(c, inst, i);
+ args[i] = get_arg(c, &inst->FullSrcRegisters[i], i);
}
/* Get dest regs. Note that it is possible for a reg to be both
* dst and arg, given the static allocation of registers. So
* care needs to be taken emitting multi-operation instructions.
*/
- dst = get_dst(c, inst->dst);
+ dst = get_dst(c,
+ inst->FullDstRegisters[0].DstRegister.File,
+ inst->FullDstRegisters[0].DstRegister.Index,
+ inst->FullDstRegisters[0].DstRegister.WriteMask);
- if (inst->dst.Saturate) {
+ /* XXX: saturate
+ */
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
debug_printf("Unsupported saturate in vertex shader");
}
- switch (inst->opcode) {
+ switch (opcode) {
case TGSI_OPCODE_ABS:
brw_MOV(p, dst, brw_abs(args[0]));
break;
emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case TGSI_OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst,
+ brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL);
break;
case TGSI_OPCODE_SEQ:
emit_seq(p, dst, args[0], args[1]);
brw_set_access_mode(p, BRW_ALIGN_16);
brw_ADD(p, get_addr_reg(c->stack_index),
get_addr_reg(c->stack_index), brw_imm_d(4));
- brw_save_call(p, inst->label, p->nr_insn);
+ brw_save_call(p, label, p->nr_insn);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
break;
case TGSI_OPCODE_RET:
break;
default:
debug_printf("Unsupported opcode %i (%s) in vertex shader",
- inst->opcode,
- tgsi_get_opcode_name(inst->opcode));
+ opcode,
+ tgsi_get_opcode_name(opcode));
}
/* Set the predication update on the last instruction of the native
void brw_vs_emit(struct brw_vs_compile *c)
{
struct brw_compile *p = &c->func;
+ const struct tgsi_token *tokens = c->vp->tokens;
struct brw_instruction *end_inst, *last_inst;
- struct ureg_parse_context parse;
- struct ureg_declaration *decl;
- struct ureg_declaration *imm;
- struct ureg_declaration *insn;
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction *inst;
- if (BRW_DEBUG & DEBUG_VS)
+// if (BRW_DEBUG & DEBUG_VS)
tgsi_dump(c->vp->tokens, 0);
c->stack_index = brw_indirect(0, 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
+
/* Static register allocation
*/
brw_vs_alloc_regs(c);
- brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
- while (ureg_next_decl(&parse, &decl)) {
+ if (c->vp->has_flow_control) {
+ brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
}
- while (ureg_next_immediate(&parse, &imm)) {
- }
-
- while (ureg_next_instruction(&parse, &insn)) {
+ /* Instructions
+ */
+ tgsi_parse_init( &parse, tokens );
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ inst = &parse.FullToken.FullInstruction;
+ emit_insn( c, inst );
+ break;
+
+ default:
+ assert( 0 );
+ }
}
+ tgsi_parse_free( &parse );
- end_inst = &p->store[end_offset];
+ end_inst = &p->store[c->end_offset];
last_inst = &p->store[p->nr_insn];
/* The END instruction will be patched to jump to this code */
post_vs_emit(c, end_inst, last_inst);
if (BRW_DEBUG & DEBUG_VS) {
- int i;
-
debug_printf("vs-native:\n");
- for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
- debug_printf("\n");
+ brw_disasm(stderr, p->store, p->nr_insn);
}
}