(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
-/**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS 4096
-
/**
* Maximum number of arrays
*/
#define MAX_ARRAYS 256
-/* will be 4 for GLSL 4.00 */
-#define MAX_GLSL_TEXTURE_OFFSET 1
+#define MAX_GLSL_TEXTURE_OFFSET 4
class st_src_reg;
class st_dst_reg;
unsigned op;
st_dst_reg dst;
- st_src_reg src[3];
+ st_src_reg src[4];
/** Pointer to the ir source this tree came from for debugging */
ir_instruction *ir;
GLboolean cond_update;
bool saturate;
- int sampler; /**< sampler index */
+ st_src_reg sampler; /**< sampler register */
+ int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
int tex_target; /**< One of TEXTURE_*_INDEX */
GLboolean tex_shadow;
- struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+
+ st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned tex_offset_num_offset;
int dead_mask; /**< Used in dead code elimination */
struct gl_context *ctx;
struct gl_program *prog;
struct gl_shader_program *shader_program;
+ struct gl_shader *shader;
struct gl_shader_compiler_options *options;
int next_temp;
glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
st_dst_reg dst,
st_src_reg src0, st_src_reg src1, st_src_reg src2);
-
+
+ glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1,
+ st_src_reg src2, st_src_reg src3);
+
unsigned get_opcode(ir_instruction *ir, unsigned op,
st_dst_reg dst,
st_src_reg src0, st_src_reg src1);
int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
int mul_operand);
- bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
int get_last_temp_write(int index);
void copy_propagate(void);
- void eliminate_dead_code(void);
- int eliminate_dead_code_advanced(void);
+ int eliminate_dead_code(void);
void merge_registers(void);
void renumber_registers(void);
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
+static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
- st_dst_reg dst,
- st_src_reg src0, st_src_reg src1, st_src_reg src2)
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1,
+ st_src_reg src2, st_src_reg src3)
{
glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
int num_reladdr = 0, i;
num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
+ num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;
+ reladdr_to_temp(ir, &src3, &num_reladdr);
reladdr_to_temp(ir, &src2, &num_reladdr);
reladdr_to_temp(ir, &src1, &num_reladdr);
reladdr_to_temp(ir, &src0, &num_reladdr);
inst->src[0] = src0;
inst->src[1] = src1;
inst->src[2] = src2;
+ inst->src[3] = src3;
inst->ir = ir;
inst->dead_mask = 0;
}
}
else {
- for (i=0; i<3; i++) {
+ for (i=0; i<4; i++) {
if(inst->src[i].reladdr) {
switch(inst->src[i].file) {
case PROGRAM_STATE_VAR:
return inst;
}
/**
 * Three-source convenience overload of emit().
 *
 * Forwards to the four-source emit(), passing undef_src as the fourth
 * source operand, and returns whatever instruction that overload returns.
 */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, st_src_reg src0,
+ st_src_reg src1, st_src_reg src2)
+{
+ return emit(ir, op, dst, src0, src1, src2, undef_src);
+}
/**
 * Two-source convenience overload of emit().
 *
 * The unused source operands are filled with undef_src.  The updated call
 * passes two undef_src placeholders (third and fourth source) so that it
 * targets the four-source emit() directly; the replaced call passed only one.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1)
{
- return emit(ir, op, dst, src0, src1, undef_src);
+ return emit(ir, op, dst, src0, src1, undef_src, undef_src);
}
glsl_to_tgsi_instruction *
st_dst_reg dst, st_src_reg src0)
{
assert(dst.writemask != 0);
- return emit(ir, op, dst, src0, undef_src, undef_src);
+ return emit(ir, op, dst, src0, undef_src, undef_src, undef_src);
}
/**
 * Operand-less convenience overload of emit().
 *
 * Emits an instruction that has neither a destination nor sources by
 * passing undef_dst and undef_src for every operand slot.  The updated call
 * supplies four undef_src placeholders to match the four-source emit(); the
 * replaced call supplied three.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
{
- return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+ return emit(ir, op, undef_dst, undef_src, undef_src, undef_src, undef_src);
}
/**
st_src_reg src0, st_src_reg src1)
{
int type = GLSL_TYPE_FLOAT;
-
+
+ if (op == TGSI_OPCODE_MOV)
+ return op;
+
assert(src0.type != GLSL_TYPE_ARRAY);
assert(src0.type != GLSL_TYPE_STRUCT);
assert(src1.type != GLSL_TYPE_ARRAY);
case2fi(SSG, ISSG);
case3(ABS, IABS, IABS);
-
+
+ case2iu(IBFE, UBFE);
+ case2iu(IMSB, UMSB);
+ case2iu(IMUL_HI, UMUL_HI);
default: break;
}
/* Search immediate storage to see if we already have an identical
* immediate that we can use instead of adding a duplicate entry.
*/
- foreach_list(node, &this->immediates) {
- entry = (immediate_storage *) node;
-
+ foreach_in_list(immediate_storage, entry, &this->immediates) {
if (entry->size == size &&
entry->type == datatype &&
!memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
}
return size;
case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
/* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
{
- variable_storage *entry;
-
- foreach_list(node, &this->variables) {
- entry = (variable_storage *) node;
-
+ foreach_in_list(variable_storage, entry, &this->variables) {
if (entry->var == var)
return entry;
}
if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
unsigned int i;
- const ir_state_slot *const slots = ir->state_slots;
- assert(ir->state_slots != NULL);
+ const ir_state_slot *const slots = ir->get_state_slots();
+ assert(slots != NULL);
/* Check if this statevar's setup in the STATE file exactly
* matches how we'll want to reference it as a
* temporary storage and hope that it'll get copy-propagated
* out.
*/
- for (i = 0; i < ir->num_state_slots; i++) {
+ for (i = 0; i < ir->get_num_state_slots(); i++) {
if (slots[i].swizzle != SWIZZLE_XYZW) {
break;
}
variable_storage *storage;
st_dst_reg dst;
- if (i == ir->num_state_slots) {
+ if (i == ir->get_num_state_slots()) {
/* We'll set the index later. */
storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
this->variables.push_tail(storage);
* of the type. However, this had better match the number of state
* elements that we're going to copy into the new temporary.
*/
- assert((int) ir->num_state_slots == type_size(ir->type));
+ assert((int) ir->get_num_state_slots() == type_size(ir->type));
dst = st_dst_reg(get_temp(ir->type));
}
- for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
int index = _mesa_add_state_reference(this->prog->Parameters,
(gl_state_index *)slots[i].tokens);
}
if (storage->file == PROGRAM_TEMPORARY &&
- dst.index != storage->index + (int) ir->num_state_slots) {
+ dst.index != storage->index + (int) ir->get_num_state_slots()) {
fail_link(this->shader_program,
"failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
ir->name, dst.index - storage->index,
const ir_function_signature *sig;
exec_list empty;
- sig = ir->matching_signature(NULL, &empty);
+ sig = ir->matching_signature(NULL, &empty, false);
assert(sig);
- foreach_list(node, &sig->body) {
- ir_instruction *ir = (ir_instruction *) node;
-
+ foreach_in_list(ir_instruction, ir, &sig->body) {
ir->accept(this);
}
}
return true;
}
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
- /* Emit saturates in the vertex shader only if SM 3.0 is supported.
- */
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- !st_context(this->ctx)->has_shader_model3) {
- return false;
- }
-
- ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
- if (!sat_src)
- return false;
-
- sat_src->accept(this);
- st_src_reg src = this->result;
-
- /* If we generated an expression instruction into a temporary in
- * processing the saturate's operand, apply the saturate to that
- * instruction. Otherwise, generate a MOV to do the saturate.
- *
- * Note that we have to be careful to only do this optimization if
- * the instruction in question was what generated src->result. For
- * example, ir_dereference_array might generate a MUL instruction
- * to create the reladdr, and return us a src reg using that
- * reladdr. That MUL result is not the value we're trying to
- * saturate.
- */
- ir_expression *sat_src_expr = sat_src->as_expression();
- if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
- sat_src_expr->operation == ir_binop_add ||
- sat_src_expr->operation == ir_binop_dot)) {
- glsl_to_tgsi_instruction *new_inst;
- new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
- new_inst->saturate = true;
- } else {
- this->result = get_temp(ir->type);
- st_dst_reg result_dst = st_dst_reg(this->result);
- result_dst.writemask = (1 << ir->type->vector_elements) - 1;
- glsl_to_tgsi_instruction *inst;
- inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
- inst->saturate = true;
- }
-
- return true;
-}
-
void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
st_src_reg *reg, int *num_reladdr)
/* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
*/
- if (ir->operation == ir_binop_logic_and) {
+ if (!native_integers && ir->operation == ir_binop_logic_and) {
if (try_emit_mad_for_and_not(ir, 1))
return;
if (try_emit_mad_for_and_not(ir, 0))
return;
}
- if (try_emit_sat(ir))
- return;
-
if (ir->operation == ir_quadop_vector)
assert(!"ir_quadop_vector should have been lowered");
case ir_unop_cos_reduced:
emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
break;
+ case ir_unop_saturate: {
+ glsl_to_tgsi_instruction *inst;
+ inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ inst->saturate = true;
+ break;
+ }
case ir_unop_dFdx:
+ case ir_unop_dFdx_coarse:
emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
break;
+ case ir_unop_dFdx_fine:
+ emit(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
+ break;
case ir_unop_dFdy:
+ case ir_unop_dFdy_coarse:
+ case ir_unop_dFdy_fine:
{
/* The X component contains 1 or -1 depending on whether the framebuffer
* is a FBO or the window system buffer, respectively.
st_src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
- emit(ir, TGSI_OPCODE_DDY, result_dst, temp);
+ emit(ir, ir->operation == ir_unop_dFdy_fine ?
+ TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
break;
}
case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- /* After the dot-product, the value will be an integer on the
- * range [0,4]. Zero stays zero, and positive values become 1.0.
- */
- glsl_to_tgsi_instruction *const dp =
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
- result_dst.type == GLSL_TYPE_FLOAT) {
- /* The clamping to [0,1] can be done for free in the fragment
- * shader with a saturate.
- */
- dp->saturate = true;
- } else if (result_dst.type == GLSL_TYPE_FLOAT) {
- /* Negating the result of the dot-product gives values on the range
- * [-4, 0]. Zero stays zero, and negative values become 1.0. This
- * is achieved using SLT.
- */
- st_src_reg slt_src = result_src;
- slt_src.negate = ~slt_src.negate;
- emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
- }
- else {
- /* Use SNE 0 if integers are being used as boolean values. */
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ if (native_integers) {
+ int dst_swizzle = 0, op0_swizzle, i;
+ st_src_reg accum = op[0];
+
+ op0_swizzle = op[0].swizzle;
+ accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0),
+ GET_SWZ(op0_swizzle, 0),
+ GET_SWZ(op0_swizzle, 0),
+ GET_SWZ(op0_swizzle, 0));
+ for (i = 0; i < 4; i++) {
+ if (result_dst.writemask & (1 << i)) {
+ dst_swizzle = MAKE_SWIZZLE4(i, i, i, i);
+ break;
+ }
+ }
+ assert(i != 4);
+ assert(ir->operands[0]->type->is_boolean());
+
+ /* OR all the components together, since they should be either 0 or ~0
+ */
+ switch (ir->operands[0]->type->vector_elements) {
+ case 4:
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3),
+ GET_SWZ(op0_swizzle, 3),
+ GET_SWZ(op0_swizzle, 3),
+ GET_SWZ(op0_swizzle, 3));
+ emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+ accum = st_src_reg(result_dst);
+ accum.swizzle = dst_swizzle;
+ /* fallthrough */
+ case 3:
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2),
+ GET_SWZ(op0_swizzle, 2),
+ GET_SWZ(op0_swizzle, 2),
+ GET_SWZ(op0_swizzle, 2));
+ emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+ accum = st_src_reg(result_dst);
+ accum.swizzle = dst_swizzle;
+ /* fallthrough */
+ case 2:
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1),
+ GET_SWZ(op0_swizzle, 1),
+ GET_SWZ(op0_swizzle, 1),
+ GET_SWZ(op0_swizzle, 1));
+ emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+ break;
+ default:
+ assert(!"Unexpected vector size");
+ break;
+ }
+ } else {
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+ result_dst.type == GLSL_TYPE_FLOAT) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ }
+ else {
+ /* Use SNE 0 if integers are being used as boolean values. */
+ emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ }
}
break;
}
break;
case ir_binop_ubo_load: {
- ir_constant *uniform_block = ir->operands[0]->as_constant();
+ ir_constant *const_uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset_ir = ir->operands[1]->as_constant();
unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
+ unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
st_src_reg index_reg = get_temp(glsl_type::uint_type);
st_src_reg cbuf;
cbuf.type = glsl_type::vec4_type->base_type;
cbuf.file = PROGRAM_CONSTANT;
cbuf.index = 0;
- cbuf.index2D = uniform_block->value.u[0] + 1;
cbuf.reladdr = NULL;
cbuf.negate = 0;
assert(ir->type->is_vector() || ir->type->is_scalar());
if (const_offset_ir) {
- index_reg = st_src_reg_for_int(const_offset / 16);
- } else {
- emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4));
+ /* Constant index into constant buffer */
+ cbuf.reladdr = NULL;
+ cbuf.index = const_offset / 16;
+ }
+ else {
+ /* Relative/variable index into constant buffer */
+ emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
+ st_src_reg_for_int(4));
+ cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
+ }
+
+ if (const_uniform_block) {
+ /* Constant constant buffer */
+ cbuf.reladdr2 = NULL;
+ cbuf.index2D = const_block;
+ cbuf.has_index2 = true;
+ }
+ else {
+ /* Relative/variable constant buffer */
+ cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
+ cbuf.index2D = 1;
+ memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
+ cbuf.has_index2 = true;
}
cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
const_offset % 16 / 4,
const_offset % 16 / 4);
- cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
-
if (ir->type->base_type == GLSL_TYPE_BOOL) {
emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
} else {
emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
}
break;
+ case ir_triop_bitfield_extract:
+ emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
+ break;
+ case ir_quadop_bitfield_insert:
+ emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
+ break;
+ case ir_unop_bitfield_reverse:
+ emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
+ break;
+ case ir_unop_bit_count:
+ emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
+ break;
+ case ir_unop_find_msb:
+ emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
+ break;
+ case ir_unop_find_lsb:
+ emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
+ break;
+ case ir_binop_imul_high:
+ emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
+ break;
+ case ir_triop_fma:
+ /* NOTE: Perhaps there should be a special opcode that enforces fused
+ * mul-add. Just use MAD for now.
+ */
+ emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
+ break;
+ case ir_unop_interpolate_at_centroid:
+ emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
+ break;
+ case ir_binop_interpolate_at_offset:
+ emit(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_interpolate_at_sample:
+ emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
+ break;
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_half_2x16:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_4x8:
case ir_binop_pack_half_2x16_split:
- case ir_unop_bitfield_reverse:
- case ir_unop_bit_count:
- case ir_unop_find_msb:
- case ir_unop_find_lsb:
case ir_binop_bfm:
- case ir_triop_fma:
case ir_triop_bfi:
- case ir_triop_bitfield_extract:
- case ir_quadop_bitfield_insert:
case ir_quadop_vector:
case ir_binop_vector_extract:
case ir_triop_vector_insert:
case ir_binop_ldexp:
case ir_binop_carry:
case ir_binop_borrow:
- case ir_binop_imul_high:
/* This operation is not supported, or should have already been handled.
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
st_src_reg temp_base = get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
- foreach_list(node, &ir->components) {
- ir_constant *field_value = (ir_constant *) node;
+ foreach_in_list(ir_constant, field_value, &ir->components) {
int size = type_size(field_value->type);
assert(size > 0);
case GLSL_TYPE_BOOL:
gl_type = native_integers ? GL_BOOL : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (native_integers)
- values[i].u = ir->value.b[i] ? ~0 : 0;
- else
- values[i].f = ir->value.b[i];
+ values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
}
break;
default:
function_entry *
glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
{
- function_entry *entry;
-
- foreach_list(node, &this->function_signatures) {
- entry = (function_entry *) node;
-
+ foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
if (entry->sig == sig)
return entry;
}
entry->bgn_inst = NULL;
/* Allocate storage for all the parameters. */
- foreach_list(node, &sig->parameters) {
- ir_variable *param = (ir_variable *) node;
+ foreach_in_list(ir_variable, param, &sig->parameters) {
variable_storage *storage;
storage = find_variable_storage(param);
int i;
/* Process in parameters. */
- exec_list_iterator sig_iter = sig->parameters.iterator();
- foreach_iter(exec_list_iterator, iter, *ir) {
- ir_rvalue *param_rval = (ir_rvalue *)iter.get();
- ir_variable *param = (ir_variable *)sig_iter.get();
+ foreach_two_lists(formal_node, &sig->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+ ir_variable *param = (ir_variable *) formal_node;
if (param->data.mode == ir_var_function_in ||
param->data.mode == ir_var_function_inout) {
r.index++;
}
}
-
- sig_iter.next();
}
- assert(!sig_iter.has_next());
/* Emit call instruction */
call_inst = emit(ir, TGSI_OPCODE_CAL);
call_inst->function = entry;
/* Process out parameters. */
- sig_iter = sig->parameters.iterator();
- foreach_iter(exec_list_iterator, iter, *ir) {
- ir_rvalue *param_rval = (ir_rvalue *)iter.get();
- ir_variable *param = (ir_variable *)sig_iter.get();
+ foreach_two_lists(formal_node, &sig->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+ ir_variable *param = (ir_variable *) formal_node;
if (param->data.mode == ir_var_function_out ||
param->data.mode == ir_var_function_inout) {
r.index++;
}
}
-
- sig_iter.next();
}
- assert(!sig_iter.has_next());
/* Process return value. */
this->result = entry->return_reg;
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
- st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index;
+ st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
+ st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
+ st_src_reg levels_src;
st_dst_reg result_dst, coord_dst, cube_sc_dst;
glsl_to_tgsi_instruction *inst = NULL;
unsigned opcode = TGSI_OPCODE_NOP;
const glsl_type *sampler_type = ir->sampler->type;
+ ir_rvalue *sampler_index =
+ _mesa_get_sampler_array_nonconst_index(ir->sampler);
bool is_cube_array = false;
+ unsigned i;
/* if we are a cube array sampler */
if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
if (ir->offset) {
ir->offset->accept(this);
- offset = this->result;
+ offset[0] = this->result;
}
break;
case ir_txb:
- opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB;
+ if (is_cube_array ||
+ sampler_type == glsl_type::samplerCubeShadow_type) {
+ opcode = TGSI_OPCODE_TXB2;
+ }
+ else {
+ opcode = TGSI_OPCODE_TXB;
+ }
ir->lod_info.bias->accept(this);
lod_info = this->result;
if (ir->offset) {
ir->offset->accept(this);
- offset = this->result;
+ offset[0] = this->result;
}
break;
case ir_txl:
lod_info = this->result;
if (ir->offset) {
ir->offset->accept(this);
- offset = this->result;
+ offset[0] = this->result;
}
break;
case ir_txd:
dy = this->result;
if (ir->offset) {
ir->offset->accept(this);
- offset = this->result;
+ offset[0] = this->result;
}
break;
case ir_txs:
ir->lod_info.lod->accept(this);
lod_info = this->result;
break;
+ case ir_query_levels:
+ opcode = TGSI_OPCODE_TXQ;
+ lod_info = st_src_reg(PROGRAM_IMMEDIATE, 0, GLSL_TYPE_INT);
+ levels_src = get_temp(ir->type);
+ break;
case ir_txf:
opcode = TGSI_OPCODE_TXF;
ir->lod_info.lod->accept(this);
lod_info = this->result;
if (ir->offset) {
ir->offset->accept(this);
- offset = this->result;
+ offset[0] = this->result;
}
break;
case ir_txf_ms:
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
break;
- case ir_lod:
- assert(!"Unexpected ir_lod opcode");
- break;
case ir_tg4:
- assert(!"Unexpected ir_tg4 opcode");
+ opcode = TGSI_OPCODE_TG4;
+ ir->lod_info.component->accept(this);
+ component = this->result;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
+ const glsl_type *elt_type = ir->offset->type->fields.array;
+ for (i = 0; i < ir->offset->type->length; i++) {
+ offset[i] = this->result;
+ offset[i].index += i * type_size(elt_type);
+ offset[i].type = elt_type->base_type;
+ offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
+ }
+ } else {
+ offset[0] = this->result;
+ }
+ }
break;
- case ir_query_levels:
- assert(!"Unexpected ir_query_levels opcode");
+ case ir_lod:
+ opcode = TGSI_OPCODE_LODQ;
break;
}
coord_dst.writemask = WRITEMASK_XYZW;
}
+ if (sampler_index) {
+ sampler_index->accept(this);
+ emit_arl(ir, sampler_reladdr, this->result);
+ }
+
if (opcode == TGSI_OPCODE_TXD)
inst = emit(ir, opcode, result_dst, coord, dx, dy);
- else if (opcode == TGSI_OPCODE_TXQ)
- inst = emit(ir, opcode, result_dst, lod_info);
- else if (opcode == TGSI_OPCODE_TXF) {
+ else if (opcode == TGSI_OPCODE_TXQ) {
+ if (ir->op == ir_query_levels) {
+ /* the level is stored in W */
+ inst = emit(ir, opcode, st_dst_reg(levels_src), lod_info);
+ result_dst.writemask = WRITEMASK_X;
+ levels_src.swizzle = SWIZZLE_WWWW;
+ emit(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
+ } else
+ inst = emit(ir, opcode, result_dst, lod_info);
+ } else if (opcode == TGSI_OPCODE_TXF) {
inst = emit(ir, opcode, result_dst, coord);
} else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
inst = emit(ir, opcode, result_dst, coord, lod_info);
} else if (opcode == TGSI_OPCODE_TEX2) {
inst = emit(ir, opcode, result_dst, coord, cube_sc);
- } else
+ } else if (opcode == TGSI_OPCODE_TG4) {
+ if (is_cube_array && ir->shadow_comparitor) {
+ inst = emit(ir, opcode, result_dst, coord, cube_sc);
+ } else {
+ inst = emit(ir, opcode, result_dst, coord, component);
+ }
+ } else
inst = emit(ir, opcode, result_dst, coord);
if (ir->shadow_comparitor)
inst->tex_shadow = GL_TRUE;
- inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
- this->shader_program,
- this->prog);
+ inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler,
+ this->shader_program,
+ this->prog);
+ if (sampler_index) {
+ inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+ inst->sampler_array_size =
+ ir->sampler->as_dereference_array()->array->type->array_size();
+ } else {
+ inst->sampler_array_size = 1;
+ }
if (ir->offset) {
- inst->tex_offset_num_offset = 1;
- inst->tex_offsets[0].Index = offset.index;
- inst->tex_offsets[0].File = offset.file;
- inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
- inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
- inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
+ for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
+ inst->tex_offsets[i] = offset[i];
+ inst->tex_offset_num_offset = i;
}
switch (sampler_type->sampler_dimensionality) {
{
if (ir->condition) {
ir->condition->accept(this);
- this->result.negate = ~this->result.negate;
- emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
+ st_src_reg condition = this->result;
+
+ /* Convert the bool condition to a float so we can negate. */
+ if (native_integers) {
+ st_src_reg temp = get_temp(ir->condition->type);
+ emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
+ condition, st_src_reg_for_float(1.0));
+ condition = temp;
+ }
+
+ condition.negate = ~condition.negate;
+ emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
} else {
/* unconditional kil */
emit(ir, TGSI_OPCODE_KILL);
glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
{
/* Only expected while translating a geometry program. */
assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
/* The replaced form emitted EMIT with no operands.  The new form first
 * visits ir->stream and then passes this->result (presumably the value
 * left by that visit -- the same accept()/this->result pattern is used
 * elsewhere in this file) as the single source operand of EMIT.
 */
- emit(ir, TGSI_OPCODE_EMIT);
+
+ ir->stream->accept(this);
+ emit(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
}
/**
 * Translate an ir_end_primitive node into a TGSI ENDPRIM instruction.
 *
 * Asserts that the program being translated is a geometry program.  The
 * replaced form emitted ENDPRIM with no operands; the new form visits
 * ir->stream first and passes this->result (presumably the value produced
 * by that visit) as the instruction's only source, with no destination.
 */
void
glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
{
assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
- emit(ir, TGSI_OPCODE_ENDPRIM);
+
+ ir->stream->accept(this);
+ emit(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
}
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
ctx = NULL;
prog = NULL;
shader_program = NULL;
+ shader = NULL;
options = NULL;
+ have_sqrt = false;
}
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
{
v->samplers_used = 0;
- foreach_list(node, &v->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (is_tex_instruction(inst->op)) {
- v->samplers_used |= 1 << inst->sampler;
+ for (int i = 0; i < inst->sampler_array_size; i++) {
+ v->samplers_used |= 1 << (inst->sampler.index + i);
- if (inst->tex_shadow) {
- prog->ShadowSamplers |= 1 << inst->sampler;
+ if (inst->tex_shadow) {
+ prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
+ }
}
}
}
_mesa_update_shader_textures_used(v->shader_program, prog);
}
-static void
-set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
- struct gl_shader_program *shader_program,
- const char *name, const glsl_type *type,
- ir_constant *val)
-{
- if (type->is_record()) {
- ir_constant *field_constant;
-
- field_constant = (ir_constant *)val->components.get_head();
-
- for (unsigned int i = 0; i < type->length; i++) {
- const glsl_type *field_type = type->fields.structure[i].type;
- const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
- type->fields.structure[i].name);
- set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
- field_type, field_constant);
- field_constant = (ir_constant *)field_constant->next;
- }
- return;
- }
-
- unsigned offset;
- unsigned index = _mesa_get_uniform_location(ctx, shader_program, name,
- &offset);
- if (offset == GL_INVALID_INDEX) {
- fail_link(shader_program,
- "Couldn't find uniform for initializer %s\n", name);
- return;
- }
- int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset);
-
- for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
- ir_constant *element;
- const glsl_type *element_type;
- if (type->is_array()) {
- element = val->array_elements[i];
- element_type = type->fields.array;
- } else {
- element = val;
- element_type = type;
- }
-
- void *values;
-
- if (element_type->base_type == GLSL_TYPE_BOOL) {
- int *conv = ralloc_array(mem_ctx, int, element_type->components());
- for (unsigned int j = 0; j < element_type->components(); j++) {
- conv[j] = element->value.b[j];
- }
- values = (void *)conv;
- element_type = glsl_type::get_instance(GLSL_TYPE_INT,
- element_type->vector_elements,
- 1);
- } else {
- values = &element->value;
- }
-
- if (element_type->is_matrix()) {
- _mesa_uniform_matrix(ctx, shader_program,
- element_type->matrix_columns,
- element_type->vector_elements,
- loc, 1, GL_FALSE, (GLfloat *)values);
- } else {
- _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
- values, element_type->gl_type);
- }
-
- loc++;
- }
-}
-
/**
* Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
* are read from the given src in this instruction
void
glsl_to_tgsi_visitor::simplify_cmp(void)
{
- unsigned *tempWrites;
+ int tempWritesSize = 0;
+ unsigned *tempWrites = NULL;
unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
- tempWrites = new unsigned[MAX_TEMPS];
- if (!tempWrites) {
- return;
- }
- memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
memset(outputWrites, 0, sizeof(outputWrites));
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
unsigned prevWriteMask = 0;
/* Give up if we encounter relative addressing or flow control. */
prevWriteMask = outputWrites[inst->dst.index];
outputWrites[inst->dst.index] |= inst->dst.writemask;
} else if (inst->dst.file == PROGRAM_TEMPORARY) {
- assert(inst->dst.index < MAX_TEMPS);
+ if (inst->dst.index >= tempWritesSize) {
+ const int inc = 4096;
+
+ tempWrites = (unsigned*)
+ realloc(tempWrites,
+ (tempWritesSize + inc) * sizeof(unsigned));
+ if (!tempWrites)
+ return;
+
+ memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
+ tempWritesSize += inc;
+ }
+
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
} else
}
}
- delete [] tempWrites;
+ free(tempWrites);
}
/* Replaces all references to a temporary register index with another index. */
void
glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
{
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
unsigned j;
for (j=0; j < num_inst_src_regs(inst->op); j++) {
inst->src[j].index = new_index;
}
}
+
+ for (j=0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+ inst->tex_offsets[j].index == index) {
+ inst->tex_offsets[j].index = new_index;
+ }
+ }
if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
inst->dst.index = new_index;
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
unsigned i = 0, j;
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
for (j=0; j < num_inst_src_regs(inst->op); j++) {
if (inst->src[j].file == PROGRAM_TEMPORARY &&
inst->src[j].index == index) {
return (depth == 0) ? i : loop_start;
}
}
+ for (j=0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+ inst->tex_offsets[j].index == index) {
+ return (depth == 0) ? i : loop_start;
+ }
+ }
if (inst->op == TGSI_OPCODE_BGNLOOP) {
if(depth++ == 0)
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
int i = 0;
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
return (depth == 0) ? i : loop_start;
}
int last = -1; /* index of last instruction that reads the temporary */
unsigned i = 0, j;
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
for (j=0; j < num_inst_src_regs(inst->op); j++) {
if (inst->src[j].file == PROGRAM_TEMPORARY &&
inst->src[j].index == index) {
last = (depth == 0) ? i : -2;
}
}
+ for (j=0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+ inst->tex_offsets[j].index == index)
+ last = (depth == 0) ? i : -2;
+ }
if (inst->op == TGSI_OPCODE_BGNLOOP)
depth++;
int last = -1; /* index of last instruction that writes to the temporary */
int i = 0;
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
last = (depth == 0) ? i : -2;
int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
int level = 0;
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
assert(inst->dst.file != PROGRAM_TEMPORARY
|| inst->dst.index < this->next_temp);
}
/*
- * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.
*
* The glsl_to_tgsi_visitor lazily produces code assuming that this pass
* will occur. As an example, a TXP production after copy propagation but
* and after this pass:
*
* 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
- *
- * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
- * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
- */
-void
-glsl_to_tgsi_visitor::eliminate_dead_code(void)
-{
- int i;
-
- for (i=0; i < this->next_temp; i++) {
- int last_read = get_last_temp_read(i);
- int j = 0;
-
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-
- if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
- j > last_read)
- {
- iter.remove();
- delete inst;
- }
-
- j++;
- }
- }
-}
-
-/*
- * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
- * code elimination. This is less primitive than eliminate_dead_code(), as it
- * is per-channel and can detect consecutive writes without a read between them
- * as dead code. However, there is some dead code that can be eliminated by
- * eliminate_dead_code() but not this function - for example, this function
- * cannot eliminate an instruction writing to a register that is never read and
- * is the only instruction writing to that register.
- *
- * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.
*/
int
-glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
glsl_to_tgsi_instruction *,
int level = 0;
int removed = 0;
- foreach_list(node, &this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
-
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
assert(inst->dst.file != PROGRAM_TEMPORARY
|| inst->dst.index < this->next_temp);
}
}
}
+ for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
+ if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
+ /* Any temporary might be read, so no dead code elimination
+ * across this instruction.
+ */
+ memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+ } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
+ /* Clear where it's used as src. */
+ int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
+ src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
+ src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
+ src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
+
+ for (int c = 0; c < 4; c++) {
+ if (src_chans & (1 << c)) {
+ writes[4 * inst->tex_offsets[i].index + c] = NULL;
+ }
+ }
+ }
+ }
break;
}
/* Now actually remove the instructions that are completely dead and update
* the writemask of other instructions with dead channels.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-
+ foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
if (!inst->dead_mask || !inst->dst.writemask)
continue;
else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
- iter.remove();
+ inst->remove();
delete inst;
removed++;
} else
v->ctx = original->ctx;
v->prog = prog;
v->shader_program = NULL;
+ v->shader = NULL;
v->glsl_version = original->glsl_version;
v->native_integers = original->native_integers;
v->options = original->options;
src0 = v->get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler = 0;
+ inst->sampler_array_size = 1;
inst->tex_target = TEXTURE_2D_INDEX;
prog->InputsRead |= VARYING_BIT_TEX0;
/* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
temp_dst.writemask = WRITEMASK_XY; /* write R,G */
inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler = 1;
+ inst->sampler.index = 1;
+ inst->sampler_array_size = 1;
inst->tex_target = TEXTURE_2D_INDEX;
/* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler = 1;
+ inst->sampler.index = 1;
+ inst->sampler_array_size = 1;
inst->tex_target = TEXTURE_2D_INDEX;
prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
/* Now copy the instructions from the original glsl_to_tgsi_visitor into the
* new visitor. */
- foreach_list(node, &original->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
glsl_to_tgsi_instruction *newinst;
st_src_reg src_regs[3];
v->ctx = original->ctx;
v->prog = prog;
v->shader_program = NULL;
+ v->shader = NULL;
v->glsl_version = original->glsl_version;
v->native_integers = original->native_integers;
v->options = original->options;
src0 = v->get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler = samplerIndex;
+ inst->sampler.index = samplerIndex;
+ inst->sampler_array_size = 1;
inst->tex_target = TEXTURE_2D_INDEX;
prog->InputsRead |= VARYING_BIT_TEX0;
/* Now copy the instructions from the original glsl_to_tgsi_visitor into the
* new visitor. */
- foreach_list(node, &original->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
glsl_to_tgsi_instruction *newinst;
st_src_reg src_regs[3];
struct st_translate {
struct ureg_program *ureg;
- struct ureg_dst temps[MAX_TEMPS];
+ unsigned temps_size;
+ struct ureg_dst *temps;
+
struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
- struct ureg_dst address[2];
+ struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
-
+ struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned array_sizes[MAX_ARRAYS];
const GLuint *inputMapping;
};
/**
 * Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x.
 *
 * The table is positional: entry N is the semantic for the system value
 * whose enum value is N (it is looked up with the bit index of the
 * system-values-read mask).  The order therefore has to follow the
 * SYSTEM_VALUE_* enum exactly; the program translation entry point asserts
 * the pairing for FRONT_FACE, VERTEX_ID, INSTANCE_ID, SAMPLE_ID, SAMPLE_POS,
 * SAMPLE_MASK_IN and INVOCATION_ID.
 */
-static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
- TGSI_SEMANTIC_FACE,
+const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+ /* Vertex shader
+ */
TGSI_SEMANTIC_VERTEXID,
- TGSI_SEMANTIC_INSTANCEID
+ TGSI_SEMANTIC_INSTANCEID,
+ 0, /* NOTE(review): placeholder; presumably a SYSTEM_VALUE_* slot with no TGSI semantic yet -- confirm against the enum */
+ 0, /* NOTE(review): placeholder as above; 0 may itself be a valid TGSI_SEMANTIC_* value, so callers cannot tell "unmapped" apart -- verify */
+
+ /* Geometry shader
+ */
+ TGSI_SEMANTIC_INVOCATIONID,
+
+ /* Fragment shader
+ */
+ TGSI_SEMANTIC_FACE,
+ TGSI_SEMANTIC_SAMPLEID,
+ TGSI_SEMANTIC_SAMPLEPOS,
+ TGSI_SEMANTIC_SAMPLEMASK,
};
/**
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
- assert(index >= 0);
- assert(index < (int) Elements(t->temps));
-
+ /* Allocate space for temporaries on demand. */
+ if (index >= t->temps_size) {
+ const int inc = 4096;
+
+ t->temps = (struct ureg_dst*)
+ realloc(t->temps,
+ (t->temps_size + inc) * sizeof(struct ureg_dst));
+ if (!t->temps)
+ return ureg_dst_undef();
+
+ memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
+ t->temps_size += inc;
+ }
+
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
case PROGRAM_ARRAY:
array = index >> 16;
- assert(array >= 0);
- assert(array < (int) Elements(t->arrays));
+ assert(array < Elements(t->arrays));
if (ureg_dst_is_undef(t->arrays[array]))
t->arrays[array] = ureg_DECL_array_temporary(
* Map a glsl_to_tgsi src register to a TGSI ureg_src register.
*/
static struct ureg_src
-src_register(struct st_translate *t,
- gl_register_file file,
- GLint index, GLint index2D)
+src_register(struct st_translate *t, const struct st_src_reg *reg)
{
- switch(file) {
+ switch(reg->file) {
case PROGRAM_UNDEFINED:
return ureg_src_undef();
case PROGRAM_TEMPORARY:
case PROGRAM_ARRAY:
- return ureg_src(dst_register(t, file, index));
+ return ureg_src(dst_register(t, reg->file, reg->index));
case PROGRAM_UNIFORM:
- assert(index >= 0);
- return t->constants[index];
+ assert(reg->index >= 0);
+ return t->constants[reg->index];
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT: /* ie, immediate */
- if (index2D) {
- struct ureg_src src;
- src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
- src.Dimension = 1;
- src.DimensionIndex = index2D;
- return src;
- } else if (index < 0)
+ if (reg->has_index2)
+ return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
+ else if (reg->index < 0)
return ureg_DECL_constant(t->ureg, 0);
else
- return t->constants[index];
+ return t->constants[reg->index];
case PROGRAM_IMMEDIATE:
- return t->immediates[index];
+ return t->immediates[reg->index];
case PROGRAM_INPUT:
- assert(t->inputMapping[index] < Elements(t->inputs));
- return t->inputs[t->inputMapping[index]];
+ assert(t->inputMapping[reg->index] < Elements(t->inputs));
+ return t->inputs[t->inputMapping[reg->index]];
case PROGRAM_OUTPUT:
- assert(t->outputMapping[index] < Elements(t->outputs));
- return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+ assert(t->outputMapping[reg->index] < Elements(t->outputs));
+ return ureg_src(t->outputs[t->outputMapping[reg->index]]); /* not needed? */
case PROGRAM_ADDRESS:
- return ureg_src(t->address[index]);
+ return ureg_src(t->address[reg->index]);
case PROGRAM_SYSTEM_VALUE:
- assert(index < (int) Elements(t->systemValues));
- return t->systemValues[index];
+ assert(reg->index < (int) Elements(t->systemValues));
+ return t->systemValues[reg->index];
default:
assert(!"unknown src register file");
break;
case TGSI_PROCESSOR_FRAGMENT:
- if (dst_reg->index >= FRAG_RESULT_COLOR) {
+ if (dst_reg->index == FRAG_RESULT_COLOR ||
+ dst_reg->index >= FRAG_RESULT_DATA0) {
dst = ureg_saturate(dst);
}
break;
static struct ureg_src
translate_src(struct st_translate *t, const st_src_reg *src_reg)
{
- struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
+ struct ureg_src src = src_register(t, src_reg);
- if (t->procType == TGSI_PROCESSOR_GEOMETRY && src_reg->has_index2) {
- src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
+ if (src_reg->has_index2) {
+ /* 2D indexes occur with geometry shader inputs (attrib, vertex)
+ * and UBO constant buffers (buffer, position).
+ */
if (src_reg->reladdr2)
src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
src_reg->index2D);
+/**
+ * Convert a glsl_to_tgsi texel-offset register into a TGSI texture offset.
+ *
+ * \param t          translation state; its immediates, temps and arrays
+ *                   tables are read to resolve the register
+ * \param in_offset  offset register; only the IMMEDIATE, TEMPORARY and ARRAY
+ *                   files are handled
+ * \param idx        position of this offset within the instruction
+ *                   (currently not used by the translation)
+ * \return the file/index/XYZ-swizzle encoding of the offset; an all-zero
+ *         structure if the register file is not one of the handled ones
+ */
static struct tgsi_texture_offset
translate_tex_offset(struct st_translate *t,
- const struct tgsi_texture_offset *in_offset)
+ const st_src_reg *in_offset, int idx)
{
struct tgsi_texture_offset offset;
struct ureg_src imm_src;
+ struct ureg_dst dst;
+ int array;
- assert(in_offset->File == PROGRAM_IMMEDIATE);
- imm_src = t->immediates[in_offset->Index];
+
+ /* Zero the result up front: the default case below fills in nothing, and
+ * returning an uninitialized structure would emit indeterminate bits.
+ */
+ memset(&offset, 0, sizeof(offset));
+
+ switch (in_offset->file) {
+ case PROGRAM_IMMEDIATE:
+ /* Immediates already carry their own swizzle; copy it through. */
+ imm_src = t->immediates[in_offset->index];
+
+ offset.File = imm_src.File;
+ offset.Index = imm_src.Index;
+ offset.SwizzleX = imm_src.SwizzleX;
+ offset.SwizzleY = imm_src.SwizzleY;
+ offset.SwizzleZ = imm_src.SwizzleZ;
+ offset.Padding = 0;
+ break;
+ case PROGRAM_TEMPORARY:
+ /* NOTE(review): t->temps is indexed without a bounds check; this
+ * assumes the temporary was already declared -- confirm.
+ */
+ imm_src = ureg_src(t->temps[in_offset->index]);
+ offset.File = imm_src.File;
+ offset.Index = imm_src.Index;
+ offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
+ offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
+ offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
+ offset.Padding = 0;
+ break;
+ case PROGRAM_ARRAY:
+ /* The upper 16 bits of the index select the array; the lower 16 bits,
+ * biased by 0x8000, are the element within it.
+ */
+ array = in_offset->index >> 16;
- offset.File = imm_src.File;
- offset.Index = imm_src.Index;
- offset.SwizzleX = imm_src.SwizzleX;
- offset.SwizzleY = imm_src.SwizzleY;
- offset.SwizzleZ = imm_src.SwizzleZ;
- offset.File = TGSI_FILE_IMMEDIATE;
- offset.Padding = 0;
+ assert(array >= 0);
+ assert(array < (int) Elements(t->arrays));
+ dst = t->arrays[array];
+ offset.File = dst.File;
+ offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
+ offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
+ offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
+ offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
+ offset.Padding = 0;
+ break;
+ default:
+ /* NOTE(review): other register files are not translated; the zeroed
+ * offset is returned as-is -- confirm whether this should be an error.
+ */
+ break;
+ }
return offset;
}
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL2:
- src[num_src++] = t->samplers[inst->sampler];
+ case TGSI_OPCODE_TG4:
+ case TGSI_OPCODE_LODQ:
+ src[num_src] = t->samplers[inst->sampler.index];
+ if (inst->sampler.reladdr)
+ src[num_src] =
+ ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
+ num_src++;
for (i = 0; i < inst->tex_offset_num_offset; i++) {
- texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
+ texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
}
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
* saturating the value to [0,1] does the job.
*/
static void
-emit_face_var(struct st_translate *t)
+emit_face_var(struct gl_context *ctx, struct st_translate *t)
{
struct ureg_program *ureg = t->ureg;
struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
- /* MOV_SAT face_temp, input[face] */
- face_temp = ureg_saturate(face_temp);
- ureg_MOV(ureg, face_temp, face_input);
+ if (ctx->Const.NativeIntegers) {
+ ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
+ }
+ else {
+ /* MOV_SAT face_temp, input[face] */
+ ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
+ }
/* Use face_temp as face input from here on: */
t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
* \param inputSemanticIndex the semantic index (ex: which texcoord) for
* each input
* \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input
* \param numOutputs number of output registers used
* \param outputMapping maps Mesa fragment program outputs to TGSI
* generic outputs
const ubyte inputSemanticName[],
const ubyte inputSemanticIndex[],
const GLuint interpMode[],
- const GLboolean is_centroid[],
+ const GLuint interpLocation[],
GLuint numOutputs,
const GLuint outputMapping[],
const ubyte outputSemanticName[],
assert(numInputs <= Elements(t->inputs));
assert(numOutputs <= Elements(t->outputs));
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] ==
+ TGSI_SEMANTIC_FACE);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] ==
+ TGSI_SEMANTIC_VERTEXID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] ==
+ TGSI_SEMANTIC_INSTANCEID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] ==
+ TGSI_SEMANTIC_SAMPLEID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] ==
+ TGSI_SEMANTIC_SAMPLEPOS);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] ==
+ TGSI_SEMANTIC_SAMPLEMASK);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] ==
+ TGSI_SEMANTIC_INVOCATIONID);
+
t = CALLOC_STRUCT(st_translate);
if (!t) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
inputSemanticName[i],
inputSemanticIndex[i],
interpMode[i], 0,
- is_centroid[i]);
+ interpLocation[i]);
}
if (proginfo->InputsRead & VARYING_BIT_POS) {
}
if (proginfo->InputsRead & VARYING_BIT_FACE)
- emit_face_var(t);
+ emit_face_var(ctx, t);
/*
* Declare output attributes.
TGSI_SEMANTIC_COLOR,
outputSemanticIndex[i]);
break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_SAMPLEMASK,
+ outputSemanticIndex[i]);
+ /* TODO: If we ever support more than 32 samples, this will have
+ * to become an array.
+ */
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
+ break;
default:
assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
ret = PIPE_ERROR_BAD_INPUT;
/* Declare address register.
*/
if (program->num_address_regs > 0) {
- assert(program->num_address_regs <= 2);
- t->address[0] = ureg_DECL_address(ureg);
- if (program->num_address_regs == 2)
- t->address[1] = ureg_DECL_address(ureg);
+ assert(program->num_address_regs <= 3);
+ for (int i = 0; i < program->num_address_regs; i++)
+ t->address[i] = ureg_DECL_address(ureg);
}
/* Declare misc input registers
unsigned numSys = 0;
for (i = 0; sysInputs; i++) {
if (sysInputs & (1 << i)) {
- unsigned semName = mesa_sysval_to_semantic[i];
+ unsigned semName = _mesa_sysval_to_semantic[i];
t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
if (semName == TGSI_SEMANTIC_INSTANCEID ||
semName == TGSI_SEMANTIC_VERTEXID) {
}
}
- if (program->shader_program) {
- unsigned num_ubos = program->shader_program->NumUniformBlocks;
+ if (program->shader) {
+ unsigned num_ubos = program->shader->NumUniformBlocks;
for (i = 0; i < num_ubos; i++) {
- ureg_DECL_constant2D(t->ureg, 0, program->shader_program->UniformBlocks[i].UniformBufferSize / 4, i + 1);
+ unsigned size = program->shader->UniformBlocks[i].UniformBufferSize;
+ unsigned num_const_vecs = (size + 15) / 16;
+ unsigned first, last;
+ assert(num_const_vecs > 0);
+ first = 0;
+ last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
+ ureg_DECL_constant2D(t->ureg, first, last, i + 1);
}
}
goto out;
}
i = 0;
- foreach_list(node, &program->immediates) {
- immediate_storage *imm = (immediate_storage *) node;
+ foreach_in_list(immediate_storage, imm, &program->immediates) {
assert(i < program->num_immediates);
t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
}
/* Emit each instruction in turn:
*/
- foreach_list(n, &program->instructions) {
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
set_insn_start(t, ureg_get_instruction_number(ureg));
- compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *) n, clamp_color);
+ compile_tgsi_instruction(t, inst, clamp_color);
}
/* Fix up all emitted labels:
out:
if (t) {
+ free(t->temps);
free(t->insn);
free(t->labels);
free(t->constants);
return PIPE_SHADER_FRAGMENT;
case MESA_SHADER_GEOMETRY:
return PIPE_SHADER_GEOMETRY;
+ case MESA_SHADER_COMPUTE:
+ return PIPE_SHADER_COMPUTE;
}
assert(!"should not be reached");
GLenum target = _mesa_shader_stage_to_program(shader->Stage);
bool progress;
struct gl_shader_compiler_options *options =
- &ctx->ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
+ &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
struct pipe_screen *pscreen = ctx->st->pipe->screen;
unsigned ptarget = shader_stage_to_ptarget(shader->Stage);
v->ctx = ctx;
v->prog = prog;
v->shader_program = shader_program;
+ v->shader = shader;
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+ _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
prog->Parameters);
do {
progress = GL_FALSE;
- foreach_list(node, &v->function_signatures) {
- function_entry *entry = (function_entry *) node;
-
+ foreach_in_list(function_entry, entry, &v->function_signatures) {
if (!entry->bgn_inst) {
v->current_function = entry;
/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
v->simplify_cmp();
v->copy_propagate();
- while (v->eliminate_dead_code_advanced());
+ while (v->eliminate_dead_code());
- v->eliminate_dead_code();
v->merge_registers();
v->renumber_registers();
/* Write the END instruction. */
v->emit(NULL, TGSI_OPCODE_END);
- if (ctx->Shader.Flags & GLSL_DUMP) {
+ if (ctx->_Shader->Flags & GLSL_DUMP) {
printf("\n");
printf("GLSL IR for linked %s program %d:\n",
_mesa_shader_stage_to_string(shader->Stage),
shader_program->Name);
- _mesa_print_ir(shader->ir, NULL);
+ _mesa_print_ir(stdout, shader->ir, NULL);
printf("\n");
printf("\n");
fflush(stdout);
case GL_GEOMETRY_SHADER:
stgp = (struct st_geometry_program *)prog;
stgp->glsl_to_tgsi = v;
- stgp->Base.InputType = shader_program->Geom.InputType;
- stgp->Base.OutputType = shader_program->Geom.OutputType;
- stgp->Base.VerticesOut = shader_program->Geom.VerticesOut;
break;
default:
assert(!"should not be reached");
extern "C" {
-struct gl_shader *
-st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
-{
- struct gl_shader *shader;
- assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
- type == GL_GEOMETRY_SHADER_ARB);
- shader = rzalloc(NULL, struct gl_shader);
- if (shader) {
- shader->Type = type;
- shader->Stage = _mesa_shader_enum_to_shader_stage(type);
- shader->Name = name;
- _mesa_init_shader(ctx, shader);
- }
- return shader;
-}
-
-struct gl_shader_program *
-st_new_shader_program(struct gl_context *ctx, GLuint name)
-{
- struct gl_shader_program *shProg;
- shProg = rzalloc(NULL, struct gl_shader_program);
- if (shProg) {
- shProg->Name = name;
- _mesa_init_shader_program(ctx, shProg);
- }
- return shProg;
-}
-
/**
* Link a shader.
* Called via ctx->Driver.LinkShader()
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ struct pipe_screen *pscreen = ctx->st->pipe->screen;
assert(prog->LinkStatus);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
bool progress;
exec_list *ir = prog->_LinkedShaders[i]->ir;
const struct gl_shader_compiler_options *options =
- &ctx->ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type)];
+ &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type)];
/* If there are forms of indirect addressing that the driver
* cannot handle, perform the lowering pass.
lower_packing_builtins(ir, lower_inst);
}
+ if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
+ lower_offset_arrays(ir);
do_mat_op_to_vec(ir);
+ /* Emit saturates in the vertex shader only if SM 3.0 is supported. */
+ bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) ==
+ GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3;
lower_instructions(ir,
MOD_TO_FRACT |
DIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
+ LDEXP_TO_ARITH |
+ CARRY_TO_ARITH |
+ BORROW_TO_ARITH |
(options->EmitNoPow ? POW_TO_EXP2 : 0) |
- (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
+ (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
+ (vs_sm3 ? SAT_TO_CLAMP : 0));
lower_ubo_reference(prog->_LinkedShaders[i], ir);
do_vec_index_to_cond_assign(ir);
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
- progress = do_common_optimization(ir, true, true,
- options->MaxUnrollIterations, options)
+ progress = do_common_optimization(ir, true, true, options,
+ ctx->Const.NativeIntegers)
|| progress;
progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
linked_prog);
if (!ctx->Driver.ProgramStringNotify(ctx,
- _mesa_program_index_to_target(i),
+ _mesa_shader_stage_to_program(i),
linked_prog)) {
_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
NULL);
so->output[i].num_components = info->Outputs[i].NumComponents;
so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
so->output[i].dst_offset = info->Outputs[i].DstOffset;
+ so->output[i].stream = info->Outputs[i].StreamId;
}
for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {