#include "st_mesa_to_tgsi.h"
}
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
(1 << PROGRAM_ENV_PARAM) | \
(1 << PROGRAM_STATE_VAR) | \
(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
+#define MAX_TEMPS 4096
+
+/* will be 4 for GLSL 4.00 */
+#define MAX_GLSL_TEXTURE_OFFSET 1
+
class st_src_reg;
class st_dst_reg;
this->index = reg.index;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
- this->reladdr = NULL;
+ this->reladdr = reg.reladdr;
}
st_dst_reg::st_dst_reg(st_src_reg reg)
int sampler; /**< sampler index */
int tex_target; /**< One of TEXTURE_*_INDEX */
GLboolean tex_shadow;
+ struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+ unsigned tex_offset_num_offset;
+ int dead_mask; /**< Used in dead code elimination */
class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
};
ir_variable *var; /* variable that maps to this, if any */
};
+/**
+ * One immediate (literal) value of up to four components.
+ *
+ * Instances are list nodes (exec_node) so that they can be kept on an
+ * exec_list of immediates.
+ */
+class immediate_storage : public exec_node {
+public:
+   /**
+    * Copies \c size components out of \c values.
+    *
+    * \param values  source components; the first \c size entries are read
+    * \param size    number of components to store, must be in [1, 4]
+    * \param type    GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT
+    */
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      /* values[] only has room for four components; a larger size would
+       * make the memcpy below write past the end of the array.
+       */
+      assert(size >= 1 && size <= 4);
+
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+
+   gl_constant_value values[4]; /**< Only the first \c size entries are set */
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
class function_entry : public exec_node {
public:
ir_function_signature *sig;
/**
* identifier of this function signature used by the program.
*
- * At the point that Mesa instructions for function calls are
+ * At the point that TGSI instructions for function calls are
* generated, we don't know the address of the first instruction of
* the function body. So we make the BranchTarget that is called a
* small integer and rewrite them during set_branchtargets().
glsl_to_tgsi_instruction *bgn_inst;
/**
- * Index of the first instruction of the function body in actual
- * Mesa IR.
+ * Index of the first instruction of the function body in actual TGSI.
*
- * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
+ * Set after conversion from glsl_to_tgsi_instruction to TGSI.
*/
int inst;
bool indirect_addr_consts;
int glsl_version;
+ bool native_integers;
variable_storage *find_variable_storage(ir_variable *var);
+ int add_constant(gl_register_file file, gl_constant_value values[4],
+ int size, int datatype, GLuint *swizzle_out);
+
function_entry *get_function_signature(ir_function_signature *sig);
st_src_reg get_temp(const glsl_type *type);
/** List of variable_storage */
exec_list variables;
+ /** List of immediate_storage */
+ exec_list immediates;
+ int num_immediates;
+
/** List of function_entry */
exec_list function_signatures;
int next_signature_id;
/**
* Emit the correct dot-product instruction for the type of arguments
*/
- void emit_dp(ir_instruction *ir,
- st_dst_reg dst,
- st_src_reg src0,
- st_src_reg src1,
- unsigned elements);
+ glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+ st_dst_reg dst,
+ st_src_reg src0,
+ st_src_reg src1,
+ unsigned elements);
void emit_scalar(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0);
void emit_scalar(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+ void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
+
void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
void emit_scs(ir_instruction *ir, unsigned op,
st_dst_reg dst, const st_src_reg &src);
- GLboolean try_emit_mad(ir_expression *ir,
- int mul_operand);
- GLboolean try_emit_sat(ir_expression *ir);
+ bool try_emit_mad(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_mad_for_and_not(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
bool process_move_condition(ir_rvalue *ir);
void remove_output_reads(gl_register_file type);
+ void simplify_cmp(void);
void rename_temp_register(int index, int new_index);
int get_first_temp_read(int index);
void copy_propagate(void);
void eliminate_dead_code(void);
+ int eliminate_dead_code_advanced(void);
void merge_registers(void);
void renumber_registers(void);
void *mem_ctx;
};
-static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
inst->src[1] = src1;
inst->src[2] = src2;
inst->ir = ir;
+ inst->dead_mask = 0;
inst->function = NULL;
- if (op == TGSI_OPCODE_ARL)
+ if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
this->num_address_regs = 1;
/* Update indirect addressing status used by TGSI */
case PROGRAM_UNIFORM:
this->indirect_addr_consts = true;
break;
+ case PROGRAM_IMMEDIATE:
+ assert(!"immediates should not have indirect addressing");
+ break;
default:
break;
}
case PROGRAM_UNIFORM:
this->indirect_addr_consts = true;
break;
+ case PROGRAM_IMMEDIATE:
+ assert(!"immediates should not have indirect addressing");
+ break;
default:
break;
}
}
this->instructions.push_tail(inst);
-
+
+ if (native_integers)
+ try_emit_float_set(ir, op, dst);
+
return inst;
}
return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}
+/**
+ * Follow-up for float SET opcodes: converts the value just written to
+ * \c dst into an integer.
+ *
+ * Only SEQ, SNE, SGE and SLT are handled; for any other opcode nothing is
+ * emitted.  For those four, \c dst is read back with its negate bits
+ * flipped and passed through F2I into the same register (presumably so
+ * that a float "true" ends up as a negative integer -- confirm against
+ * the boolean representation used with native integers).
+ *
+ * \param ir   IR node the emitted instruction is attributed to
+ * \param op   TGSI opcode of the instruction that was just emitted
+ * \param dst  destination register of that instruction
+ */
+void
+glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
+                                         st_dst_reg dst)
+{
+   switch (op) {
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_SGE:
+   case TGSI_OPCODE_SLT:
+      break;
+   default:
+      /* Not a SET opcode we convert: leave the result untouched. */
+      return;
+   }
+
+   /* Build the source from dst before retyping dst below. */
+   st_src_reg negated_result = st_src_reg(dst);
+   negated_result.negate = ~negated_result.negate;
+
+   dst.type = GLSL_TYPE_FLOAT;
+   emit(ir, TGSI_OPCODE_F2I, dst, negated_result);
+}
+
/**
* Determines whether to use an integer, unsigned integer, or float opcode
* based on the operands and input opcode, then emits the result.
- *
- * TODO: type checking for remaining TGSI opcodes
*/
unsigned
glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
type = GLSL_TYPE_FLOAT;
- else if (glsl_version >= 130)
- type = src0.type;
+ else if (native_integers)
+ type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
#define case4(c, f, i, u) \
case TGSI_OPCODE_##c: \
case3(SGE, ISGE, USGE);
case3(SLT, ISLT, USLT);
- case2iu(SHL, SHL);
case2iu(ISHR, USHR);
- case2iu(NOT, NOT);
- case2iu(AND, AND);
- case2iu(OR, OR);
- case2iu(XOR, XOR);
default: break;
}
return op;
}
-void
+glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
st_dst_reg dst, st_src_reg src0, st_src_reg src1,
unsigned elements)
TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
};
- emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+ return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}
/**
glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
st_dst_reg dst, st_src_reg src0)
{
- st_src_reg tmp = get_temp(glsl_type::float_type);
+ int op = TGSI_OPCODE_ARL;
- if (src0.type == GLSL_TYPE_INT)
- emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
- else if (src0.type == GLSL_TYPE_UINT)
- emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
- else
- tmp = src0;
-
- emit(ir, TGSI_OPCODE_ARL, dst, tmp);
+ if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
+ op = TGSI_OPCODE_UARL;
+
+ emit(NULL, op, dst, src0);
}
/**
}
}
-struct st_src_reg
+/**
+ * Registers a constant value and returns the index it can be read from.
+ *
+ * \param file         PROGRAM_CONSTANT or PROGRAM_IMMEDIATE
+ * \param values       the components of the constant
+ * \param size         number of components in \c values
+ * \param datatype     GL type tag stored with / compared against the value
+ * \param swizzle_out  forwarded to _mesa_add_typed_unnamed_constant() for
+ *                     PROGRAM_CONSTANT; not written for PROGRAM_IMMEDIATE
+ * \return for PROGRAM_CONSTANT, whatever the Mesa helper returns; for
+ *         PROGRAM_IMMEDIATE, the position of the value in the immediates
+ *         list.
+ */
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+                                   gl_constant_value values[4], int size,
+                                   int datatype, GLuint *swizzle_out)
+{
+   /* Constant-file values are handed straight to the parameter list. */
+   if (file == PROGRAM_CONSTANT)
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+
+   assert(file == PROGRAM_IMMEDIATE);
+
+   const size_t value_bytes = size * sizeof(gl_constant_value);
+   int slot = 0;
+
+   /* Walk the existing immediates looking for an identical one so that no
+    * duplicate entry is added; slot counts the entries passed so far.
+    */
+   foreach_iter(exec_list_iterator, iter, this->immediates) {
+      immediate_storage *existing = (immediate_storage *)iter.get();
+      const bool identical = existing->size == size &&
+                             existing->type == datatype &&
+                             memcmp(existing->values, values, value_bytes) == 0;
+
+      if (identical)
+         return slot;
+      slot++;
+   }
+
+   /* No match: append, so the new entry's index is the old list length. */
+   this->immediates.push_tail(new(mem_ctx) immediate_storage(values, size,
+                                                             datatype));
+   this->num_immediates++;
+   return slot;
+}
+
+st_src_reg /* Returns a source register for a one-component float immediate holding val. */
glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
{
- st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
+ st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); /* index -1 is a placeholder, set below */
union gl_constant_value uval;
uval.f = val;
- src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
- &uval, 1, GL_FLOAT, &src.swizzle);
+ src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); /* immediate path: reuses an identical entry or appends one; src.swizzle is not written on this path */
return src;
}
-struct st_src_reg
+st_src_reg /* Returns a source register for a one-component signed-int immediate holding val. */
glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
{
- st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
+ st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); /* index -1 is a placeholder, set below */
union gl_constant_value uval;
- assert(glsl_version >= 130);
+ assert(native_integers); /* callers must only request integer immediates when native integers are enabled */
uval.i = val;
- src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
- &uval, 1, GL_INT, &src.swizzle);
+ src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); /* immediate path: reuses an identical entry or appends one; src.swizzle is not written on this path */
return src;
}
-struct st_src_reg
+st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
{
- if (glsl_version >= 130)
+ if (native_integers)
return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
st_src_reg_for_int(val);
else
glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
{
st_src_reg src;
- int swizzle[4];
- int i;
- src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
+ src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
src.file = PROGRAM_TEMPORARY;
src.index = next_temp;
src.reladdr = NULL;
if (type->is_array() || type->is_record()) {
src.swizzle = SWIZZLE_NOOP;
} else {
- for (i = 0; i < type->vector_elements; i++)
- swizzle[i] = i;
- for (; i < 4; i++)
- swizzle[i] = type->vector_elements - 1;
- src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
- swizzle[2], swizzle[3]);
+ src.swizzle = swizzle_for_size(type->vector_elements);
}
src.negate = 0;
}
}
- struct variable_storage *storage;
+ variable_storage *storage;
st_dst_reg dst;
if (i == ir->num_state_slots) {
/* We'll set the index later. */
this->next_temp += type_size(ir->type);
dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
- glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+ native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
}
}
} else {
st_src_reg src(PROGRAM_STATE_VAR, index,
- glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+ native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
src.swizzle = slots[i].swizzle;
emit(ir, TGSI_OPCODE_MOV, dst, src);
/* even a float takes up a whole vec4 reg in a struct/array. */
}
}
-GLboolean
+bool
glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
int nonmul_operand = 1 - mul_operand;
st_src_reg a, b, c;
+ st_dst_reg result_dst;
ir_expression *expr = ir->operands[mul_operand]->as_expression();
if (!expr || expr->operation != ir_binop_mul)
c = this->result;
this->result = get_temp(ir->type);
- emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+ result_dst = st_dst_reg(this->result);
+ result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+ emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
return true;
}
-GLboolean
+/**
+ * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ *
+ * The rewrite relies on the float 1.0/0.0 encoding of booleans.  When
+ * native integers are enabled booleans are integers, and a floating-point
+ * MAD on them would compute on raw integer bit patterns, so in that case
+ * nothing is emitted and false is returned; the caller then falls back to
+ * its regular logic-and handling.
+ *
+ * \param ir           the logic-and expression being visited
+ * \param try_operand  index (0 or 1) of the operand to test for logic-not
+ * \return true if the MAD was emitted and this->result holds its
+ *         destination; false if nothing was emitted.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   st_src_reg a, b;
+
+   /* Bail out before visiting any operand so that no instructions are
+    * emitted on the path that declines the optimization.
+    */
+   if (native_integers)
+      return false;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   /* Visit the operand of the NOT, not the NOT itself: the negation is
+    * folded into the MAD through b's negate bits.
+    */
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
+bool
glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
{
/* Saturates were only introduced to vertex programs in
sat_src->accept(this);
st_src_reg src = this->result;
- this->result = get_temp(ir->type);
- glsl_to_tgsi_instruction *inst;
- inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
- inst->saturate = true;
+ /* If we generated an expression instruction into a temporary in
+ * processing the saturate's operand, apply the saturate to that
+ * instruction. Otherwise, generate a MOV to do the saturate.
+ *
+ * Note that we have to be careful to only do this optimization if
+ * the instruction in question was what generated src->result. For
+ * example, ir_dereference_array might generate a MUL instruction
+ * to create the reladdr, and return us a src reg using that
+ * reladdr. That MUL result is not the value we're trying to
+ * saturate.
+ */
+ ir_expression *sat_src_expr = sat_src->as_expression();
+ if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+ sat_src_expr->operation == ir_binop_add ||
+ sat_src_expr->operation == ir_binop_dot)) {
+ glsl_to_tgsi_instruction *new_inst;
+ new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+ new_inst->saturate = true;
+ } else {
+ this->result = get_temp(ir->type);
+ st_dst_reg result_dst = st_dst_reg(this->result);
+ result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+ glsl_to_tgsi_instruction *inst;
+ inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
+ inst->saturate = true;
+ }
return true;
}
if (try_emit_mad(ir, 0))
return;
}
+
+ /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ */
+ if (ir->operation == ir_binop_logic_and) {
+ if (try_emit_mad_for_and_not(ir, 1))
+ return;
+ if (try_emit_mad_for_and_not(ir, 0))
+ return;
+ }
+
if (try_emit_sat(ir))
return;
switch (ir->operation) {
case ir_unop_logic_not:
- emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+ if (result_dst.type != GLSL_TYPE_FLOAT)
+ emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+ else {
+ /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
+ * older GPUs implement SEQ using multiple instructions (i915 uses two
+ * SGE instructions and a MUL instruction). Since our logic values are
+ * 0.0 and 1.0, 1-x also implements !x.
+ */
+ op[0].negate = ~op[0].negate;
+ emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+ }
break;
case ir_unop_neg:
assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
break;
case ir_binop_greater:
- emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+ emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
break;
case ir_binop_lequal:
- emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+ emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
break;
case ir_binop_gequal:
emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
/* "==" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
- st_src_reg temp = get_temp(glsl_version >= 130 ?
+ st_src_reg temp = get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
glsl_type::vec4_type);
- assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
- emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
- emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+
+ if (native_integers) {
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+
+ emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+
+ /* Emit 1-3 AND operations to combine the SEQ results. */
+ switch (ir->operands[0]->type->vector_elements) {
+ case 2:
+ break;
+ case 3:
+ temp_dst.writemask = WRITEMASK_Y;
+ temp1.swizzle = SWIZZLE_YYYY;
+ temp2.swizzle = SWIZZLE_ZZZZ;
+ emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+ break;
+ case 4:
+ temp_dst.writemask = WRITEMASK_X;
+ temp1.swizzle = SWIZZLE_XXXX;
+ temp2.swizzle = SWIZZLE_YYYY;
+ emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+ temp_dst.writemask = WRITEMASK_Y;
+ temp1.swizzle = SWIZZLE_ZZZZ;
+ temp2.swizzle = SWIZZLE_WWWW;
+ emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+ }
+
+ temp1.swizzle = SWIZZLE_XXXX;
+ temp2.swizzle = SWIZZLE_YYYY;
+ emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
+ } else {
+ emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero becomes 1.0, and positive values become zero.
+ */
+ emit_dp(ir, result_dst, temp, temp, vector_elements);
+
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero becomes 1.0, and negative values become zero.
+ * This is achieved using SGE.
+ */
+ st_src_reg sge_src = result_src;
+ sge_src.negate = ~sge_src.negate;
+ emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+ }
} else {
emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
}
/* "!=" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
- st_src_reg temp = get_temp(glsl_version >= 130 ?
+ st_src_reg temp = get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
glsl_type::vec4_type);
- assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
- emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+ if (native_integers) {
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+
+ /* Emit 1-3 OR operations to combine the SNE results. */
+ switch (ir->operands[0]->type->vector_elements) {
+ case 2:
+ break;
+ case 3:
+ temp_dst.writemask = WRITEMASK_Y;
+ temp1.swizzle = SWIZZLE_YYYY;
+ temp2.swizzle = SWIZZLE_ZZZZ;
+ emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+ break;
+ case 4:
+ temp_dst.writemask = WRITEMASK_X;
+ temp1.swizzle = SWIZZLE_XXXX;
+ temp2.swizzle = SWIZZLE_YYYY;
+ emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+ temp_dst.writemask = WRITEMASK_Y;
+ temp1.swizzle = SWIZZLE_ZZZZ;
+ temp2.swizzle = SWIZZLE_WWWW;
+ emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+ }
+
+ temp1.swizzle = SWIZZLE_XXXX;
+ temp2.swizzle = SWIZZLE_YYYY;
+ emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
+ } else {
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *const dp =
+ emit_dp(ir, result_dst, temp, temp, vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ }
+ }
} else {
emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
}
break;
- case ir_unop_any:
+ case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+ result_dst.type == GLSL_TYPE_FLOAT) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ }
+ else {
+ /* Use SNE 0 if integers are being used as boolean values. */
+ emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ }
break;
+ }
case ir_binop_logic_xor:
- emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+ if (native_integers)
+ emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+ else
+ emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
break;
- case ir_binop_logic_or:
- /* This could be a saturated add and skip the SNE. */
- emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+ case ir_binop_logic_or: {
+ if (native_integers) {
+ /* If integers are used as booleans, we can use an actual "or"
+ * instruction.
+ */
+ assert(native_integers);
+ emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
+ } else {
+ /* After the addition, the value will be an integer on the
+ * range [0,2]. Zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *add =
+ emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate if floats are being used as boolean values.
+ */
+ add->saturate = true;
+ } else {
+ /* Negating the result of the addition gives values on the range
+ * [-2, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ }
+ }
break;
+ }
case ir_binop_logic_and:
- /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
- emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+ /* If native integers are disabled, the bool args are stored as float 0.0
+ * or 1.0, so "mul" gives us "and". If they're enabled, just use the
+ * actual AND opcode.
+ */
+ if (native_integers)
+ emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+ else
+ emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
break;
case ir_binop_dot:
emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
break;
case ir_unop_i2f:
- case ir_unop_b2f:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
break;
}
- case ir_unop_b2i:
- /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
+ /* fallthrough to next case otherwise */
+ case ir_unop_b2f:
+ if (native_integers) {
+ emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+ break;
+ }
+ /* fallthrough to next case otherwise */
+ case ir_unop_i2u:
+ case ir_unop_u2i:
+ /* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
break;
+ case ir_unop_b2i:
+ if (native_integers) {
+ /* Booleans are stored as integers using ~0 for true and 0 for false.
+ * GLSL requires that int(bool) return 1 for true and 0 for false.
+ * This conversion is done with AND, but it could be done with NEG.
+ */
+ emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+ } else {
+ /* Booleans and integers are both stored as floats when native
+ * integers are disabled.
+ */
+ result_src = op[0];
+ }
+ break;
case ir_unop_f2i:
- if (glsl_version >= 130)
+ if (native_integers)
emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
else
emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
case ir_unop_f2b:
+ emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+ break;
case ir_unop_i2b:
- emit(ir, TGSI_OPCODE_SNE, result_dst, op[0],
- st_src_reg_for_type(result_dst.type, 0));
+ if (native_integers)
+ emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+ else
+ emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
break;
case ir_unop_trunc:
emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
case ir_unop_bit_not:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
break;
}
case ir_unop_u2f:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
break;
}
case ir_binop_lshift:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
break;
}
case ir_binop_rshift:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
break;
}
case ir_binop_bit_and:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
break;
}
case ir_binop_bit_xor:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
break;
}
case ir_binop_bit_or:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
break;
}
entry = new(mem_ctx) variable_storage(var,
PROGRAM_INPUT,
var->location);
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- var->location >= VERT_ATTRIB_GENERIC0) {
- _mesa_add_attribute(this->prog->Attributes,
- var->name,
- _mesa_sizeof_glsl_type(var->type->gl_type),
- var->type->gl_type,
- var->location - VERT_ATTRIB_GENERIC0);
- }
break;
case ir_var_out:
assert(var->location != -1);
}
this->result = st_src_reg(entry->file, entry->index, var->type);
- if (glsl_version <= 120)
+ if (!native_integers)
this->result.type = GLSL_TYPE_FLOAT;
}
if (index) {
src.index += index->value.i[0] * element_size;
} else {
- st_src_reg array_base = this->result;
/* Variable index array dereference. It eats the "vec4" of the
- * base of the array and an index that offsets the Mesa register
+ * base of the array and an index that offsets the TGSI register
* index.
*/
ir->array_index->accept(this);
if (element_size == 1) {
index_reg = this->result;
} else {
- index_reg = get_temp(glsl_type::float_type);
+ index_reg = get_temp(native_integers ?
+ glsl_type::int_type : glsl_type::float_type);
emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
- this->result, st_src_reg_for_float(element_size));
+ this->result, st_src_reg_for_type(index_reg.type, element_size));
+ }
+
+ /* If there was already a relative address register involved, add the
+ * new and the old together to get the new offset.
+ */
+ if (src.reladdr != NULL) {
+ st_src_reg accum_reg = get_temp(native_integers ?
+ glsl_type::int_type : glsl_type::float_type);
+
+ emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+ index_reg, *src.reladdr);
+
+ index_reg = accum_reg;
}
src.reladdr = ralloc(mem_ctx, st_src_reg);
if (ir->write_mask == 0) {
assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
l.writemask = WRITEMASK_XYZW;
- } else if (ir->lhs->type->is_scalar()) {
+ } else if (ir->lhs->type->is_scalar() &&
+ ir->lhs->variable_referenced()->mode == ir_var_out) {
/* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
* FINISHME: W component of fragment shader output zero, work correctly.
*/
int first_enabled_chan = 0;
int rhs_chan = 0;
- assert(ir->lhs->type->is_vector());
l.writemask = ir->write_mask;
for (int i = 0; i < 4; i++) {
/* Swizzle a small RHS vector into the channels being written.
*
* glsl ir treats write_mask as dictating how many channels are
- * present on the RHS while Mesa IR treats write_mask as just
+ * present on the RHS while TGSI treats write_mask as just
* showing which channels of the vec4 RHS get written.
*/
for (int i = 0; i < 4; i++) {
st_src_reg condition = this->result;
for (i = 0; i < type_size(ir->lhs->type); i++) {
+ st_src_reg l_src = st_src_reg(l);
+ st_src_reg condition_temp = condition;
+ l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+
+ if (native_integers) {
+ /* This is necessary because TGSI's CMP instruction expects the
+ * condition to be a float, and we store booleans as integers.
+ * If TGSI had a UCMP instruction or similar, this extra
+ * instruction would not be necessary.
+ */
+ condition_temp = get_temp(glsl_type::vec4_type);
+ condition.negate = 0;
+ emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
+ condition_temp.swizzle = condition.swizzle;
+ }
+
if (switch_order) {
- emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
+ emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
} else {
- emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
+ emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
}
l.index++;
r.index++;
}
+ } else if (ir->rhs->as_expression() &&
+ this->instructions.get_tail() &&
+ ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+ type_size(ir->lhs->type) == 1 &&
+ l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
+ /* To avoid emitting an extra MOV when assigning an expression to a
+ * variable, emit the last instruction of the expression again, but
+ * replace the destination register with the target of the assignment.
+ * Dead code elimination will remove the original instruction.
+ */
+ glsl_to_tgsi_instruction *inst, *new_inst;
+ inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+ new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+ new_inst->saturate = inst->saturate;
+ inst->dead_mask = inst->dst.writemask;
} else {
for (i = 0; i < type_size(ir->lhs->type); i++) {
emit(ir, TGSI_OPCODE_MOV, l, r);
gl_constant_value *values = (gl_constant_value *) stack_vals;
GLenum gl_type = GL_NONE;
unsigned int i;
+ static int in_array = 0;
+ gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
/* Unfortunately, 4 floats is all we can get into
- * _mesa_add_unnamed_constant. So, make a temp to store an
+ * _mesa_add_typed_unnamed_constant. So, make a temp to store an
* aggregate constant and move each constant value into it. If we
* get lucky, copy propagation will eliminate the extra moves.
*/
int size = type_size(ir->type->fields.array);
assert(size > 0);
+ in_array++;
for (i = 0; i < ir->type->length; i++) {
ir->array_elements[i]->accept(this);
}
}
this->result = temp_base;
+ in_array--;
return;
}
assert(ir->type->base_type == GLSL_TYPE_FLOAT);
values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
- src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
- src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
- values,
- ir->type->vector_elements,
- GL_FLOAT,
- &src.swizzle);
+ src = st_src_reg(file, -1, ir->type->base_type);
+ src.index = add_constant(file,
+ values,
+ ir->type->vector_elements,
+ GL_FLOAT,
+ &src.swizzle);
emit(ir, TGSI_OPCODE_MOV, mat_column, src);
mat_column.index++;
return;
}
- src.file = PROGRAM_CONSTANT;
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
gl_type = GL_FLOAT;
}
break;
case GLSL_TYPE_UINT:
- gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+ gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (glsl_version >= 130)
+ if (native_integers)
values[i].u = ir->value.u[i];
else
values[i].f = ir->value.u[i];
}
break;
case GLSL_TYPE_INT:
- gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+ gl_type = native_integers ? GL_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (glsl_version >= 130)
+ if (native_integers)
values[i].i = ir->value.i[i];
else
values[i].f = ir->value.i[i];
}
break;
case GLSL_TYPE_BOOL:
- gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+ gl_type = native_integers ? GL_BOOL : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (glsl_version >= 130)
+ if (native_integers)
values[i].b = ir->value.b[i];
else
values[i].f = ir->value.b[i];
assert(!"Non-float/uint/int/bool constant");
}
- this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
- this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
- values, ir->type->vector_elements, gl_type,
- &this->result.swizzle);
+ this->result = st_src_reg(file, -1, ir->type);
+ this->result.index = add_constant(file,
+ values,
+ ir->type->vector_elements,
+ gl_type,
+ &this->result.swizzle);
}
function_entry *
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
- st_src_reg result_src, coord, lod_info, projector, dx, dy;
+ st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
st_dst_reg result_dst, coord_dst;
glsl_to_tgsi_instruction *inst = NULL;
unsigned opcode = TGSI_OPCODE_NOP;
- ir->coordinate->accept(this);
+ if (ir->coordinate) {
+ ir->coordinate->accept(this);
- /* Put our coords in a temp. We'll need to modify them for shadow,
- * projection, or LOD, so the only case we'd use it as is is if
- * we're doing plain old texturing. Mesa IR optimization should
- * handle cleaning up our mess in that case.
- */
- coord = get_temp(glsl_type::vec4_type);
- coord_dst = st_dst_reg(coord);
- emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ /* Put our coords in a temp. We'll need to modify them for shadow,
+ * projection, or LOD, so the only case we'd use it as is is if
+ * we're doing plain old texturing. The optimization passes on
+ * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+ */
+ coord = get_temp(glsl_type::vec4_type);
+ coord_dst = st_dst_reg(coord);
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ }
if (ir->projector) {
ir->projector->accept(this);
ir->lod_info.grad.dPdy->accept(this);
dy = this->result;
break;
- case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
- assert(!"GLSL 1.30 features unsupported");
+ case ir_txs:
+ opcode = TGSI_OPCODE_TXQ;
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ break;
+ case ir_txf:
+ opcode = TGSI_OPCODE_TXF;
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ offset = this->result;
+ }
break;
}
+ const glsl_type *sampler_type = ir->sampler->type;
+
if (ir->projector) {
if (opcode == TGSI_OPCODE_TEX) {
/* Slot the projector in as the last component of the coord. */
tmp_src = get_temp(glsl_type::vec4_type);
st_dst_reg tmp_dst = st_dst_reg(tmp_src);
+ /* Projective division not allowed for array samplers. */
+ assert(!sampler_type->sampler_array);
+
tmp_dst.writemask = WRITEMASK_Z;
emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
* coord.
*/
ir->shadow_comparitor->accept(this);
- coord_dst.writemask = WRITEMASK_Z;
+
+ /* XXX This will need to be updated for cubemap array samplers. */
+ if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
+ sampler_type->sampler_array) {
+ coord_dst.writemask = WRITEMASK_W;
+ } else {
+ coord_dst.writemask = WRITEMASK_Z;
+ }
+
emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
coord_dst.writemask = WRITEMASK_XYZW;
}
- if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+ if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+ opcode == TGSI_OPCODE_TXF) {
/* TGSI stores LOD or LOD bias in the last channel of the coords. */
coord_dst.writemask = WRITEMASK_W;
emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
if (opcode == TGSI_OPCODE_TXD)
inst = emit(ir, opcode, result_dst, coord, dx, dy);
- else
+ else if (opcode == TGSI_OPCODE_TXQ)
+ inst = emit(ir, opcode, result_dst, lod_info);
+ else if (opcode == TGSI_OPCODE_TXF) {
+ inst = emit(ir, opcode, result_dst, coord);
+ } else
inst = emit(ir, opcode, result_dst, coord);
if (ir->shadow_comparitor)
this->shader_program,
this->prog);
- const glsl_type *sampler_type = ir->sampler->type;
+ if (ir->offset) {
+ inst->tex_offset_num_offset = 1;
+ inst->tex_offsets[0].Index = offset.index;
+ inst->tex_offsets[0].File = offset.file;
+ inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
+ inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
+ inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
+ }
switch (sampler_type->sampler_dimensionality) {
case GLSL_SAMPLER_DIM_1D:
void
glsl_to_tgsi_visitor::visit(ir_if *ir)
{
- glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
+ glsl_to_tgsi_instruction *cond_inst, *if_inst;
glsl_to_tgsi_instruction *prev_inst;
prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
visit_exec_list(&ir->then_instructions, this);
if (!ir->else_instructions.is_empty()) {
- else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
+ emit(ir->condition, TGSI_OPCODE_ELSE);
visit_exec_list(&ir->else_instructions, this);
}
result.file = PROGRAM_UNDEFINED;
next_temp = 1;
next_signature_id = 1;
+ num_immediates = 0;
current_function = NULL;
num_address_regs = 0;
indirect_addr_temps = false;
}
}
-static void
-set_uniform_initializers(struct gl_context *ctx,
- struct gl_shader_program *shader_program)
-{
- void *mem_ctx = NULL;
-
- for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
- struct gl_shader *shader = shader_program->_LinkedShaders[i];
-
- if (shader == NULL)
- continue;
-
- foreach_iter(exec_list_iterator, iter, *shader->ir) {
- ir_instruction *ir = (ir_instruction *)iter.get();
- ir_variable *var = ir->as_variable();
-
- if (!var || var->mode != ir_var_uniform || !var->constant_value)
- continue;
-
- if (!mem_ctx)
- mem_ctx = ralloc_context(NULL);
-
- set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
- var->type, var->constant_value);
- }
- }
-
- ralloc_free(mem_ctx);
-}
-
/*
* Scan/rewrite program to remove reads of custom (output) registers.
* The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
GLint outputMap[VERT_RESULT_MAX];
GLint outputTypes[VERT_RESULT_MAX];
GLuint numVaryingReads = 0;
- GLboolean usedTemps[MAX_PROGRAM_TEMPS];
+ GLboolean usedTemps[MAX_TEMPS];
GLuint firstTemp = 0;
_mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
- usedTemps, MAX_PROGRAM_TEMPS);
+ usedTemps, MAX_TEMPS);
assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
if (outputMap[var] == -1) {
numVaryingReads++;
outputMap[var] = _mesa_find_free_register(usedTemps,
- MAX_PROGRAM_TEMPS,
+ MAX_TEMPS,
firstTemp);
outputTypes[var] = inst->src[j].type;
firstTemp = outputMap[var] + 1;
}
}
+/**
+ * Compute which channels of a source operand an instruction consumes.
+ *
+ * For every channel enabled in dst.writemask, the swizzle of src selects one
+ * source component; the union of the selected components is returned as a
+ * bitmask of WRITEMASK_X,Y,Z,W.
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int channels_read = 0;
+
+   for (int chan = 0; chan < 4; chan++) {
+      const unsigned selected = GET_SWZ(src.swizzle, chan);
+      ASSERT(selected < 4);
+
+      /* A destination channel that is not written reads nothing. */
+      if (!(dst.writemask & (1 << chan)))
+         continue;
+
+      if (selected <= SWIZZLE_W)
+         channels_read |= 1 << selected;
+   }
+
+   return channels_read;
+}
+
+/**
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0. There are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that are translated to CMP instructions like the one mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ * MOV T0, T2;
+ * else
+ * MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program. If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
+ *
+ * The pass walks the instruction list once, accumulating the writemasks seen
+ * so far for every PROGRAM_TEMPORARY and PROGRAM_OUTPUT register. It returns
+ * at the first instruction that has a relatively addressed destination or is
+ * a flow-control opcode, so only the instructions before that point are
+ * simplified.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+ unsigned tempWrites[MAX_TEMPS]; /* writemask bits seen so far, per temporary */
+ unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; /* same, per output register */
+
+ memset(tempWrites, 0, sizeof(tempWrites));
+ memset(outputWrites, 0, sizeof(outputWrites));
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ unsigned prevWriteMask = 0; /* channels of dst written before this instruction */
+
+ /* Give up if we encounter relative addressing or flow control. */
+ if (inst->dst.reladdr ||
+ tgsi_get_opcode_info(inst->op)->is_branch ||
+ inst->op == TGSI_OPCODE_BGNSUB ||
+ inst->op == TGSI_OPCODE_CONT ||
+ inst->op == TGSI_OPCODE_END ||
+ inst->op == TGSI_OPCODE_ENDSUB ||
+ inst->op == TGSI_OPCODE_RET) {
+ return;
+ }
+
+ if (inst->dst.file == PROGRAM_OUTPUT) {
+ assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+ prevWriteMask = outputWrites[inst->dst.index];
+ outputWrites[inst->dst.index] |= inst->dst.writemask;
+ } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+ assert(inst->dst.index < MAX_TEMPS);
+ prevWriteMask = tempWrites[inst->dst.index];
+ tempWrites[inst->dst.index] |= inst->dst.writemask;
+ }
+
+ /* For a CMP to be considered a conditional write, the destination
+ * register and source register two must be the same. In addition, none
+ * of the written channels may have been written earlier, and source
+ * two must read exactly the channels that are written. */
+ if (inst->op == TGSI_OPCODE_CMP
+ && !(inst->dst.writemask & prevWriteMask)
+ && inst->src[2].file == inst->dst.file
+ && inst->src[2].index == inst->dst.index
+ && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+
+ inst->op = TGSI_OPCODE_MOV;
+ inst->src[0] = inst->src[1]; /* the MOV source is T2 in the header comment's notation */
+ }
+ }
+}
+
/* Replaces all references to a temporary register index with another index. */
void
glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
}
}
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination. This is less primitive than eliminate_dead_code(), as it
+ * is per-channel and can detect consecutive writes without a read between them
+ * as dead code. However, there is some dead code that can be eliminated by
+ * eliminate_dead_code() but not this function - for example, this function
+ * cannot eliminate an instruction writing to a register that is never read and
+ * is the only instruction writing to that register.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
+ *
+ * Bookkeeping: writes[4 * reg + chan] holds the most recent instruction that
+ * wrote that temporary channel and has not been read since, and
+ * write_level[4 * reg + chan] holds the IF nesting depth at which that write
+ * was recorded. Dead channels are accumulated in each instruction's
+ * dead_mask; a final walk removes instructions whose whole writemask is dead
+ * and narrows the writemask of the others.
+ *
+ * Returns the number of instructions removed from the instruction list.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+ glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+ glsl_to_tgsi_instruction *,
+ this->next_temp * 4);
+ int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+ int level = 0; /* current IF nesting depth */
+ int removed = 0; /* number of instructions deleted, returned to the caller */
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+ assert(inst->dst.file != PROGRAM_TEMPORARY
+ || inst->dst.index < this->next_temp);
+
+ switch (inst->op) {
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_ENDLOOP:
+ /* End of a basic block, clear the write array entirely.
+ * FIXME: This keeps us from killing dead code when the writes are
+ * on either side of a loop, even when the register isn't touched
+ * inside the loop.
+ */
+ memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ --level;
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ /* Clear all channels written inside the preceding if block from the
+ * write array, but leave those that were not touched.
+ *
+ * FIXME: This destroys opportunities to remove dead code inside of
+ * IF blocks that are followed by an ELSE block.
+ */
+ for (int r = 0; r < this->next_temp; r++) {
+ for (int c = 0; c < 4; c++) {
+ if (!writes[4 * r + c])
+ continue;
+
+ if (write_level[4 * r + c] >= level) /* recorded at this depth or deeper */
+ writes[4 * r + c] = NULL;
+ }
+ }
+ break;
+
+ case TGSI_OPCODE_IF:
+ ++level;
+ /* fallthrough to default case to mark the condition as read */
+
+ default:
+ /* Continuing the block, clear any channels from the write array that
+ * are read by this instruction.
+ */
+ for (unsigned i = 0; i < Elements(inst->src); i++) {
+ if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+ /* Any temporary might be read, so no dead code elimination
+ * across this instruction.
+ */
+ memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+ } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+ /* Clear where it's used as src. Swizzle selectors above W set
+ * bits that the loop over c = 0..3 below never inspects.
+ */
+ int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+ src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+ src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+ src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+
+ for (int c = 0; c < 4; c++) {
+ if (src_chans & (1 << c)) {
+ writes[4 * inst->src[i].index + c] = NULL;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ /* If this instruction writes to a temporary, add it to the write array.
+ * If there is already an instruction in the write array for one or more
+ * of the channels, flag that channel write as dead.
+ * Writes with a relative destination or with saturate set are not
+ * tracked at all.
+ */
+ if (inst->dst.file == PROGRAM_TEMPORARY &&
+ !inst->dst.reladdr &&
+ !inst->saturate) {
+ for (int c = 0; c < 4; c++) {
+ if (inst->dst.writemask & (1 << c)) {
+ if (writes[4 * inst->dst.index + c]) {
+ if (write_level[4 * inst->dst.index + c] < level) /* earlier write came from a shallower IF depth */
+ continue; /* keep the earlier write recorded; this one is not tracked */
+ else
+ writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+ }
+ writes[4 * inst->dst.index + c] = inst;
+ write_level[4 * inst->dst.index + c] = level;
+ }
+ }
+ }
+ }
+
+ /* Anything still in the write array at this point is dead code. */
+ for (int r = 0; r < this->next_temp; r++) {
+ for (int c = 0; c < 4; c++) {
+ glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+ if (inst)
+ inst->dead_mask |= (1 << c);
+ }
+ }
+
+ /* Now actually remove the instructions that are completely dead and update
+ * the writemask of other instructions with dead channels.
+ */
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+ if (!inst->dead_mask || !inst->dst.writemask)
+ continue;
+ else if (inst->dead_mask == inst->dst.writemask) {
+ iter.remove();
+ delete inst;
+ removed++;
+ } else
+ inst->dst.writemask &= ~(inst->dead_mask);
+ }
+
+ ralloc_free(write_level);
+ ralloc_free(writes);
+
+ return removed;
+}
+
/* Merges temporary registers together where possible to reduce the number of
* registers needed to run a program.
*
this->next_temp = new_index;
}
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ *
+ * A new glsl_to_tgsi_visitor is created whose instruction stream is:
+ *   1. a TEX that fetches the incoming pixel color through sampler 0,
+ *   2. optionally a MAD applying the pixel-transfer scale and bias,
+ *   3. optionally two TEX look-ups through sampler 1 (the pixel map texture),
+ *   4. a copy of every instruction of \p original, with reads of
+ *      FRAG_ATTRIB_COL0 redirected to the temporary that holds the color.
+ *
+ * \param fp              fragment program that receives the new visitor in
+ *                        fp->glsl_to_tgsi and whose gl_program info is updated
+ * \param original        visitor holding the instructions of the user's shader
+ * \param scale_and_bias  non-zero to emit the scale/bias MAD
+ * \param pixel_maps      non-zero to emit the pixel map look-ups
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+   /* The immediate count has to travel with the immediate list: a freshly
+    * constructed visitor starts with num_immediates == 0, which would not
+    * match the list copied above.
+    */
+   v->num_immediates = original->num_immediates;
+
+   /*
+    * Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
+    */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions:
+       */
+
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *orig_inst =
+         (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *new_inst;
+      st_src_reg src_regs[3];
+
+      if (orig_inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(orig_inst->dst.index);
+
+      for (int i = 0; i < 3; i++) {
+         src_regs[i] = orig_inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0) {
+            /* The incoming color now lives in the temporary written above. */
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      new_inst = v->emit(NULL, orig_inst->op, orig_inst->dst,
+                         src_regs[0], src_regs[1], src_regs[2]);
+
+      /* emit() only receives the opcode and registers, so carry over the
+       * per-instruction state that the TGSI translation reads for texture
+       * and call instructions.
+       */
+      new_inst->sampler = orig_inst->sampler;
+      new_inst->tex_target = orig_inst->tex_target;
+      new_inst->tex_shadow = orig_inst->tex_shadow;
+      new_inst->tex_offset_num_offset = orig_inst->tex_offset_num_offset;
+      for (unsigned j = 0; j < MAX_GLSL_TEXTURE_OFFSET; j++)
+         new_inst->tex_offsets[j] = orig_inst->tex_offsets[j];
+      new_inst->function = orig_inst->function;
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ *
+ * A new glsl_to_tgsi_visitor is created that starts with a TEX through
+ * sampler \p samplerIndex and a KIL on the negated texel, followed by a copy
+ * of every instruction of \p original.
+ *
+ * \param fp            fragment program that receives the new visitor in
+ *                      fp->glsl_to_tgsi and whose gl_program info is updated
+ * \param original      visitor holding the instructions of the user's shader
+ * \param samplerIndex  sampler through which the bitmap texture is fetched
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+   /* The immediate count has to travel with the immediate list: a freshly
+    * constructed visitor starts with num_immediates == 0, which would not
+    * match the list copied above.
+    */
+   v->num_immediates = original->num_immediates;
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel>0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *orig_inst =
+         (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *new_inst;
+      st_src_reg src_regs[3];
+
+      if (orig_inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(orig_inst->dst.index);
+
+      for (int i = 0; i < 3; i++) {
+         src_regs[i] = orig_inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      new_inst = v->emit(NULL, orig_inst->op, orig_inst->dst,
+                         src_regs[0], src_regs[1], src_regs[2]);
+
+      /* emit() only receives the opcode and registers, so carry over the
+       * per-instruction state that the TGSI translation reads for texture
+       * and call instructions.
+       */
+      new_inst->sampler = orig_inst->sampler;
+      new_inst->tex_target = orig_inst->tex_target;
+      new_inst->tex_shadow = orig_inst->tex_shadow;
+      new_inst->tex_offset_num_offset = orig_inst->tex_offset_num_offset;
+      for (unsigned j = 0; j < MAX_GLSL_TEXTURE_OFFSET; j++)
+         new_inst->tex_offsets[j] = orig_inst->tex_offsets[j];
+      new_inst->function = orig_inst->function;
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
/* ------------------------- TGSI conversion stuff -------------------------- */
struct label {
unsigned branch_target;
struct st_translate {
struct ureg_program *ureg;
- struct ureg_dst temps[MAX_PROGRAM_TEMPS];
+ struct ureg_dst temps[MAX_TEMPS];
struct ureg_src *constants;
+ struct ureg_src *immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[1];
* of labels built here and patch the TGSI code with the actual
* location of each label.
*/
-static unsigned *get_label( struct st_translate *t,
- unsigned branch_target )
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
{
unsigned i;
if (t->labels_count + 1 >= t->labels_size) {
t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
t->labels = (struct label *)realloc(t->labels,
- t->labels_size * sizeof t->labels[0]);
+ t->labels_size * sizeof(struct label));
if (t->labels == NULL) {
static unsigned dummy;
t->error = TRUE;
}
/**
- * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Called prior to emitting the TGSI code for each instruction.
* Allocate additional space for instructions if needed.
- * Update the insn[] array so the next Mesa instruction points to
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
* the next TGSI instruction.
*/
-static void set_insn_start( struct st_translate *t,
- unsigned start )
+static void set_insn_start(struct st_translate *t, unsigned start)
{
if (t->insn_count + 1 >= t->insn_size) {
t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
- t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
+ t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
if (t->insn == NULL) {
t->error = TRUE;
return;
}
/**
- * Map a Mesa dst register to a TGSI ureg_dst register.
+ * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
+ */
+static struct ureg_src
+emit_immediate(struct st_translate *t,
+               gl_constant_value values[4],
+               int type, int size)
+{
+   struct ureg_program *const program = t->ureg;
+
+   /* Pick the ureg declaration helper that matches the GL data type;
+    * booleans are declared through the unsigned integer helper.
+    */
+   if (type == GL_FLOAT)
+      return ureg_DECL_immediate(program, &values[0].f, size);
+
+   if (type == GL_INT)
+      return ureg_DECL_immediate_int(program, &values[0].i, size);
+
+   if (type == GL_UNSIGNED_INT || type == GL_BOOL)
+      return ureg_DECL_immediate_uint(program, &values[0].u, size);
+
+   assert(!"should not get here - type must be float, int, uint, or bool");
+   return ureg_src_undef();
+}
+
+/**
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
*/
static struct ureg_dst
-dst_register( struct st_translate *t,
- gl_register_file file,
- GLuint index )
+dst_register(struct st_translate *t,
+ gl_register_file file,
+ GLuint index)
{
- switch( file ) {
+ switch(file) {
case PROGRAM_UNDEFINED:
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
if (ureg_dst_is_undef(t->temps[index]))
- t->temps[index] = ureg_DECL_temporary( t->ureg );
+ t->temps[index] = ureg_DECL_temporary(t->ureg);
return t->temps[index];
return t->address[index];
default:
- debug_assert( 0 );
+ assert(!"unknown dst register file");
return ureg_dst_undef();
}
}
/**
- * Map a Mesa src register to a TGSI ureg_src register.
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
*/
static struct ureg_src
-src_register( struct st_translate *t,
- gl_register_file file,
- GLuint index )
+src_register(struct st_translate *t,
+ gl_register_file file,
+ GLuint index)
{
- switch( file ) {
+ switch(file) {
case PROGRAM_UNDEFINED:
return ureg_src_undef();
assert(index >= 0);
assert(index < Elements(t->temps));
if (ureg_dst_is_undef(t->temps[index]))
- t->temps[index] = ureg_DECL_temporary( t->ureg );
+ t->temps[index] = ureg_DECL_temporary(t->ureg);
return ureg_src(t->temps[index]);
case PROGRAM_NAMED_PARAM:
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT: /* ie, immediate */
if (index < 0)
- return ureg_DECL_constant( t->ureg, 0 );
+ return ureg_DECL_constant(t->ureg, 0);
else
return t->constants[index];
+ case PROGRAM_IMMEDIATE:
+ return t->immediates[index];
+
case PROGRAM_INPUT:
assert(t->inputMapping[index] < Elements(t->inputs));
return t->inputs[t->inputMapping[index]];
return t->systemValues[index];
default:
- debug_assert( 0 );
+ assert(!"unknown src register file");
return ureg_src_undef();
}
}
* Create a TGSI ureg_dst register from an st_dst_reg.
*/
static struct ureg_dst
-translate_dst( struct st_translate *t,
- const st_dst_reg *dst_reg,
- boolean saturate )
+translate_dst(struct st_translate *t,
+ const st_dst_reg *dst_reg,
+ bool saturate)
{
- struct ureg_dst dst = dst_register( t,
- dst_reg->file,
- dst_reg->index );
+ struct ureg_dst dst = dst_register(t,
+ dst_reg->file,
+ dst_reg->index);
- dst = ureg_writemask( dst,
- dst_reg->writemask );
+ dst = ureg_writemask(dst, dst_reg->writemask);
if (saturate)
- dst = ureg_saturate( dst );
+ dst = ureg_saturate(dst);
if (dst_reg->reladdr != NULL)
- dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
+ dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
return dst;
}
* Create a TGSI ureg_src register from an st_src_reg.
*/
static struct ureg_src
-translate_src( struct st_translate *t,
- const st_src_reg *src_reg )
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
{
- struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
+ struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
- src = ureg_swizzle( src,
- GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
- GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
- GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
- GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
+ src = ureg_swizzle(src,
+ GET_SWZ(src_reg->swizzle, 0) & 0x3,
+ GET_SWZ(src_reg->swizzle, 1) & 0x3,
+ GET_SWZ(src_reg->swizzle, 2) & 0x3,
+ GET_SWZ(src_reg->swizzle, 3) & 0x3);
if ((src_reg->negate & 0xf) == NEGATE_XYZW)
src = ureg_negate(src);
return src;
}
+/**
+ * Convert a texture offset recorded on a glsl_to_tgsi_instruction into the
+ * form handed to ureg_tex_insn().
+ *
+ * The incoming offset must live in PROGRAM_IMMEDIATE; the result refers to
+ * TGSI_FILE_IMMEDIATE with the same index and X/Y/Z swizzles.
+ *
+ * NOTE(review): the index is passed through unchanged rather than looked up
+ * in t->immediates - confirm that glsl_to_tgsi immediate indices always
+ * match the indices of the declared ureg immediates.
+ */
+static struct tgsi_texture_offset
+translate_tex_offset(struct st_translate *t,
+                     const struct tgsi_texture_offset *in_offset)
+{
+   struct tgsi_texture_offset offset;
+
+   assert(in_offset->File == PROGRAM_IMMEDIATE);
+
+   /* Start from an all-zero token so that any bits not assigned below have a
+    * defined value when the struct is returned by value.
+    */
+   memset(&offset, 0, sizeof(offset));
+
+   offset.File = TGSI_FILE_IMMEDIATE;
+   offset.Index = in_offset->Index;
+   offset.SwizzleX = in_offset->SwizzleX;
+   offset.SwizzleY = in_offset->SwizzleY;
+   offset.SwizzleZ = in_offset->SwizzleZ;
+
+   return offset;
+}
+
static void
-compile_tgsi_instruction(struct st_translate *t,
- const struct glsl_to_tgsi_instruction *inst)
+compile_tgsi_instruction(struct st_translate *t,
+ const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
struct ureg_dst dst[1];
struct ureg_src src[4];
+ struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
+
unsigned num_dst;
unsigned num_src;
- num_dst = num_inst_dst_regs( inst->op );
- num_src = num_inst_src_regs( inst->op );
+ num_dst = num_inst_dst_regs(inst->op);
+ num_src = num_inst_src_regs(inst->op);
if (num_dst)
- dst[0] = translate_dst( t,
- &inst->dst,
- inst->saturate);
+ dst[0] = translate_dst(t,
+ &inst->dst,
+ inst->saturate);
for (i = 0; i < num_src; i++)
- src[i] = translate_src( t, &inst->src[i] );
+ src[i] = translate_src(t, &inst->src[i]);
- switch( inst->op ) {
+ switch(inst->op) {
case TGSI_OPCODE_BGNLOOP:
case TGSI_OPCODE_CAL:
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDLOOP:
case TGSI_OPCODE_IF:
- debug_assert(num_dst == 0);
- ureg_label_insn( ureg,
- inst->op,
- src, num_src,
- get_label( t,
- inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
+ assert(num_dst == 0);
+ ureg_label_insn(ureg,
+ inst->op,
+ src, num_src,
+ get_label(t,
+ inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
return;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXD:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_TXQ:
+ case TGSI_OPCODE_TXF:
src[num_src++] = t->samplers[inst->sampler];
- ureg_tex_insn( ureg,
- inst->op,
- dst, num_dst,
- translate_texture_target( inst->tex_target,
- inst->tex_shadow ),
- src, num_src );
+ for (i = 0; i < inst->tex_offset_num_offset; i++) {
+ texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
+ }
+ ureg_tex_insn(ureg,
+ inst->op,
+ dst, num_dst,
+ translate_texture_target(inst->tex_target, inst->tex_shadow),
+ texoffsets, inst->tex_offset_num_offset,
+ src, num_src);
return;
case TGSI_OPCODE_SCS:
- dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
- ureg_insn( ureg,
- inst->op,
- dst, num_dst,
- src, num_src );
- break;
-
- case TGSI_OPCODE_XPD:
- dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
- ureg_insn( ureg,
- inst->op,
- dst, num_dst,
- src, num_src );
+ dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
+ ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
break;
default:
- ureg_insn( ureg,
- inst->op,
- dst, num_dst,
- src, num_src );
+ ureg_insn(ureg,
+ inst->op,
+ dst, num_dst,
+ src, num_src);
break;
}
}
* Basically, add (adjX, adjY) to the fragment position.
*/
static void
-emit_adjusted_wpos( struct st_translate *t,
- const struct gl_program *program,
- GLfloat adjX, GLfloat adjY)
+emit_adjusted_wpos(struct st_translate *t,
+ const struct gl_program *program,
+ float adjX, float adjY)
{
struct ureg_program *ureg = t->ureg;
struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
* a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
*/
static void
-emit_wpos_inversion( struct st_translate *t,
- const struct gl_program *program,
- boolean invert)
+emit_wpos_inversion(struct st_translate *t,
+ const struct gl_program *program,
+ bool invert)
{
struct ureg_program *ureg = t->ureg;
unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
wposTransformState);
- struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+ struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
struct ureg_dst wpos_temp;
struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
if (wpos_input.File == TGSI_FILE_TEMPORARY)
wpos_temp = ureg_dst(wpos_input);
else {
- wpos_temp = ureg_DECL_temporary( ureg );
- ureg_MOV( ureg, wpos_temp, wpos_input );
+ wpos_temp = ureg_DECL_temporary(ureg);
+ ureg_MOV(ureg, wpos_temp, wpos_input);
}
if (invert) {
/* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
*/
- ureg_MAD( ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
- wpos_input,
- ureg_scalar(wpostrans, 0),
- ureg_scalar(wpostrans, 1));
+ ureg_MAD(ureg,
+ ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+ wpos_input,
+ ureg_scalar(wpostrans, 0),
+ ureg_scalar(wpostrans, 1));
} else {
/* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
*/
- ureg_MAD( ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
- wpos_input,
- ureg_scalar(wpostrans, 2),
- ureg_scalar(wpostrans, 3));
+ ureg_MAD(ureg,
+ ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+ wpos_input,
+ ureg_scalar(wpostrans, 2),
+ ureg_scalar(wpostrans, 3));
}
/* Use wpos_temp as position input from here on:
const GLuint outputMapping[],
const ubyte outputSemanticName[],
const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags )
+ boolean passthrough_edgeflags)
{
struct st_translate translate, *t;
unsigned i;
for (i = 0; i < numOutputs; i++) {
switch (outputSemanticName[i]) {
case TGSI_SEMANTIC_POSITION:
- t->outputs[i] = ureg_DECL_output( ureg,
- TGSI_SEMANTIC_POSITION, /* Z / Depth */
- outputSemanticIndex[i] );
-
- t->outputs[i] = ureg_writemask( t->outputs[i],
- TGSI_WRITEMASK_Z );
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_POSITION, /* Z/Depth */
+ outputSemanticIndex[i]);
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
break;
case TGSI_SEMANTIC_STENCIL:
- t->outputs[i] = ureg_DECL_output( ureg,
- TGSI_SEMANTIC_STENCIL, /* Stencil */
- outputSemanticIndex[i] );
- t->outputs[i] = ureg_writemask( t->outputs[i],
- TGSI_WRITEMASK_Y );
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_STENCIL, /* Stencil */
+ outputSemanticIndex[i]);
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
break;
case TGSI_SEMANTIC_COLOR:
- t->outputs[i] = ureg_DECL_output( ureg,
- TGSI_SEMANTIC_COLOR,
- outputSemanticIndex[i] );
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_COLOR,
+ outputSemanticIndex[i]);
break;
default:
- debug_assert(0);
+ assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
return PIPE_ERROR_BAD_INPUT;
}
}
}
for (i = 0; i < numOutputs; i++) {
- t->outputs[i] = ureg_DECL_output( ureg,
- outputSemanticName[i],
- outputSemanticIndex[i] );
+ t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
}
}
else {
}
for (i = 0; i < numOutputs; i++) {
- t->outputs[i] = ureg_DECL_output( ureg,
- outputSemanticName[i],
- outputSemanticIndex[i] );
+ t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
/* Writing to the point size result register requires special
* handling to implement clamping.
unsigned pointSizeClampConst =
_mesa_add_state_reference(proginfo->Parameters,
pointSizeClampState);
- struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
- t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+ struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+ t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
t->pointSizeResult = t->outputs[i];
t->pointSizeOutIndex = i;
t->outputs[i] = psizregtemp;
/* Declare address register.
*/
if (program->num_address_regs > 0) {
- debug_assert( program->num_address_regs == 1 );
- t->address[0] = ureg_DECL_address( ureg );
+ assert(program->num_address_regs == 1);
+ t->address[0] = ureg_DECL_address(ureg);
}
/* Declare misc input registers
*/
for (i = 0; i < (unsigned)program->next_temp; i++) {
/* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
- t->temps[i] = ureg_DECL_temporary( t->ureg );
+ t->temps[i] = ureg_DECL_temporary(t->ureg);
}
}
- /* Emit constants and immediates. Mesa uses a single index space
- * for these, so we put all the translated regs in t->constants.
- * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
+ /* Emit constants and uniforms. TGSI uses a single index space for these,
+ * so we put all the translated regs in t->constants.
*/
if (proginfo->Parameters) {
- t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
+ t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
if (t->constants == NULL) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out;
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_UNIFORM:
- t->constants[i] = ureg_DECL_constant( ureg, i );
+ t->constants[i] = ureg_DECL_constant(ureg, i);
break;
- /* Emit immediates only when there's no indirect addressing of
- * the const buffer.
- * FIXME: Be smarter and recognize param arrays:
- * indirect addressing is only valid within the referenced
- * array.
- */
+ /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+ * addressing of the const buffer.
+ * FIXME: Be smarter and recognize param arrays:
+ * indirect addressing is only valid within the referenced
+ * array.
+ */
case PROGRAM_CONSTANT:
if (program->indirect_addr_consts)
- t->constants[i] = ureg_DECL_constant( ureg, i );
+ t->constants[i] = ureg_DECL_constant(ureg, i);
else
- switch(proginfo->Parameters->Parameters[i].DataType)
- {
- case GL_FLOAT:
- case GL_FLOAT_VEC2:
- case GL_FLOAT_VEC3:
- case GL_FLOAT_VEC4:
- t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
- break;
- case GL_INT:
- case GL_INT_VEC2:
- case GL_INT_VEC3:
- case GL_INT_VEC4:
- t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
- break;
- case GL_UNSIGNED_INT:
- case GL_UNSIGNED_INT_VEC2:
- case GL_UNSIGNED_INT_VEC3:
- case GL_UNSIGNED_INT_VEC4:
- case GL_BOOL:
- case GL_BOOL_VEC2:
- case GL_BOOL_VEC3:
- case GL_BOOL_VEC4:
- t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
- break;
- default:
- assert(!"should not get here");
- }
+ t->constants[i] = emit_immediate(t,
+ proginfo->Parameters->ParameterValues[i],
+ proginfo->Parameters->Parameters[i].DataType,
+ 4);
break;
default:
break;
}
}
}
+
+ /* Emit immediate values.
+ */
+ t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
+ if (t->immediates == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out;
+ }
+ i = 0;
+ foreach_iter(exec_list_iterator, iter, program->immediates) {
+ immediate_storage *imm = (immediate_storage *)iter.get();
+ t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
+ }
/* texture samplers */
for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
if (program->samplers_used & (1 << i)) {
- t->samplers[i] = ureg_DECL_sampler( ureg, i );
+ t->samplers[i] = ureg_DECL_sampler(ureg, i);
}
}
/* Emit each instruction in turn:
*/
foreach_iter(exec_list_iterator, iter, program->instructions) {
- set_insn_start( t, ureg_get_instruction_number( ureg ));
- compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
+ set_insn_start(t, ureg_get_instruction_number(ureg));
+ compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
if (t->prevInstWrotePointSize && proginfo->Id) {
/* The previous instruction wrote to the (fake) vertex point size
* Note that we can't do this easily at the end of program due to
* possible early return.
*/
- set_insn_start( t, ureg_get_instruction_number( ureg ));
- ureg_MAX( t->ureg,
- ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
- ureg_src(t->outputs[t->pointSizeOutIndex]),
- ureg_swizzle(t->pointSizeConst, 1,1,1,1));
- ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
- ureg_src(t->outputs[t->pointSizeOutIndex]),
- ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+ set_insn_start(t, ureg_get_instruction_number(ureg));
+ ureg_MAX(t->ureg,
+ ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+ ureg_src(t->outputs[t->pointSizeOutIndex]),
+ ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+ ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+ ureg_src(t->outputs[t->pointSizeOutIndex]),
+ ureg_swizzle(t->pointSizeConst, 2,2,2,2));
}
t->prevInstWrotePointSize = GL_FALSE;
}
/* Fix up all emitted labels:
*/
for (i = 0; i < t->labels_count; i++) {
- ureg_fixup_label( ureg,
- t->labels[i].token,
- t->insn[t->labels[i].branch_target] );
+ ureg_fixup_label(ureg, t->labels[i].token,
+ t->insn[t->labels[i].branch_target]);
}
out:
FREE(t->insn);
FREE(t->labels);
FREE(t->constants);
+ FREE(t->immediates);
if (t->error) {
debug_printf("%s: translate error flag set\n", __FUNCTION__);
struct gl_program *prog;
GLenum target;
const char *target_string;
- GLboolean progress;
+ bool progress;
struct gl_shader_compiler_options *options =
&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
if (!prog)
return NULL;
prog->Parameters = _mesa_new_parameter_list();
- prog->Varying = _mesa_new_parameter_list();
- prog->Attributes = _mesa_new_parameter_list();
v->ctx = ctx;
v->prog = prog;
v->shader_program = shader_program;
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
+ v->native_integers = ctx->Const.NativeIntegers;
add_uniforms_to_parameters_list(shader_program, shader, prog);
v->remove_output_reads(PROGRAM_OUTPUT);
if (target == GL_VERTEX_PROGRAM_ARB)
v->remove_output_reads(PROGRAM_VARYING);
+
+ /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+ v->simplify_cmp();
+ v->copy_propagate();
+ while (v->eliminate_dead_code_advanced());
- /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
- * FIXME: These passes to optimize temporary registers don't work when there
+ /* FIXME: These passes to optimize temporary registers don't work when there
* is indirect addressing of the temporary register space. We need proper
* array support so that we don't have to give up these passes in every
* shader that uses arrays.
*/
if (!v->indirect_addr_temps) {
- v->copy_propagate();
- v->merge_registers();
v->eliminate_dead_code();
+ v->merge_registers();
v->renumber_registers();
}
/* Lowering */
do_mat_op_to_vec(ir);
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
- | LOG_TO_LOG2
+ | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
- progress = lower_quadop_vector(ir, true) || progress;
+ progress = lower_quadop_vector(ir, false) || progress;
- if (options->EmitNoIfs) {
+ if (options->MaxIfDepth == 0)
progress = lower_discard(ir) || progress;
- progress = lower_if_to_cond_assign(ir) || progress;
- }
+
+ progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
if (options->EmitNoNoise)
progress = lower_noise(ir) || progress;
linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
if (linked_prog) {
- bool ok = true;
-
- switch (prog->_LinkedShaders[i]->Type) {
- case GL_VERTEX_SHADER:
- _mesa_reference_vertprog(ctx, &prog->VertexProgram,
- (struct gl_vertex_program *)linked_prog);
- ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
- linked_prog);
- break;
- case GL_FRAGMENT_SHADER:
- _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
- (struct gl_fragment_program *)linked_prog);
- ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
- linked_prog);
- break;
- case GL_GEOMETRY_SHADER:
- _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
- (struct gl_geometry_program *)linked_prog);
- ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
- linked_prog);
- break;
- }
- if (!ok) {
+ static const GLenum targets[] = {
+ GL_VERTEX_PROGRAM_ARB,
+ GL_FRAGMENT_PROGRAM_ARB,
+ GL_GEOMETRY_PROGRAM_NV
+ };
+
+ _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+ linked_prog);
+ if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
+ _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+ NULL);
+ _mesa_reference_program(ctx, &linked_prog, NULL);
return GL_FALSE;
}
}
return GL_TRUE;
}
-
-/**
- * Link a GLSL shader program. Called via glLinkProgram().
- */
-void
-st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
-{
- unsigned int i;
-
- _mesa_clear_shader_program_data(ctx, prog);
-
- prog->LinkStatus = GL_TRUE;
-
- for (i = 0; i < prog->NumShaders; i++) {
- if (!prog->Shaders[i]->CompileStatus) {
- fail_link(prog, "linking with uncompiled shader");
- prog->LinkStatus = GL_FALSE;
- }
- }
-
- prog->Varying = _mesa_new_parameter_list();
- _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
- _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
- _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
-
- if (prog->LinkStatus) {
- link_shaders(ctx, prog);
- }
-
- if (prog->LinkStatus) {
- if (!ctx->Driver.LinkShader(ctx, prog)) {
- prog->LinkStatus = GL_FALSE;
- }
- }
-
- set_uniform_initializers(ctx, prog);
-
- if (ctx->Shader.Flags & GLSL_DUMP) {
- if (!prog->LinkStatus) {
- printf("GLSL shader program %d failed to link\n", prog->Name);
- }
-
- if (prog->InfoLog && prog->InfoLog[0] != 0) {
- printf("GLSL shader program %d info log:\n", prog->Name);
- printf("%s\n", prog->InfoLog);
- }
- }
-}
-
} /* extern "C" */