bool indirect_addr_consts;
int glsl_version;
+ bool native_integers;
variable_storage *find_variable_storage(ir_variable *var);
/**
* Emit the correct dot-product instruction for the type of arguments
*/
- void emit_dp(ir_instruction *ir,
- st_dst_reg dst,
- st_src_reg src0,
- st_src_reg src1,
- unsigned elements);
+ glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+ st_dst_reg dst,
+ st_src_reg src0,
+ st_src_reg src1,
+ unsigned elements);
void emit_scalar(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0);
if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
type = GLSL_TYPE_FLOAT;
- else if (glsl_version >= 130)
+ else if (native_integers)
type = src0.type;
#define case4(c, f, i, u) \
return op;
}
-void
+glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
st_dst_reg dst, st_src_reg src0, st_src_reg src1,
unsigned elements)
TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
};
- emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+ return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}
/**
st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
union gl_constant_value uval;
- assert(glsl_version >= 130);
+ assert(native_integers);
uval.i = val;
src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
struct st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
{
- if (glsl_version >= 130)
+ if (native_integers)
return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
st_src_reg_for_int(val);
else
{
st_src_reg src;
- src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
+ src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
src.file = PROGRAM_TEMPORARY;
src.index = next_temp;
src.reladdr = NULL;
this->next_temp += type_size(ir->type);
dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
- glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+ native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
}
}
} else {
st_src_reg src(PROGRAM_STATE_VAR, index,
- glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+ native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
src.swizzle = slots[i].swizzle;
emit(ir, TGSI_OPCODE_MOV, dst, src);
/* even a float takes up a whole vec4 reg in a struct/array. */
switch (ir->operation) {
case ir_unop_logic_not:
- emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+ if (result_dst.type != GLSL_TYPE_FLOAT)
+ emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+ else {
+ /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
+ * older GPUs implement SEQ using multiple instructions (i915 uses two
+ * SGE instructions and a MUL instruction). Since our logic values are
+ * 0.0 and 1.0, 1-x also implements !x; it is emitted below as a single
+ * ADD of the negated operand and 1.0.
+ */
+ op[0].negate = ~op[0].negate;
+ emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+ }
break;
case ir_unop_neg:
assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
/* "==" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
- st_src_reg temp = get_temp(glsl_version >= 130 ?
+ st_src_reg temp = get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
glsl_type::vec4_type);
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
/* "!=" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
- st_src_reg temp = get_temp(glsl_version >= 130 ?
+ st_src_reg temp = get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
glsl_type::vec4_type);
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
}
break;
- case ir_unop_any:
+ case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+ /* After the dot-product of the operand with itself, the value will
+ * be an integer in the range [0,4]. The code below maps it back to a
+ * boolean: zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+ result_dst.type == GLSL_TYPE_FLOAT) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ }
+ else {
+ /* Use SNE 0 if integers are being used as boolean values. */
+ emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ }
break;
+ }
case ir_binop_logic_xor:
emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
break;
- case ir_binop_logic_or:
- /* This could be a saturated add and skip the SNE. */
- emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+ case ir_binop_logic_or: {
+ /* After the addition, the value will be an integer on the
+ * range [0,2]. Zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *add =
+ emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+ result_dst.type == GLSL_TYPE_FLOAT) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate if floats are being used as boolean values.
+ */
+ add->saturate = true;
+ } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+ /* Negating the result of the addition gives values on the range
+ * [-2, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ } else {
+ /* Use an SNE on the result of the addition. Zero stays zero,
+ * 1 stays 1, and 2 becomes 1.
+ */
+ emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ }
break;
+ }
case ir_binop_logic_and:
/* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
break;
case ir_unop_i2f:
case ir_unop_b2f:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
break;
}
result_src = op[0];
break;
case ir_unop_f2i:
- if (glsl_version >= 130)
+ if (native_integers)
emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
else
emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
}
case ir_unop_u2f:
- if (glsl_version >= 130) {
+ if (native_integers) {
emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
break;
}
}
this->result = st_src_reg(entry->file, entry->index, var->type);
- if (glsl_version <= 120)
+ if (!native_integers)
this->result.type = GLSL_TYPE_FLOAT;
}
} else if (ir->rhs->as_expression() &&
this->instructions.get_tail() &&
ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
- type_size(ir->lhs->type) == 1) {
+ type_size(ir->lhs->type) == 1 &&
+ l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
/* To avoid emitting an extra MOV when assigning an expression to a
* variable, emit the last instruction of the expression again, but
* replace the destination register with the target of the assignment.
* Dead code elimination will remove the original instruction.
*/
- glsl_to_tgsi_instruction *inst;
+ glsl_to_tgsi_instruction *inst, *new_inst;
inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
- emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+ new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+ new_inst->saturate = inst->saturate;
} else {
for (i = 0; i < type_size(ir->lhs->type); i++) {
emit(ir, TGSI_OPCODE_MOV, l, r);
}
break;
case GLSL_TYPE_UINT:
- gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+ gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (glsl_version >= 130)
+ if (native_integers)
values[i].u = ir->value.u[i];
else
values[i].f = ir->value.u[i];
}
break;
case GLSL_TYPE_INT:
- gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+ gl_type = native_integers ? GL_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (glsl_version >= 130)
+ if (native_integers)
values[i].i = ir->value.i[i];
else
values[i].f = ir->value.i[i];
}
break;
case GLSL_TYPE_BOOL:
- gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+ gl_type = native_integers ? GL_BOOL : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (glsl_version >= 130)
+ if (native_integers)
values[i].b = ir->value.b[i];
else
values[i].f = ir->value.b[i];
/* Continuing the block, clear any channels from the write array that
* are read by this instruction.
*/
- for (int i = 0; i < 4; i++) {
+ for (unsigned i = 0; i < Elements(inst->src); i++) {
if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
/* Any temporary might be read, so no dead code elimination
* across this instruction.
v->ctx = original->ctx;
v->prog = prog;
v->glsl_version = original->glsl_version;
+ v->native_integers = original->native_integers;
v->options = original->options;
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->ctx = original->ctx;
v->prog = prog;
v->glsl_version = original->glsl_version;
+ v->native_integers = original->native_integers;
v->options = original->options;
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->shader_program = shader_program;
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
+ v->native_integers = ctx->Const.NativeIntegers;
add_uniforms_to_parameters_list(shader_program, shader, prog);