(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
-/**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS 4096
-
/**
* Maximum number of arrays
*/
int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
int mul_operand);
- bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
unsigned int i;
- const ir_state_slot *const slots = ir->state_slots;
- assert(ir->state_slots != NULL);
+ const ir_state_slot *const slots = ir->get_state_slots();
+ assert(slots != NULL);
/* Check if this statevar's setup in the STATE file exactly
* matches how we'll want to reference it as a
* temporary storage and hope that it'll get copy-propagated
* out.
*/
- for (i = 0; i < ir->num_state_slots; i++) {
+ for (i = 0; i < ir->get_num_state_slots(); i++) {
if (slots[i].swizzle != SWIZZLE_XYZW) {
break;
}
variable_storage *storage;
st_dst_reg dst;
- if (i == ir->num_state_slots) {
+ if (i == ir->get_num_state_slots()) {
/* We'll set the index later. */
storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
this->variables.push_tail(storage);
* of the type. However, this had better match the number of state
* elements that we're going to copy into the new temporary.
*/
- assert((int) ir->num_state_slots == type_size(ir->type));
+ assert((int) ir->get_num_state_slots() == type_size(ir->type));
dst = st_dst_reg(get_temp(ir->type));
}
- for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
int index = _mesa_add_state_reference(this->prog->Parameters,
(gl_state_index *)slots[i].tokens);
}
if (storage->file == PROGRAM_TEMPORARY &&
- dst.index != storage->index + (int) ir->num_state_slots) {
+ dst.index != storage->index + (int) ir->get_num_state_slots()) {
fail_link(this->shader_program,
"failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
ir->name, dst.index - storage->index,
return true;
}
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
- /* Emit saturates in the vertex shader only if SM 3.0 is supported.
- */
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- !st_context(this->ctx)->has_shader_model3) {
- return false;
- }
-
- ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
- if (!sat_src)
- return false;
-
- sat_src->accept(this);
- st_src_reg src = this->result;
-
- /* If we generated an expression instruction into a temporary in
- * processing the saturate's operand, apply the saturate to that
- * instruction. Otherwise, generate a MOV to do the saturate.
- *
- * Note that we have to be careful to only do this optimization if
- * the instruction in question was what generated src->result. For
- * example, ir_dereference_array might generate a MUL instruction
- * to create the reladdr, and return us a src reg using that
- * reladdr. That MUL result is not the value we're trying to
- * saturate.
- */
- ir_expression *sat_src_expr = sat_src->as_expression();
- if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
- sat_src_expr->operation == ir_binop_add ||
- sat_src_expr->operation == ir_binop_dot)) {
- glsl_to_tgsi_instruction *new_inst;
- new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
- new_inst->saturate = true;
- } else {
- this->result = get_temp(ir->type);
- st_dst_reg result_dst = st_dst_reg(this->result);
- result_dst.writemask = (1 << ir->type->vector_elements) - 1;
- glsl_to_tgsi_instruction *inst;
- inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
- inst->saturate = true;
- }
-
- return true;
-}
-
void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
st_src_reg *reg, int *num_reladdr)
return;
}
- if (try_emit_sat(ir))
- return;
-
if (ir->operation == ir_quadop_vector)
assert(!"ir_quadop_vector should have been lowered");
case ir_unop_cos_reduced:
emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
break;
+ case ir_unop_saturate: {
+ glsl_to_tgsi_instruction *inst;
+ inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ inst->saturate = true;
+ break;
+ }
case ir_unop_dFdx:
case ir_unop_dFdx_coarse:
case GLSL_TYPE_BOOL:
gl_type = native_integers ? GL_BOOL : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
- if (native_integers)
- values[i].u = ir->value.b[i] ? ~0 : 0;
- else
- values[i].f = ir->value.b[i];
+ values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
}
break;
default:
{
if (ir->condition) {
ir->condition->accept(this);
- this->result.negate = ~this->result.negate;
- emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
+ st_src_reg condition = this->result;
+
+ /* Convert the bool condition to a float so we can negate. */
+ if (native_integers) {
+ st_src_reg temp = get_temp(ir->condition->type);
+ emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
+ condition, st_src_reg_for_float(1.0));
+ condition = temp;
+ }
+
+ condition.negate = ~condition.negate;
+ emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
} else {
/* unconditional kil */
emit(ir, TGSI_OPCODE_KILL);
shader_program = NULL;
shader = NULL;
options = NULL;
+ have_sqrt = false;
}
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
void
glsl_to_tgsi_visitor::simplify_cmp(void)
{
- unsigned *tempWrites;
+ int tempWritesSize = 0;
+ unsigned *tempWrites = NULL;
unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
- tempWrites = new unsigned[MAX_TEMPS];
- if (!tempWrites) {
- return;
- }
- memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
memset(outputWrites, 0, sizeof(outputWrites));
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
prevWriteMask = outputWrites[inst->dst.index];
outputWrites[inst->dst.index] |= inst->dst.writemask;
} else if (inst->dst.file == PROGRAM_TEMPORARY) {
- assert(inst->dst.index < MAX_TEMPS);
+ if (inst->dst.index >= tempWritesSize) {
+    /* Grow in 4096-entry steps, but always far enough to cover
+     * dst.index: a single fixed step would leave the accesses below
+     * out of bounds whenever the index is at least one full step past
+     * the current size.
+     */
+    const int inc = 4096;
+    const int newSize = (inst->dst.index / inc + 1) * inc;
+
+    /* Keep the old pointer until realloc succeeds; on failure realloc
+     * leaves the original buffer allocated, so it must be freed here
+     * before bailing out.
+     */
+    unsigned *grown = (unsigned*)
+       realloc(tempWrites, newSize * sizeof(unsigned));
+    if (!grown) {
+       free(tempWrites);
+       return;
+    }
+
+    /* Zero only the newly added tail; earlier write masks are kept. */
+    memset(grown + tempWritesSize, 0,
+           (newSize - tempWritesSize) * sizeof(unsigned));
+    tempWrites = grown;
+    tempWritesSize = newSize;
+ }
+
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
} else
}
}
- delete [] tempWrites;
+ free(tempWrites);
}
/* Replaces all references to a temporary register index with another index. */
struct st_translate {
struct ureg_program *ureg;
- struct ureg_dst temps[MAX_TEMPS];
+ unsigned temps_size;
+ struct ureg_dst *temps;
+
struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
*/
TGSI_SEMANTIC_VERTEXID,
TGSI_SEMANTIC_INSTANCEID,
+ 0,
+ 0,
/* Geometry shader
*/
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
- assert(index < Elements(t->temps));
+ /* Allocate space for temporaries on demand. */
+ if (index >= t->temps_size) {
+    /* Round the new size up to the multiple of 4096 that covers index,
+     * so the t->temps[index] access below is in bounds even when index
+     * is more than one step past the current size.
+     */
+    const unsigned inc = 4096;
+    const unsigned new_size = ((unsigned) index / inc + 1) * inc;
+
+    /* Only replace t->temps once realloc has succeeded.  Overwriting it
+     * with NULL on failure would leak the old array and leave temps_size
+     * describing storage that no longer exists.
+     */
+    struct ureg_dst *grown = (struct ureg_dst*)
+       realloc(t->temps, new_size * sizeof(struct ureg_dst));
+    if (!grown)
+       return ureg_dst_undef();
+
+    /* Zero only the newly added tail; existing declarations are kept. */
+    memset(grown + t->temps_size, 0,
+           (new_size - t->temps_size) * sizeof(struct ureg_dst));
+    t->temps = grown;
+    t->temps_size = new_size;
+ }
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
* saturating the value to [0,1] does the job.
*/
static void
-emit_face_var(struct st_translate *t)
+emit_face_var(struct gl_context *ctx, struct st_translate *t)
{
struct ureg_program *ureg = t->ureg;
struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
- /* MOV_SAT face_temp, input[face] */
- face_temp = ureg_saturate(face_temp);
- ureg_MOV(ureg, face_temp, face_input);
+ if (ctx->Const.NativeIntegers) {
+ ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
+ }
+ else {
+ /* MOV_SAT face_temp, input[face] */
+ ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
+ }
/* Use face_temp as face input from here on: */
t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}
if (proginfo->InputsRead & VARYING_BIT_FACE)
- emit_face_var(t);
+ emit_face_var(ctx, t);
/*
* Declare output attributes.
out:
if (t) {
+ free(t->temps);
free(t->insn);
free(t->labels);
free(t->constants);
v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+ _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
prog->Parameters);
case GL_GEOMETRY_SHADER:
stgp = (struct st_geometry_program *)prog;
stgp->glsl_to_tgsi = v;
- stgp->Base.InputType = shader_program->Geom.InputType;
- stgp->Base.OutputType = shader_program->Geom.OutputType;
- stgp->Base.VerticesOut = shader_program->Geom.VerticesOut;
- stgp->Base.Invocations = shader_program->Geom.Invocations;
break;
default:
assert(!"should not be reached");
extern "C" {
-struct gl_shader *
-st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
-{
- struct gl_shader *shader;
- assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
- type == GL_GEOMETRY_SHADER_ARB);
- shader = rzalloc(NULL, struct gl_shader);
- if (shader) {
- shader->Type = type;
- shader->Stage = _mesa_shader_enum_to_shader_stage(type);
- shader->Name = name;
- _mesa_init_shader(ctx, shader);
- }
- return shader;
-}
-
-struct gl_shader_program *
-st_new_shader_program(struct gl_context *ctx, GLuint name)
-{
- struct gl_shader_program *shProg;
- shProg = rzalloc(NULL, struct gl_shader_program);
- if (shProg) {
- shProg->Name = name;
- _mesa_init_shader_program(ctx, shProg);
- }
- return shProg;
-}
-
/**
* Link a shader.
* Called via ctx->Driver.LinkShader()