#include "ir_optimization.h"
#include "ast.h"
-extern "C" {
#include "main/mtypes.h"
-#include "main/shaderapi.h"
#include "main/shaderobj.h"
-#include "main/uniforms.h"
#include "program/hash_table.h"
+
+extern "C" {
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
#include "program/prog_instruction.h"
#include "program/prog_optimize.h"
#include "program/prog_print.h"
#include "program/program.h"
-#include "program/prog_uniform.h"
#include "program/prog_parameter.h"
#include "program/sampler.h"
(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
+/**
+ * Maximum number of temporary registers.
+ *
+ * It is too big for stack allocated arrays -- it will cause stack overflow on
+ * Windows and likely Mac OS X.
+ */
#define MAX_TEMPS 4096
/* will be 4 for GLSL 4.00 */
int samplers_used;
bool indirect_addr_temps;
bool indirect_addr_consts;
+ int num_clip_distances;
int glsl_version;
bool native_integers;
/** List of immediate_storage */
exec_list immediates;
- int num_immediates;
+ unsigned num_immediates;
/** List of function_entry */
exec_list function_signatures;
bool process_move_condition(ir_rvalue *ir);
- void remove_output_reads(gl_register_file type);
void simplify_cmp(void);
void rename_temp_register(int index, int new_index);
case3(SLT, ISLT, USLT);
case2iu(ISHR, USHR);
+
+ case2fi(SSG, ISSG);
+ case3(ABS, IABS, IABS);
default: break;
}
fp->OriginUpperLeft = ir->origin_upper_left;
fp->PixelCenterInteger = ir->pixel_center_integer;
-
- } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
- struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
- switch (ir->depth_layout) {
- case ir_depth_layout_none:
- fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
- break;
- case ir_depth_layout_any:
- fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
- break;
- case ir_depth_layout_greater:
- fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
- break;
- case ir_depth_layout_less:
- fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
- break;
- case ir_depth_layout_unchanged:
- fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
- break;
- default:
- assert(0);
- break;
- }
}
if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
}
break;
case ir_unop_neg:
- assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
- if (result_dst.type == GLSL_TYPE_INT)
+ if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
else {
op[0].negate = ~op[0].negate;
}
break;
case ir_unop_abs:
- assert(result_dst.type == GLSL_TYPE_FLOAT);
emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
break;
case ir_unop_sign:
case ir_unop_floor:
emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
break;
+ case ir_unop_round_even:
+ emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
+ break;
case ir_unop_fract:
emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
break;
}
case ir_binop_lshift:
if (native_integers) {
- emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
+ emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
break;
}
case ir_binop_rshift:
if (native_integers) {
- emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
+ emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
break;
}
case ir_binop_bit_and:
if (native_integers) {
- emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
+ emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
break;
}
case ir_binop_bit_xor:
if (native_integers) {
- emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
+ emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
break;
}
case ir_binop_bit_or:
if (native_integers) {
- emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
+ emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
break;
}
- case ir_unop_round_even:
+
assert(!"GLSL 1.30 features unsupported");
break;
gl_type = native_integers ? GL_BOOL : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
if (native_integers)
- values[i].b = ir->value.b[i];
+ values[i].u = ir->value.b[i] ? ~0 : 0;
else
values[i].f = ir->value.b[i];
}
ir->shadow_comparitor->accept(this);
/* XXX This will need to be updated for cubemap array samplers. */
- if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
- sampler_type->sampler_array) {
+ if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
+ sampler_type->sampler_array) ||
+ sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
coord_dst.writemask = WRITEMASK_W;
} else {
coord_dst.writemask = WRITEMASK_Z;
case GLSL_SAMPLER_DIM_BUF:
assert(!"FINISHME: Implement ARB_texture_buffer_object");
break;
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ inst->tex_target = TEXTURE_EXTERNAL_INDEX;
+ break;
default:
assert(!"Should not get here.");
}
if (is_tex_instruction(inst->op)) {
v->samplers_used |= 1 << inst->sampler;
- prog->SamplerTargets[inst->sampler] =
- (gl_texture_index)inst->tex_target;
if (inst->tex_shadow) {
prog->ShadowSamplers |= 1 << inst->sampler;
}
}
prog->SamplersUsed = v->samplers_used;
- _mesa_update_shader_textures_used(prog);
-}
-
-
-/**
- * Check if the given vertex/fragment/shader program is within the
- * resource limits of the context (number of texture units, etc).
- * If any of those checks fail, record a linker error.
- *
- * XXX more checks are needed...
- */
-static void
-check_resources(const struct gl_context *ctx,
- struct gl_shader_program *shader_program,
- glsl_to_tgsi_visitor *prog,
- struct gl_program *proginfo)
-{
- switch (proginfo->Target) {
- case GL_VERTEX_PROGRAM_ARB:
- if (_mesa_bitcount(prog->samplers_used) >
- ctx->Const.MaxVertexTextureImageUnits) {
- fail_link(shader_program, "Too many vertex shader texture samplers");
- }
- if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
- fail_link(shader_program, "Too many vertex shader constants");
- }
- break;
- case MESA_GEOMETRY_PROGRAM:
- if (_mesa_bitcount(prog->samplers_used) >
- ctx->Const.MaxGeometryTextureImageUnits) {
- fail_link(shader_program, "Too many geometry shader texture samplers");
- }
- if (proginfo->Parameters->NumParameters >
- MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
- fail_link(shader_program, "Too many geometry shader constants");
- }
- break;
- case GL_FRAGMENT_PROGRAM_ARB:
- if (_mesa_bitcount(prog->samplers_used) >
- ctx->Const.MaxTextureImageUnits) {
- fail_link(shader_program, "Too many fragment shader texture samplers");
- }
- if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
- fail_link(shader_program, "Too many fragment shader constants");
- }
- break;
- default:
- _mesa_problem(ctx, "unexpected program type in check_resources()");
- }
-}
-
-
-
-struct uniform_sort {
- struct gl_uniform *u;
- int pos;
-};
-
-/* The shader_program->Uniforms list is almost sorted in increasing
- * uniform->{Frag,Vert}Pos locations, but not quite when there are
- * uniforms shared between targets. We need to add parameters in
- * increasing order for the targets.
- */
-static int
-sort_uniforms(const void *a, const void *b)
-{
- struct uniform_sort *u1 = (struct uniform_sort *)a;
- struct uniform_sort *u2 = (struct uniform_sort *)b;
- return u1->pos - u2->pos;
-}
-
-/* Add the uniforms to the parameters. The linker chose locations
- * in our parameters lists (which weren't created yet), which the
- * uniforms code will use to poke values into our parameters list
- * when uniforms are updated.
- */
-static void
-add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
- struct gl_shader *shader,
- struct gl_program *prog)
-{
- unsigned int i;
- unsigned int next_sampler = 0, num_uniforms = 0;
- struct uniform_sort *sorted_uniforms;
-
- sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
- shader_program->Uniforms->NumUniforms);
-
- for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
- struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
- int parameter_index = -1;
-
- switch (shader->Type) {
- case GL_VERTEX_SHADER:
- parameter_index = uniform->VertPos;
- break;
- case GL_FRAGMENT_SHADER:
- parameter_index = uniform->FragPos;
- break;
- case GL_GEOMETRY_SHADER:
- parameter_index = uniform->GeomPos;
- break;
- }
-
- /* Only add uniforms used in our target. */
- if (parameter_index != -1) {
- sorted_uniforms[num_uniforms].pos = parameter_index;
- sorted_uniforms[num_uniforms].u = uniform;
- num_uniforms++;
- }
- }
-
- qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
- sort_uniforms);
-
- for (i = 0; i < num_uniforms; i++) {
- struct gl_uniform *uniform = sorted_uniforms[i].u;
- int parameter_index = sorted_uniforms[i].pos;
- const glsl_type *type = uniform->Type;
- unsigned int size;
-
- if (type->is_vector() ||
- type->is_scalar()) {
- size = type->vector_elements;
- } else {
- size = type_size(type) * 4;
- }
-
- gl_register_file file;
- if (type->is_sampler() ||
- (type->is_array() && type->fields.array->is_sampler())) {
- file = PROGRAM_SAMPLER;
- } else {
- file = PROGRAM_UNIFORM;
- }
-
- GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
- uniform->Name);
-
- if (index < 0) {
- index = _mesa_add_parameter(prog->Parameters, file,
- uniform->Name, size, type->gl_type,
- NULL, NULL, 0x0);
-
- /* Sampler uniform values are stored in prog->SamplerUnits,
- * and the entry in that array is selected by this index we
- * store in ParameterValues[].
- */
- if (file == PROGRAM_SAMPLER) {
- for (unsigned int j = 0; j < size / 4; j++)
- prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
- }
-
- /* The location chosen in the Parameters list here (returned
- * from _mesa_add_uniform) has to match what the linker chose.
- */
- if (index != parameter_index) {
- fail_link(shader_program, "Allocation of uniform `%s' to target "
- "failed (%d vs %d)\n",
- uniform->Name, index, parameter_index);
- }
- }
- }
-
- ralloc_free(sorted_uniforms);
+ if (v->shader_program != NULL)
+ _mesa_update_shader_textures_used(v->shader_program, prog);
}
static void
element_type->matrix_columns,
element_type->vector_elements,
loc, 1, GL_FALSE, (GLfloat *)values);
- loc += element_type->matrix_columns;
} else {
_mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
values, element_type->gl_type);
- loc += type_size(element_type);
- }
- }
-}
-
-/*
- * Scan/rewrite program to remove reads of custom (output) registers.
- * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
- * (for vertex shaders).
- * In GLSL shaders, varying vars can be read and written.
- * On some hardware, trying to read an output register causes trouble.
- * So, rewrite the program to use a temporary register in this case.
- *
- * Based on _mesa_remove_output_reads from programopt.c.
- */
-void
-glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
-{
- GLuint i;
- GLint outputMap[VERT_RESULT_MAX];
- GLint outputTypes[VERT_RESULT_MAX];
- GLuint numVaryingReads = 0;
- GLboolean usedTemps[MAX_TEMPS];
- GLuint firstTemp = 0;
-
- _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
- usedTemps, MAX_TEMPS);
-
- assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
- assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
-
- for (i = 0; i < VERT_RESULT_MAX; i++)
- outputMap[i] = -1;
-
- /* look for instructions which read from varying vars */
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
- const GLuint numSrc = num_inst_src_regs(inst->op);
- GLuint j;
- for (j = 0; j < numSrc; j++) {
- if (inst->src[j].file == type) {
- /* replace the read with a temp reg */
- const GLuint var = inst->src[j].index;
- if (outputMap[var] == -1) {
- numVaryingReads++;
- outputMap[var] = _mesa_find_free_register(usedTemps,
- MAX_TEMPS,
- firstTemp);
- outputTypes[var] = inst->src[j].type;
- firstTemp = outputMap[var] + 1;
- }
- inst->src[j].file = PROGRAM_TEMPORARY;
- inst->src[j].index = outputMap[var];
- }
}
- }
-
- if (numVaryingReads == 0)
- return; /* nothing to be done */
- /* look for instructions which write to the varying vars identified above */
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
- if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
- /* change inst to write to the temp reg, instead of the varying */
- inst->dst.file = PROGRAM_TEMPORARY;
- inst->dst.index = outputMap[inst->dst.index];
- }
- }
-
- /* insert new MOV instructions at the end */
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (outputMap[i] >= 0) {
- /* MOV VAR[i], TEMP[tmp]; */
- st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
- st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
- dst.index = i;
- this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
- }
+ loc++;
}
}
void
glsl_to_tgsi_visitor::simplify_cmp(void)
{
- unsigned tempWrites[MAX_TEMPS];
+ unsigned *tempWrites;
unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
- memset(tempWrites, 0, sizeof(tempWrites));
+ tempWrites = new unsigned[MAX_TEMPS];
+ if (!tempWrites) {
+ return;
+ }
+ memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
memset(outputWrites, 0, sizeof(outputWrites));
foreach_iter(exec_list_iterator, iter, this->instructions) {
inst->op == TGSI_OPCODE_END ||
inst->op == TGSI_OPCODE_ENDSUB ||
inst->op == TGSI_OPCODE_RET) {
- return;
+ break;
}
if (inst->dst.file == PROGRAM_OUTPUT) {
inst->src[0] = inst->src[1];
}
}
+
+ delete [] tempWrites;
}
/* Replaces all references to a temporary register index with another index. */
switch (inst->op) {
case TGSI_OPCODE_BGNLOOP:
case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_CONT:
+ case TGSI_OPCODE_BRK:
/* End of a basic block, clear the write array entirely.
- * FIXME: This keeps us from killing dead code when the writes are
+ *
+ * This keeps us from killing dead code when the writes are
* on either side of a loop, even when the register isn't touched
- * inside the loop.
+ * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit
+ * dead code of this type, so it shouldn't make a difference as long as
+ * the dead code elimination pass in the GLSL compiler does its job.
*/
memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
break;
case TGSI_OPCODE_ENDIF:
- --level;
- break;
-
case TGSI_OPCODE_ELSE:
- /* Clear all channels written inside the preceding if block from the
- * write array, but leave those that were not touched.
- *
- * FIXME: This destroys opportunities to remove dead code inside of
- * IF blocks that are followed by an ELSE block.
+ /* Promote the recorded level of all channels written inside the
+ * preceding if or else block to the level above the if/else block.
*/
for (int r = 0; r < this->next_temp; r++) {
for (int c = 0; c < 4; c++) {
if (!writes[4 * r + c])
continue;
- if (write_level[4 * r + c] >= level)
- writes[4 * r + c] = NULL;
+ if (write_level[4 * r + c] == level)
+ write_level[4 * r + c] = level-1;
}
}
+
+ if(inst->op == TGSI_OPCODE_ENDIF)
+ --level;
+
break;
case TGSI_OPCODE_IF:
if (!inst->dead_mask || !inst->dst.writemask)
continue;
- else if (inst->dead_mask == inst->dst.writemask) {
+ else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
iter.remove();
delete inst;
removed++;
/* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
v->ctx = original->ctx;
v->prog = prog;
+ v->shader_program = NULL;
v->glsl_version = original->glsl_version;
v->native_integers = original->native_integers;
v->options = original->options;
v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+ v->num_immediates = original->num_immediates;
/*
* Get initial pixel color from the texture.
inst->sampler = 0;
inst->tex_target = TEXTURE_2D_INDEX;
- prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+ prog->InputsRead |= FRAG_BIT_TEX0;
prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
v->samplers_used |= (1 << 0);
* new visitor. */
foreach_iter(exec_list_iterator, iter, original->instructions) {
glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ glsl_to_tgsi_instruction *newinst;
st_src_reg src_regs[3];
if (inst->dst.file == PROGRAM_OUTPUT)
src_regs[i].index = src0.index;
}
else if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= (1 << src_regs[i].index);
+ prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
}
- v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+ newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+ newinst->tex_target = inst->tex_target;
}
/* Make modifications to fragment program info. */
/* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
v->ctx = original->ctx;
v->prog = prog;
+ v->shader_program = NULL;
v->glsl_version = original->glsl_version;
v->native_integers = original->native_integers;
v->options = original->options;
v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+ v->num_immediates = original->num_immediates;
/* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
inst->sampler = samplerIndex;
inst->tex_target = TEXTURE_2D_INDEX;
- prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+ prog->InputsRead |= FRAG_BIT_TEX0;
prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
v->samplers_used |= (1 << samplerIndex);
* new visitor. */
foreach_iter(exec_list_iterator, iter, original->instructions) {
glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ glsl_to_tgsi_instruction *newinst;
st_src_reg src_regs[3];
if (inst->dst.file == PROGRAM_OUTPUT)
for (int i=0; i<3; i++) {
src_regs[i] = inst->src[i];
if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= (1 << src_regs[i].index);
+ prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
}
- v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+ newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+ newinst->tex_target = inst->tex_target;
}
/* Make modifications to fragment program info. */
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
- /* Extra info for handling point size clamping in vertex shader */
- struct ureg_dst pointSizeResult; /**< Actual point size output register */
- struct ureg_src pointSizeConst; /**< Point size range constant register */
- GLint pointSizeOutIndex; /**< Temp point size output register */
- GLboolean prevInstWrotePointSize;
-
const GLuint *inputMapping;
const GLuint *outputMapping;
/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_FACE,
+ TGSI_SEMANTIC_VERTEXID,
TGSI_SEMANTIC_INSTANCEID
};
return t->temps[index];
case PROGRAM_OUTPUT:
- if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
- t->prevInstWrotePointSize = GL_TRUE;
-
if (t->procType == TGSI_PROCESSOR_VERTEX)
assert(index < VERT_RESULT_MAX);
else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
static struct ureg_dst
translate_dst(struct st_translate *t,
const st_dst_reg *dst_reg,
- bool saturate)
+ bool saturate, bool clamp_color)
{
struct ureg_dst dst = dst_register(t,
dst_reg->file,
if (saturate)
dst = ureg_saturate(dst);
+ else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
+ /* Clamp colors for ARB_color_buffer_float. */
+ switch (t->procType) {
+ case TGSI_PROCESSOR_VERTEX:
+ /* XXX if the geometry shader is present, this must be done there
+ * instead of here. */
+ if (dst_reg->index == VERT_RESULT_COL0 ||
+ dst_reg->index == VERT_RESULT_COL1 ||
+ dst_reg->index == VERT_RESULT_BFC0 ||
+ dst_reg->index == VERT_RESULT_BFC1) {
+ dst = ureg_saturate(dst);
+ }
+ break;
+
+ case TGSI_PROCESSOR_FRAGMENT:
+ if (dst_reg->index >= FRAG_RESULT_COLOR) {
+ dst = ureg_saturate(dst);
+ }
+ break;
+ }
+ }
if (dst_reg->reladdr != NULL)
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
static void
compile_tgsi_instruction(struct st_translate *t,
- const glsl_to_tgsi_instruction *inst)
+ const glsl_to_tgsi_instruction *inst,
+ bool clamp_dst_color_output)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
if (num_dst)
dst[0] = translate_dst(t,
&inst->dst,
- inst->saturate);
+ inst->saturate,
+ clamp_dst_color_output);
for (i = 0; i < num_src; i++)
src[i] = translate_src(t, &inst->src[i]);
ureg_tex_insn(ureg,
inst->op,
dst, num_dst,
- translate_texture_target(inst->tex_target, inst->tex_shadow),
+ st_translate_texture_target(inst->tex_target, inst->tex_shadow),
texoffsets, inst->tex_offset_num_offset,
src, num_src);
return;
}
/**
- * Emit the TGSI instructions to adjust the WPOS pixel center convention
- * Basically, add (adjX, adjY) to the fragment position.
- */
-static void
-emit_adjusted_wpos(struct st_translate *t,
- const struct gl_program *program,
- float adjX, float adjY)
-{
- struct ureg_program *ureg = t->ureg;
- struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
- struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
-
- /* Note that we bias X and Y and pass Z and W through unchanged.
- * The shader might also use gl_FragCoord.w and .z.
- */
- ureg_ADD(ureg, wpos_temp, wpos_input,
- ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
-
- t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
-}
-
-
-/**
- * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ * Emit the TGSI instructions for inverting and adjusting WPOS.
* This code is unavoidable because it also depends on whether
* a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
*/
static void
-emit_wpos_inversion(struct st_translate *t,
- const struct gl_program *program,
- bool invert)
+emit_wpos_adjustment( struct st_translate *t,
+ const struct gl_program *program,
+ boolean invert,
+ GLfloat adjX, GLfloat adjY[2])
{
struct ureg_program *ureg = t->ureg;
unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
wposTransformState);
- struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
- struct ureg_dst wpos_temp;
+ struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+ struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
- /* MOV wpos_temp, input[wpos]
- */
- if (wpos_input.File == TGSI_FILE_TEMPORARY)
- wpos_temp = ureg_dst(wpos_input);
- else {
- wpos_temp = ureg_DECL_temporary(ureg);
- ureg_MOV(ureg, wpos_temp, wpos_input);
+ /* First, apply the coordinate shift: */
+ if (adjX || adjY[0] || adjY[1]) {
+ if (adjY[0] != adjY[1]) {
+ /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
+ * depending on whether inversion is actually going to be applied
+ * or not, which is determined by testing against the inversion
+ * state variable used below, which will be either +1 or -1.
+ */
+ struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);
+
+ ureg_CMP(ureg, adj_temp,
+ ureg_scalar(wpostrans, invert ? 2 : 0),
+ ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
+ ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
+ ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
+ } else {
+ ureg_ADD(ureg, wpos_temp, wpos_input,
+ ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
+ }
+ wpos_input = ureg_src(wpos_temp);
+ } else {
+ /* MOV wpos_temp, input[wpos]
+ */
+ ureg_MOV( ureg, wpos_temp, wpos_input );
}
+ /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
+ * inversion/identity, or the other way around if we're drawing to an FBO.
+ */
if (invert) {
/* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
*/
- ureg_MAD(ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
- wpos_input,
- ureg_scalar(wpostrans, 0),
- ureg_scalar(wpostrans, 1));
+ ureg_MAD( ureg,
+ ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+ wpos_input,
+ ureg_scalar(wpostrans, 0),
+ ureg_scalar(wpostrans, 1));
} else {
/* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
*/
- ureg_MAD(ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
- wpos_input,
- ureg_scalar(wpostrans, 2),
- ureg_scalar(wpostrans, 3));
+ ureg_MAD( ureg,
+ ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+ wpos_input,
+ ureg_scalar(wpostrans, 2),
+ ureg_scalar(wpostrans, 3));
}
/* Use wpos_temp as position input from here on:
const struct gl_fragment_program *fp =
(const struct gl_fragment_program *) program;
struct pipe_screen *pscreen = st->pipe->screen;
+ GLfloat adjX = 0.0f;
+ GLfloat adjY[2] = { 0.0f, 0.0f };
boolean invert = FALSE;
+ /* Query the pixel center conventions supported by the pipe driver and set
+ * adjX, adjY to help out if it cannot handle the requested one internally.
+ *
+ * The bias of the y-coordinate depends on whether y-inversion takes place
+ * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
+ * drawing to an FBO (causes additional inversion), and whether the pipe
+ * driver origin and the requested origin differ (the latter condition is
+ * stored in the 'invert' variable).
+ *
+ * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
+ *
+ * center shift only:
+ * i -> h: +0.5
+ * h -> i: -0.5
+ *
+ * inversion only:
+ * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
+ * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
+ * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
+ * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
+ *
+ * inversion and center shift:
+ * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
+ * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
+ * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
+ * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
+ */
if (fp->OriginUpperLeft) {
/* Fragment shader wants origin in upper-left */
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
if (fp->PixelCenterInteger) {
/* Fragment shader wants pixel center integer */
- if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
/* the driver supports pixel center integer */
+ adjY[1] = 1.0f;
ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
- else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+ }
+ else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
/* the driver supports pixel center half integer, need to bias X,Y */
- emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
+ adjX = -0.5f;
+ adjY[0] = -0.5f;
+ adjY[1] = 0.5f;
+ }
else
assert(0);
}
}
else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
/* the driver supports pixel center integer, need to bias X,Y */
+ adjX = adjY[0] = adjY[1] = 0.5f;
ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
- emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
}
else
assert(0);
/* we invert after adjustment so that we avoid the MOV to temporary,
* and reuse the adjustment ADD instead */
- emit_wpos_inversion(t, program, invert);
+ emit_wpos_adjustment(t, program, invert, adjX, adjY);
}
/**
const GLuint outputMapping[],
const ubyte outputSemanticName[],
const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags)
+ boolean passthrough_edgeflags,
+ boolean clamp_color)
{
- struct st_translate translate, *t;
+ struct st_translate *t;
unsigned i;
enum pipe_error ret = PIPE_OK;
assert(numInputs <= Elements(t->inputs));
assert(numOutputs <= Elements(t->outputs));
- t = &translate;
+ t = CALLOC_STRUCT(st_translate);
+ if (!t) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out;
+ }
+
memset(t, 0, sizeof *t);
t->procType = procType;
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
- t->pointSizeOutIndex = -1;
- t->prevInstWrotePointSize = GL_FALSE;
+
+ if (program->shader_program) {
+ for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
+ struct gl_uniform_storage *const storage =
+ &program->shader_program->UniformStorage[i];
+
+ _mesa_uniform_detach_all_driver_storage(storage);
+ }
+ }
/*
* Declare input attributes.
break;
default:
assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
- return PIPE_ERROR_BAD_INPUT;
+ ret = PIPE_ERROR_BAD_INPUT;
+ goto out;
}
}
}
}
for (i = 0; i < numOutputs; i++) {
- t->outputs[i] = ureg_DECL_output(ureg,
- outputSemanticName[i],
- outputSemanticIndex[i]);
- if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
- /* Writing to the point size result register requires special
- * handling to implement clamping.
- */
- static const gl_state_index pointSizeClampState[STATE_LENGTH]
- = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
- /* XXX: note we are modifying the incoming shader here! Need to
- * do this before emitting the constant decls below, or this
- * will be missed.
- */
- unsigned pointSizeClampConst =
- _mesa_add_state_reference(proginfo->Parameters,
- pointSizeClampState);
- struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
- t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
- t->pointSizeResult = t->outputs[i];
- t->pointSizeOutIndex = i;
- t->outputs[i] = psizregtemp;
+ if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) {
+ int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW;
+ t->outputs[i] = ureg_DECL_output_masked(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i],
+ mask);
+ } else {
+ t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
}
}
if (passthrough_edgeflags)
i = 0;
foreach_iter(exec_list_iterator, iter, program->immediates) {
immediate_storage *imm = (immediate_storage *)iter.get();
+ assert(i < program->num_immediates);
t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
}
+ assert(i == program->num_immediates);
/* texture samplers */
for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
*/
foreach_iter(exec_list_iterator, iter, program->instructions) {
set_insn_start(t, ureg_get_instruction_number(ureg));
- compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
-
- if (t->prevInstWrotePointSize && proginfo->Id) {
- /* The previous instruction wrote to the (fake) vertex point size
- * result register. Now we need to clamp that value to the min/max
- * point size range, putting the result into the real point size
- * register.
- * Note that we can't do this easily at the end of program due to
- * possible early return.
- */
- set_insn_start(t, ureg_get_instruction_number(ureg));
- ureg_MAX(t->ureg,
- ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
- ureg_src(t->outputs[t->pointSizeOutIndex]),
- ureg_swizzle(t->pointSizeConst, 1,1,1,1));
- ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
- ureg_src(t->outputs[t->pointSizeOutIndex]),
- ureg_swizzle(t->pointSizeConst, 2,2,2,2));
- }
- t->prevInstWrotePointSize = GL_FALSE;
+ compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
+ clamp_color);
}
/* Fix up all emitted labels:
t->insn[t->labels[i].branch_target]);
}
+ if (program->shader_program) {
+ /* This has to be done last. Any operation that can cause
+ * prog->ParameterValues to get reallocated (e.g., anything that adds a
+ * program constant) has to happen before creating this linkage.
+ */
+ for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+ if (program->shader_program->_LinkedShaders[i] == NULL)
+ continue;
+
+ _mesa_associate_uniform_storage(ctx, program->shader_program,
+ program->shader_program->_LinkedShaders[i]->Program->Parameters);
+ }
+ }
+
out:
- FREE(t->insn);
- FREE(t->labels);
- FREE(t->constants);
- FREE(t->immediates);
+ if (t) {
+ FREE(t->insn);
+ FREE(t->labels);
+ FREE(t->constants);
+ FREE(t->immediates);
+
+ if (t->error) {
+ debug_printf("%s: translate error flag set\n", __FUNCTION__);
+ }
- if (t->error) {
- debug_printf("%s: translate error flag set\n", __FUNCTION__);
+ FREE(t);
}
return ret;
static struct gl_program *
get_mesa_program(struct gl_context *ctx,
struct gl_shader_program *shader_program,
- struct gl_shader *shader)
+ struct gl_shader *shader,
+ int num_clip_distances)
{
glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
struct gl_program *prog;
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
+ v->num_clip_distances = num_clip_distances;
+
+ _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
+ prog->Parameters);
- add_uniforms_to_parameters_list(shader_program, shader, prog);
+ /* Remove reads from output registers. */
+ lower_output_reads(shader->ir);
/* Emit intermediate IR for main(). */
visit_exec_list(shader->ir, v);
}
#endif
- /* Remove reads to output registers, and to varyings in vertex shaders. */
- v->remove_output_reads(PROGRAM_OUTPUT);
- if (target == GL_VERTEX_PROGRAM_ARB)
- v->remove_output_reads(PROGRAM_VARYING);
-
/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
v->simplify_cmp();
v->copy_propagate();
_mesa_print_ir(shader->ir, NULL);
printf("\n");
printf("\n");
+ fflush(stdout);
}
prog->Instructions = NULL;
prog->NumInstructions = 0;
- do_set_program_inouts(shader->ir, prog);
+ do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
count_resources(v, prog);
- check_resources(ctx, shader_program, v, prog);
-
_mesa_reference_program(ctx, &shader->Program, prog);
+ /* This has to be done last. Any operation that can cause
+ * prog->ParameterValues to get reallocated (e.g., anything that adds a
+ * program constant) has to happen before creating this linkage.
+ */
+ _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
+ if (!shader_program->LinkStatus) {
+ return NULL;
+ }
+
struct st_vertex_program *stvp;
struct st_fragment_program *stfp;
struct st_geometry_program *stgp;
return prog;
}
+/**
+ * Report the declared array length of gl_ClipDistance.
+ *
+ * Walks the top-level instruction list and stops at the first variable
+ * declaration whose name is exactly "gl_ClipDistance".
+ *
+ * \param ir  instruction list to scan
+ * \return the length recorded on the variable's type, or 0 when the list
+ *         contains no such declaration
+ */
+int get_clip_distance_size(exec_list *ir)
+{
+   foreach_iter (exec_list_iterator, it, *ir) {
+      /* Non-variable instructions yield NULL from as_variable(). */
+      ir_variable *decl = ((ir_instruction *)it.get())->as_variable();
+
+      if (decl != NULL && strcmp(decl->name, "gl_ClipDistance") == 0)
+         return decl->type->length;
+   }
+
+   /* No declaration of gl_ClipDistance was found. */
+   return 0;
+}
+
extern "C" {
struct gl_shader *
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ int num_clip_distances[MESA_SHADER_TYPES];
assert(prog->LinkStatus);
for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
const struct gl_shader_compiler_options *options =
&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+ /* We have to determine the length of the gl_ClipDistance array before
+ * the array is lowered to two vec4s by lower_clip_distance().
+ */
+ num_clip_distances[i] = get_clip_distance_size(ir);
+
do {
+ unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP |
+ EXP_TO_EXP2 | LOG_TO_LOG2;
+ if (options->EmitNoPow)
+ what_to_lower |= POW_TO_EXP2;
+ if (!ctx->Const.NativeIntegers)
+ what_to_lower |= INT_DIV_TO_MUL_RCP;
+
progress = false;
/* Lowering */
do_mat_op_to_vec(ir);
- lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
- | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
- | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+ lower_instructions(ir, what_to_lower);
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
- progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+ progress = do_common_optimization(ir, true, true,
+ options->MaxUnrollIterations)
+ || progress;
progress = lower_quadop_vector(ir, false) || progress;
+ progress = lower_clip_distance(ir) || progress;
if (options->MaxIfDepth == 0)
progress = lower_discard(ir) || progress;
if (prog->_LinkedShaders[i] == NULL)
continue;
- linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+ linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
+ num_clip_distances[i]);
if (linked_prog) {
- bool ok = true;
-
- switch (prog->_LinkedShaders[i]->Type) {
- case GL_VERTEX_SHADER:
- _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
- linked_prog);
- ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
- linked_prog);
- if (!ok) {
- _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
- }
- break;
- case GL_FRAGMENT_SHADER:
- _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
- (struct gl_fragment_program *)linked_prog);
- ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
- linked_prog);
- if (!ok) {
- _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
- }
- break;
- case GL_GEOMETRY_SHADER:
- _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
- linked_prog);
- ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
- linked_prog);
- if (!ok) {
- _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
- }
- break;
- }
- if (!ok) {
- _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL);
+ static const GLenum targets[] = {
+ GL_VERTEX_PROGRAM_ARB,
+ GL_FRAGMENT_PROGRAM_ARB,
+ GL_GEOMETRY_PROGRAM_NV
+ };
+
+ _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+ linked_prog);
+ if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
+ _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+ NULL);
_mesa_reference_program(ctx, &linked_prog, NULL);
return GL_FALSE;
}
return GL_TRUE;
}
+/**
+ * Copy the linked program's transform feedback description into a
+ * pipe_stream_output_info structure.
+ *
+ * \param glsl_to_tgsi   visitor whose shader_program->LinkedTransformFeedback
+ *                       supplies the data
+ * \param outputMapping  lookup table indexed by each output's OutputRegister;
+ *                       the value found there is stored as register_index
+ * \param so             destination; its stride[], output[] and num_outputs
+ *                       members are overwritten
+ */
+void
+st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
+                                const GLuint outputMapping[],
+                                struct pipe_stream_output_info *so)
+{
+   struct gl_transform_feedback_info *xfb =
+      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
+   const unsigned num_outputs = xfb->NumOutputs;
+
+   /* Per-buffer strides are copied for every stream-output buffer slot. */
+   for (unsigned buf = 0; buf < PIPE_MAX_SO_BUFFERS; buf++)
+      so->stride[buf] = xfb->BufferStride[buf];
+
+   /* Per-output records: only the register index is remapped, the other
+    * fields are copied verbatim.
+    */
+   for (unsigned out = 0; out < num_outputs; out++) {
+      so->output[out].register_index =
+         outputMapping[xfb->Outputs[out].OutputRegister];
+      so->output[out].start_component = xfb->Outputs[out].ComponentOffset;
+      so->output[out].num_components = xfb->Outputs[out].NumComponents;
+      so->output[out].output_buffer = xfb->Outputs[out].OutputBuffer;
+      so->output[out].dst_offset = xfb->Outputs[out].DstOffset;
+   }
+
+   so->num_outputs = num_outputs;
+}
+
} /* extern "C" */