#include "main/compiler.h"
#include "ir.h"
#include "ir_visitor.h"
-#include "ir_print_visitor.h"
#include "ir_expression_flattening.h"
#include "glsl_types.h"
#include "glsl_parser_extras.h"
*/
#define MAX_TEMPS 4096
+/**
+ * Maximum number of arrays
+ */
+#define MAX_ARRAYS 256
+
/* will be 4 for GLSL 4.00 */
#define MAX_GLSL_TEXTURE_OFFSET 1
explicit st_src_reg(st_dst_reg reg);
gl_register_file file; /**< PROGRAM_* from Mesa */
- int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+ int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
int index2D;
GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate; /**< NEGATE_XYZW mask from mesa */
explicit st_dst_reg(st_src_reg reg);
gl_register_file file; /**< PROGRAM_* from Mesa */
- int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+ int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
GLuint cond_mask:4;
int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
int next_temp;
+ unsigned array_sizes[MAX_ARRAYS];
+ unsigned next_array;
+
int num_address_regs;
int samplers_used;
- bool indirect_addr_temps;
bool indirect_addr_consts;
int glsl_version;
/* Update indirect addressing status used by TGSI */
if (dst.reladdr) {
switch(dst.file) {
- case PROGRAM_TEMPORARY:
- this->indirect_addr_temps = true;
- break;
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
for (i=0; i<3; i++) {
if(inst->src[i].reladdr) {
switch(inst->src[i].file) {
- case PROGRAM_TEMPORARY:
- this->indirect_addr_temps = true;
- break;
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
st_src_reg src;
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
- src.file = PROGRAM_TEMPORARY;
- src.index = next_temp;
src.reladdr = NULL;
- next_temp += type_size(type);
+ src.negate = 0;
+
+ if (!options->EmitNoIndirectTemp &&
+ (type->is_array() || type->is_matrix())) {
+
+ src.file = PROGRAM_ARRAY;
+ src.index = next_array << 16 | 0x8000;
+ array_sizes[next_array] = type_size(type);
+ ++next_array;
+
+ } else {
+ src.file = PROGRAM_TEMPORARY;
+ src.index = next_temp;
+ next_temp += type_size(type);
+ }
if (type->is_array() || type->is_record()) {
src.swizzle = SWIZZLE_NOOP;
} else {
src.swizzle = swizzle_for_size(type->vector_elements);
}
- src.negate = 0;
return src;
}
*/
assert((int) ir->num_state_slots == type_size(ir->type));
- storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
- this->next_temp += type_size(ir->type);
+ dst = st_dst_reg(get_temp(ir->type));
+
+ storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
- dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
- native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
+ this->variables.push_tail(storage);
}
this->result.file = PROGRAM_UNDEFINED;
ir->operands[operand]->accept(this);
if (this->result.file == PROGRAM_UNDEFINED) {
- ir_print_visitor v;
printf("Failed to get tree for expression operand:\n");
- ir->operands[operand]->accept(&v);
+ ir->operands[operand]->print();
+ printf("\n");
exit(1);
}
op[operand] = this->result;
if (ir->type->base_type == GLSL_TYPE_BOOL) {
emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
- result_src.negate = 1;
- emit(ir, TGSI_OPCODE_UCMP, result_dst, result_src, st_src_reg_for_int(~0), st_src_reg_for_int(0));
} else {
emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
}
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_4x8:
case ir_binop_pack_half_2x16_split:
+ case ir_unop_bitfield_reverse:
+ case ir_unop_bit_count:
+ case ir_unop_find_msb:
+ case ir_unop_find_lsb:
+ case ir_binop_bfm:
+ case ir_triop_bfi:
+ case ir_triop_bitfield_extract:
+ case ir_quadop_bitfield_insert:
case ir_quadop_vector:
+ case ir_binop_vector_extract:
+ case ir_triop_vector_insert:
/* This operation is not supported, or should have already been handled.
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
case ir_var_auto:
case ir_var_temporary:
- entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
- this->next_temp);
+ st_src_reg src = get_temp(var->type);
+
+ entry = new(mem_ctx) variable_storage(var, src.file, src.index);
this->variables.push_tail(entry);
- next_temp += type_size(var->type);
break;
}
if (native_integers) {
/* This is necessary because TGSI's CMP instruction expects the
* condition to be a float, and we store booleans as integers.
- * If TGSI had a UCMP instruction or similar, this extra
- * instruction would not be necessary.
+ * TODO: really want to avoid i2f path and use UCMP. Requires
+ * changes to process_move_condition though too.
*/
condition_temp = get_temp(glsl_type::vec4_type);
condition.negate = 0;
storage = find_variable_storage(param);
assert(!storage);
- storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
+ st_src_reg src = get_temp(param->type);
- this->next_temp += type_size(param->type);
+ storage = new(mem_ctx) variable_storage(param, src.file, src.index);
+ this->variables.push_tail(storage);
}
if (!sig->return_type->is_void()) {
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
- st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset;
+ st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index;
st_dst_reg result_dst, coord_dst, cube_sc_dst;
glsl_to_tgsi_instruction *inst = NULL;
unsigned opcode = TGSI_OPCODE_NOP;
*/
coord = get_temp(glsl_type::vec4_type);
coord_dst = st_dst_reg(coord);
+ coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
}
}
break;
case ir_txf_ms:
- assert(!"Unexpected ir_txf_ms opcode");
+ opcode = TGSI_OPCODE_TXF;
+ ir->lod_info.sample_index->accept(this);
+ sample_index = this->result;
+ break;
+ case ir_lod:
+ assert(!"Unexpected ir_lod opcode");
break;
}
}
}
- if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+ if (ir->op == ir_txf_ms) {
+ coord_dst.writemask = WRITEMASK_W;
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
+ coord_dst.writemask = WRITEMASK_XYZW;
+ } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
opcode == TGSI_OPCODE_TXF) {
/* TGSI stores LOD or LOD bias in the last channel of the coords. */
coord_dst.writemask = WRITEMASK_W;
case GLSL_SAMPLER_DIM_EXTERNAL:
inst->tex_target = TEXTURE_EXTERNAL_INDEX;
break;
+ case GLSL_SAMPLER_DIM_MS:
+ inst->tex_target = (sampler_type->sampler_array)
+ ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ break;
default:
assert(!"Should not get here.");
}
void
glsl_to_tgsi_visitor::visit(ir_if *ir)
{
+ unsigned if_opcode;
glsl_to_tgsi_instruction *if_inst;
ir->condition->accept(this);
assert(this->result.file != PROGRAM_UNDEFINED);
- if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
+ if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
+
+ if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
this->instructions.push_tail(if_inst);
{
result.file = PROGRAM_UNDEFINED;
next_temp = 1;
+ next_array = 0;
next_signature_id = 1;
num_immediates = 0;
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
- indirect_addr_temps = false;
indirect_addr_consts = false;
glsl_version = 0;
native_integers = false;
"Couldn't find uniform for initializer %s\n", name);
return;
}
- int loc = _mesa_uniform_merge_location_offset(index, offset);
+ int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset);
for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
ir_constant *element;
assert(inst->dst.index < MAX_TEMPS);
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
- }
+ } else
+ continue;
/* For a CMP to be considered a conditional write, the destination
* register and source register two must be the same. */
break;
case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
++level;
break;
/* If this is a copy, add it to the ACP. */
if (inst->op == TGSI_OPCODE_MOV &&
inst->dst.file == PROGRAM_TEMPORARY &&
+ !(inst->dst.file == inst->src[0].file &&
+ inst->dst.index == inst->src[0].index) &&
!inst->dst.reladdr &&
!inst->saturate &&
!inst->src[0].reladdr &&
break;
case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
++level;
/* fallthrough to default case to mark the condition as read */
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
v->num_immediates = original->num_immediates;
* Get initial pixel color from the texture.
* TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
*/
- coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+ coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
src0 = v->get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
inst->sampler = 0;
inst->tex_target = TEXTURE_2D_INDEX;
- prog->InputsRead |= FRAG_BIT_TEX0;
+ prog->InputsRead |= VARYING_BIT_TEX0;
prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
v->samplers_used |= (1 << 0);
for (int i=0; i<3; i++) {
src_regs[i] = inst->src[i];
if (src_regs[i].file == PROGRAM_INPUT &&
- src_regs[i].index == FRAG_ATTRIB_COL0)
+ src_regs[i].index == VARYING_SLOT_COL0)
{
src_regs[i].file = PROGRAM_TEMPORARY;
src_regs[i].index = src0.index;
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
v->num_immediates = original->num_immediates;
/* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+ coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
src0 = v->get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
inst->sampler = samplerIndex;
inst->tex_target = TEXTURE_2D_INDEX;
- prog->InputsRead |= FRAG_BIT_TEX0;
+ prog->InputsRead |= VARYING_BIT_TEX0;
prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
v->samplers_used |= (1 << samplerIndex);
struct ureg_program *ureg;
struct ureg_dst temps[MAX_TEMPS];
+ struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+ unsigned array_sizes[MAX_ARRAYS];
+
const GLuint *inputMapping;
const GLuint *outputMapping;
gl_register_file file,
GLuint index)
{
+ unsigned array;
+
switch(file) {
case PROGRAM_UNDEFINED:
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
+ assert(index >= 0);
+ assert(index < (int) Elements(t->temps));
+
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
return t->temps[index];
+ case PROGRAM_ARRAY:
+ array = index >> 16;
+
+ assert(array >= 0);
+ assert(array < (int) Elements(t->arrays));
+
+ if (ureg_dst_is_undef(t->arrays[array]))
+ t->arrays[array] = ureg_DECL_array_temporary(
+ t->ureg, t->array_sizes[array], TRUE);
+
+ return ureg_dst_array_offset(t->arrays[array],
+ (int)(index & 0xFFFF) - 0x8000);
+
case PROGRAM_OUTPUT:
if (t->procType == TGSI_PROCESSOR_VERTEX)
assert(index < VARYING_SLOT_MAX);
return ureg_src_undef();
case PROGRAM_TEMPORARY:
- assert(index >= 0);
- assert(index < (int) Elements(t->temps));
- if (ureg_dst_is_undef(t->temps[index]))
- t->temps[index] = ureg_DECL_local_temporary(t->ureg);
- return ureg_src(t->temps[index]);
+ case PROGRAM_ARRAY:
+ return ureg_src(dst_register(t, file, index));
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
}
}
- if (dst_reg->reladdr != NULL)
+ if (dst_reg->reladdr != NULL) {
+ assert(dst_reg->file != PROGRAM_TEMPORARY);
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
+ }
return dst;
}
src = ureg_negate(src);
if (src_reg->reladdr != NULL) {
- /* Normally ureg_src_indirect() would be used here, but a stupid compiler
- * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
- * set the bit for src.Negate. So we have to do the operation manually
- * here to work around the compiler's problems. */
- /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
- struct ureg_src addr = ureg_src(t->address[0]);
- src.Indirect = 1;
- src.IndirectFile = addr.File;
- src.IndirectIndex = addr.Index;
- src.IndirectSwizzle = addr.SwizzleX;
-
- if (src_reg->file != PROGRAM_INPUT &&
- src_reg->file != PROGRAM_OUTPUT) {
- /* If src_reg->index was negative, it was set to zero in
- * src_register(). Reassign it now. But don't do this
- * for input/output regs since they get remapped while
- * const buffers don't.
- */
- src.Index = src_reg->index;
- }
+ assert(src_reg->file != PROGRAM_TEMPORARY);
+ src = ureg_src_indirect(src, ureg_src(t->address[0]));
}
return src;
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDLOOP:
case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
assert(num_dst == 0);
ureg_label_insn(ureg,
inst->op,
struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
- struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+ struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];
/* First, apply the coordinate shift: */
if (adjX || adjY[0] || adjY[1]) {
/* Use wpos_temp as position input from here on:
*/
- t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+ t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
}
{
struct ureg_program *ureg = t->ureg;
struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
- struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
+ struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
/* MOV_SAT face_temp, input[face] */
face_temp = ureg_saturate(face_temp);
ureg_MOV(ureg, face_temp, face_input);
/* Use face_temp as face input from here on: */
- t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
+ t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}
static void
is_centroid[i]);
}
- if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+ if (proginfo->InputsRead & VARYING_BIT_POS) {
/* Must do this after setting up t->inputs, and before
* emitting constant references, below:
*/
emit_wpos(st_context(ctx), t, proginfo, ureg);
}
- if (proginfo->InputsRead & FRAG_BIT_FACE)
+ if (proginfo->InputsRead & VARYING_BIT_FACE)
emit_face_var(t);
/*
}
}
- if (program->indirect_addr_temps) {
- /* If temps are accessed with indirect addressing, declare temporaries
- * in sequential order. Else, we declare them on demand elsewhere.
- * (Note: the number of temporaries is equal to program->next_temp)
- */
- for (i = 0; i < (unsigned)program->next_temp; i++) {
- /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
- t->temps[i] = ureg_DECL_local_temporary(t->ureg);
- }
- }
+ /* Copy over array sizes
+ */
+ memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
/* Emit constants and uniforms. TGSI uses a single index space for these,
* so we put all the translated regs in t->constants.
assert(i == program->num_immediates);
/* texture samplers */
- for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; i++) {
if (program->samplers_used & (1 << i)) {
t->samplers[i] = ureg_DECL_sampler(ureg, i);
}
v->copy_propagate();
while (v->eliminate_dead_code_advanced());
- /* FIXME: These passes to optimize temporary registers don't work when there
- * is indirect addressing of the temporary register space. We need proper
- * array support so that we don't have to give up these passes in every
- * shader that uses arrays.
- */
- if (!v->indirect_addr_temps) {
- v->eliminate_dead_code();
- v->merge_registers();
- v->renumber_registers();
- }
+ v->eliminate_dead_code();
+ v->merge_registers();
+ v->renumber_registers();
/* Write the END instruction. */
v->emit(NULL, TGSI_OPCODE_END);
lower_ubo_reference(prog->_LinkedShaders[i], ir);
do_vec_index_to_cond_assign(ir);
+ lower_vector_insert(ir, true);
lower_quadop_vector(ir, false);
lower_noise(ir);
if (options->MaxIfDepth == 0) {
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
progress = do_common_optimization(ir, true, true,
- options->MaxUnrollIterations)
+ options->MaxUnrollIterations, options)
|| progress;
progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;