#include "main/compiler.h"
#include "ir.h"
#include "ir_visitor.h"
-#include "ir_print_visitor.h"
#include "ir_expression_flattening.h"
#include "glsl_types.h"
#include "glsl_parser_extras.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
+#include "main/uniforms.h"
#include "program/hash_table.h"
extern "C" {
#include "main/shaderapi.h"
-#include "main/uniforms.h"
#include "program/prog_instruction.h"
#include "program/prog_optimize.h"
#include "program/prog_print.h"
}
#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
-#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
- (1 << PROGRAM_ENV_PARAM) | \
- (1 << PROGRAM_STATE_VAR) | \
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
*/
#define MAX_TEMPS 4096
-/* will be 4 for GLSL 4.00 */
-#define MAX_GLSL_TEXTURE_OFFSET 1
+/**
+ * Maximum number of arrays
+ */
+#define MAX_ARRAYS 256
+
+#define MAX_GLSL_TEXTURE_OFFSET 4
class st_src_reg;
class st_dst_reg;
this->index2D = 0;
this->type = type ? type->base_type : GLSL_TYPE_ERROR;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
}
st_src_reg(gl_register_file file, int index, int type)
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
}
st_src_reg(gl_register_file file, int index, int type, int index2D)
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
}
st_src_reg()
this->swizzle = 0;
this->negate = 0;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
}
explicit st_src_reg(st_dst_reg reg);
gl_register_file file; /**< PROGRAM_* from Mesa */
- int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+ int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
int index2D;
GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate; /**< NEGATE_XYZW mask from mesa */
int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
+ st_src_reg *reladdr2;
+ bool has_index2;
};
class st_dst_reg {
public:
+ st_dst_reg(gl_register_file file, int writemask, int type, int index)
+ { /* Builds a destination register whose register index is supplied by
+    * the caller.  The file-scope address_reg and address_reg2 objects are
+    * created through this overload, with index 0 and 1 respectively. */
+ this->file = file;
+ this->index = index;
+ this->writemask = writemask;
+ this->cond_mask = COND_TR; /* NOTE(review): presumably the "always true" condition, i.e. an unconditional write -- confirm against the COND_* definitions */
+ this->reladdr = NULL; /* no relative-address register is attached at construction */
+ this->type = type;
+ }
+
st_dst_reg(gl_register_file file, int writemask, int type)
{
this->file = file;
explicit st_dst_reg(st_src_reg reg);
gl_register_file file; /**< PROGRAM_* from Mesa */
- int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+ int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
GLuint cond_mask:4;
int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
this->negate = 0;
this->reladdr = reg.reladdr;
this->index2D = 0;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
}
st_dst_reg::st_dst_reg(st_src_reg reg)
class glsl_to_tgsi_instruction : public exec_node {
public:
- /* Callers of this ralloc-based new need not call delete. It's
- * easier to just ralloc_free 'ctx' (or any of its ancestors). */
- static void* operator new(size_t size, void *ctx)
- {
- void *node;
-
- node = rzalloc_size(ctx, size);
- assert(node != NULL);
-
- return node;
- }
+ DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)
unsigned op;
st_dst_reg dst;
- st_src_reg src[3];
+ st_src_reg src[4];
/** Pointer to the ir source this tree came from for debugging */
ir_instruction *ir;
GLboolean cond_update;
int sampler; /**< sampler index */
int tex_target; /**< One of TEXTURE_*_INDEX */
GLboolean tex_shadow;
- struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+
+ st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned tex_offset_num_offset;
int dead_mask; /**< Used in dead code elimination */
int next_temp;
+ unsigned array_sizes[MAX_ARRAYS];
+ unsigned next_array;
+
int num_address_regs;
int samplers_used;
- bool indirect_addr_temps;
bool indirect_addr_consts;
int glsl_version;
bool native_integers;
+ bool have_sqrt;
variable_storage *find_variable_storage(ir_variable *var);
virtual void visit(ir_discard *);
virtual void visit(ir_texture *);
virtual void visit(ir_if *);
+ virtual void visit(ir_emit_vertex *);
+ virtual void visit(ir_end_primitive *);
/*@}*/
st_src_reg result;
glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
st_dst_reg dst,
st_src_reg src0, st_src_reg src1, st_src_reg src2);
-
+
+ glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1,
+ st_src_reg src2, st_src_reg src3);
+
unsigned get_opcode(ir_instruction *ir, unsigned op,
st_dst_reg dst,
st_src_reg src0, st_src_reg src1);
void emit_scalar(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1);
- void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
-
void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
void emit_scs(ir_instruction *ir, unsigned op,
int get_last_temp_write(int index);
void copy_propagate(void);
- void eliminate_dead_code(void);
- int eliminate_dead_code_advanced(void);
+ int eliminate_dead_code(void);
void merge_registers(void);
void renumber_registers(void);
+ void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
+ st_dst_reg *l, st_src_reg *r);
+
void *mem_ctx;
};
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
-static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
+static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
- st_dst_reg dst,
- st_src_reg src0, st_src_reg src1, st_src_reg src2)
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1,
+ st_src_reg src2, st_src_reg src3)
{
glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
int num_reladdr = 0, i;
* sources into temps.
*/
num_reladdr += dst.reladdr != NULL;
- num_reladdr += src0.reladdr != NULL;
- num_reladdr += src1.reladdr != NULL;
- num_reladdr += src2.reladdr != NULL;
+ num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
+ num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
+ num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
+ num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;
+ reladdr_to_temp(ir, &src3, &num_reladdr);
reladdr_to_temp(ir, &src2, &num_reladdr);
reladdr_to_temp(ir, &src1, &num_reladdr);
reladdr_to_temp(ir, &src0, &num_reladdr);
inst->src[0] = src0;
inst->src[1] = src1;
inst->src[2] = src2;
+ inst->src[3] = src3;
inst->ir = ir;
inst->dead_mask = 0;
inst->function = NULL;
- if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
- this->num_address_regs = 1;
-
/* Update indirect addressing status used by TGSI */
if (dst.reladdr) {
switch(dst.file) {
- case PROGRAM_TEMPORARY:
- this->indirect_addr_temps = true;
- break;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
}
}
else {
- for (i=0; i<3; i++) {
+ for (i=0; i<4; i++) {
if(inst->src[i].reladdr) {
switch(inst->src[i].file) {
- case PROGRAM_TEMPORARY:
- this->indirect_addr_temps = true;
- break;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
this->instructions.push_tail(inst);
- if (native_integers)
- try_emit_float_set(ir, op, dst);
-
return inst;
}
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, st_src_reg src0,
+ st_src_reg src1, st_src_reg src2)
+{ /* Three-source convenience overload: forwards every argument to the
+   * four-source emit() and passes undef_src as the unused fourth source.
+   * Returns whatever the four-source emit() returns. */
+ return emit(ir, op, dst, src0, src1, src2, undef_src);
+}
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1)
{
- return emit(ir, op, dst, src0, src1, undef_src);
+ return emit(ir, op, dst, src0, src1, undef_src, undef_src);
}
glsl_to_tgsi_instruction *
st_dst_reg dst, st_src_reg src0)
{
assert(dst.writemask != 0);
- return emit(ir, op, dst, src0, undef_src, undef_src);
+ return emit(ir, op, dst, src0, undef_src, undef_src, undef_src);
}
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
{
- return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
-}
-
- /**
- * Emits the code to convert the result of float SET instructions to integers.
- */
-void
-glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
- st_dst_reg dst)
-{
- if ((op == TGSI_OPCODE_SEQ ||
- op == TGSI_OPCODE_SNE ||
- op == TGSI_OPCODE_SGE ||
- op == TGSI_OPCODE_SLT))
- {
- st_src_reg src = st_src_reg(dst);
- src.negate = ~src.negate;
- dst.type = GLSL_TYPE_FLOAT;
- emit(ir, TGSI_OPCODE_F2I, dst, src);
- }
+ return emit(ir, op, undef_dst, undef_src, undef_src, undef_src, undef_src);
}
/**
st_src_reg src0, st_src_reg src1)
{
int type = GLSL_TYPE_FLOAT;
-
+
+ if (op == TGSI_OPCODE_MOV)
+ return op;
+
assert(src0.type != GLSL_TYPE_ARRAY);
assert(src0.type != GLSL_TYPE_STRUCT);
assert(src1.type != GLSL_TYPE_ARRAY);
#define case4(c, f, i, u) \
case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
- else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
- else op = TGSI_OPCODE_##f; \
+ if (type == GLSL_TYPE_INT) \
+ op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+ else \
+ op = TGSI_OPCODE_##f; \
break;
+
#define case3(f, i, u) case4(f, f, i, u)
#define case2fi(f, i) case4(f, f, i, i)
#define case2iu(i, u) case4(i, LAST, i, u)
-
+
+#define casecomp(c, f, i, u) \
+ case TGSI_OPCODE_##c: \
+ if (type == GLSL_TYPE_INT) \
+ op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+ else if (native_integers) \
+ op = TGSI_OPCODE_##f; \
+ else \
+ op = TGSI_OPCODE_##c; \
+ break;
+
switch(op) {
case2fi(ADD, UADD);
case2fi(MUL, UMUL);
case3(MAX, IMAX, UMAX);
case3(MIN, IMIN, UMIN);
case2iu(MOD, UMOD);
-
- case2fi(SEQ, USEQ);
- case2fi(SNE, USNE);
- case3(SGE, ISGE, USGE);
- case3(SLT, ISLT, USLT);
-
+
+ casecomp(SEQ, FSEQ, USEQ, USEQ);
+ casecomp(SNE, FSNE, USNE, USNE);
+ casecomp(SGE, FSGE, ISGE, USGE);
+ casecomp(SLT, FSLT, ISLT, USLT);
+
case2iu(ISHR, USHR);
case2fi(SSG, ISSG);
case3(ABS, IABS, IABS);
-
+
+ case2iu(IBFE, UBFE);
+ case2iu(IMSB, UMSB);
+ case2iu(IMUL_HI, UMUL_HI);
default: break;
}
if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
op = TGSI_OPCODE_UARL;
+ assert(dst.file == PROGRAM_ADDRESS);
+ if (dst.index >= this->num_address_regs)
+ this->num_address_regs = dst.index + 1;
+
emit(NULL, op, dst, src0);
}
/* Search immediate storage to see if we already have an identical
* immediate that we can use instead of adding a duplicate entry.
*/
- foreach_iter(exec_list_iterator, iter, this->immediates) {
- entry = (immediate_storage *)iter.get();
+ foreach_list(node, &this->immediates) {
+ entry = (immediate_storage *) node;
if (entry->size == size &&
entry->type == datatype &&
}
return size;
case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
/* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
return 1;
- default:
- assert(0);
- return 0;
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ assert(!"Invalid type in type_size");
+ break;
}
+ return 0;
}
/**
st_src_reg src;
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
- src.file = PROGRAM_TEMPORARY;
- src.index = next_temp;
src.reladdr = NULL;
- next_temp += type_size(type);
+ src.negate = 0;
+
+ if (!options->EmitNoIndirectTemp &&
+ (type->is_array() || type->is_matrix())) {
+
+ src.file = PROGRAM_ARRAY;
+ src.index = next_array << 16 | 0x8000;
+ array_sizes[next_array] = type_size(type);
+ ++next_array;
+
+ } else {
+ src.file = PROGRAM_TEMPORARY;
+ src.index = next_temp;
+ next_temp += type_size(type);
+ }
if (type->is_array() || type->is_record()) {
src.swizzle = SWIZZLE_NOOP;
} else {
src.swizzle = swizzle_for_size(type->vector_elements);
}
- src.negate = 0;
return src;
}
variable_storage *entry;
- foreach_iter(exec_list_iterator, iter, this->variables) {
- entry = (variable_storage *)iter.get();
+ foreach_list(node, &this->variables) {
+ entry = (variable_storage *) node;
if (entry->var == var)
return entry;
if (strcmp(ir->name, "gl_FragCoord") == 0) {
struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
- fp->OriginUpperLeft = ir->origin_upper_left;
- fp->PixelCenterInteger = ir->pixel_center_integer;
+ fp->OriginUpperLeft = ir->data.origin_upper_left;
+ fp->PixelCenterInteger = ir->data.pixel_center_integer;
}
- if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
+ if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
unsigned int i;
const ir_state_slot *const slots = ir->state_slots;
assert(ir->state_slots != NULL);
*/
assert((int) ir->num_state_slots == type_size(ir->type));
- storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
- this->next_temp += type_size(ir->type);
+ dst = st_dst_reg(get_temp(ir->type));
- dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
- native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
+ storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
+
+ this->variables.push_tail(storage);
}
void
glsl_to_tgsi_visitor::visit(ir_loop *ir)
{
- ir_dereference_variable *counter = NULL;
-
- if (ir->counter != NULL)
- counter = new(ir) ir_dereference_variable(ir->counter);
-
- if (ir->from != NULL) {
- assert(ir->counter != NULL);
-
- ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
-
- a->accept(this);
- delete a;
- }
-
emit(NULL, TGSI_OPCODE_BGNLOOP);
- if (ir->to) {
- ir_expression *e =
- new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
- counter, ir->to);
- ir_if *if_stmt = new(ir) ir_if(e);
-
- ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
-
- if_stmt->then_instructions.push_tail(brk);
-
- if_stmt->accept(this);
-
- delete if_stmt;
- delete e;
- delete brk;
- }
-
visit_exec_list(&ir->body_instructions, this);
- if (ir->increment) {
- ir_expression *e =
- new(ir) ir_expression(ir_binop_add, counter->type,
- counter, ir->increment);
-
- ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
-
- a->accept(this);
- delete a;
- delete e;
- }
-
emit(NULL, TGSI_OPCODE_ENDLOOP);
}
const ir_function_signature *sig;
exec_list empty;
- sig = ir->matching_signature(&empty);
+ sig = ir->matching_signature(NULL, &empty);
assert(sig);
- foreach_iter(exec_list_iterator, iter, sig->body) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &sig->body) {
+ ir_instruction *ir = (ir_instruction *) node;
ir->accept(this);
}
bool
glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
{
- /* Saturates were only introduced to vertex programs in
- * NV_vertex_program3, so don't give them to drivers in the VP.
+ /* Emit saturates in the vertex shader only if SM 3.0 is supported.
*/
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+ if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+ !st_context(this->ctx)->has_shader_model3) {
return false;
+ }
ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
if (!sat_src)
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
st_src_reg *reg, int *num_reladdr)
{
- if (!reg->reladdr)
+ if (!reg->reladdr && !reg->reladdr2)
return;
- emit_arl(ir, address_reg, *reg->reladdr);
+ if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
+ if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
if (*num_reladdr != 1) {
st_src_reg temp = get_temp(glsl_type::vec4_type);
this->result.file = PROGRAM_UNDEFINED;
ir->operands[operand]->accept(this);
if (this->result.file == PROGRAM_UNDEFINED) {
- ir_print_visitor v;
printf("Failed to get tree for expression operand:\n");
- ir->operands[operand]->accept(&v);
+ ir->operands[operand]->print();
+ printf("\n");
exit(1);
}
op[operand] = this->result;
case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- /* After the dot-product, the value will be an integer on the
- * range [0,4]. Zero stays zero, and positive values become 1.0.
- */
- glsl_to_tgsi_instruction *const dp =
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
- result_dst.type == GLSL_TYPE_FLOAT) {
- /* The clamping to [0,1] can be done for free in the fragment
- * shader with a saturate.
- */
- dp->saturate = true;
- } else if (result_dst.type == GLSL_TYPE_FLOAT) {
- /* Negating the result of the dot-product gives values on the range
- * [-4, 0]. Zero stays zero, and negative values become 1.0. This
- * is achieved using SLT.
- */
- st_src_reg slt_src = result_src;
- slt_src.negate = ~slt_src.negate;
- emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
- }
- else {
- /* Use SNE 0 if integers are being used as boolean values. */
- emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ if (native_integers) {
+ int dst_swizzle = 0, op0_swizzle, i;
+ st_src_reg accum = op[0];
+
+ op0_swizzle = op[0].swizzle;
+ accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0),
+ GET_SWZ(op0_swizzle, 0),
+ GET_SWZ(op0_swizzle, 0),
+ GET_SWZ(op0_swizzle, 0));
+ for (i = 0; i < 4; i++) {
+ if (result_dst.writemask & (1 << i)) {
+ dst_swizzle = MAKE_SWIZZLE4(i, i, i, i);
+ break;
+ }
+ }
+ assert(i != 4);
+ assert(ir->operands[0]->type->is_boolean());
+
+ /* OR all the components together, since they should be either 0 or ~0
+ */
+ switch (ir->operands[0]->type->vector_elements) {
+ case 4:
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3),
+ GET_SWZ(op0_swizzle, 3),
+ GET_SWZ(op0_swizzle, 3),
+ GET_SWZ(op0_swizzle, 3));
+ emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+ accum = st_src_reg(result_dst);
+ accum.swizzle = dst_swizzle;
+ /* fallthrough */
+ case 3:
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2),
+ GET_SWZ(op0_swizzle, 2),
+ GET_SWZ(op0_swizzle, 2),
+ GET_SWZ(op0_swizzle, 2));
+ emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+ accum = st_src_reg(result_dst);
+ accum.swizzle = dst_swizzle;
+ /* fallthrough */
+ case 2:
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1),
+ GET_SWZ(op0_swizzle, 1),
+ GET_SWZ(op0_swizzle, 1),
+ GET_SWZ(op0_swizzle, 1));
+ emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+ break;
+ default:
+ assert(!"Unexpected vector size");
+ break;
+ }
+ } else {
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ glsl_to_tgsi_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+ result_dst.type == GLSL_TYPE_FLOAT) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ st_src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ }
+ else {
+ /* Use SNE 0 if integers are being used as boolean values. */
+ emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+ }
}
break;
}
break;
case ir_unop_sqrt:
- /* sqrt(x) = x * rsq(x). */
- emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
- emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
- /* For incoming channels <= 0, set the result to 0. */
- op[0].negate = ~op[0].negate;
- emit(ir, TGSI_OPCODE_CMP, result_dst,
- op[0], result_src, st_src_reg_for_float(0.0));
+ if (have_sqrt) {
+ emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
+ }
+ else {
+ /* sqrt(x) = x * rsq(x). */
+ emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+ emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
+ /* For incoming channels <= 0, set the result to 0. */
+ op[0].negate = ~op[0].negate;
+ emit(ir, TGSI_OPCODE_CMP, result_dst,
+ op[0], result_src, st_src_reg_for_float(0.0));
+ }
break;
case ir_unop_rsq:
emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
case ir_unop_bitcast_f2i:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_INT;
+ break;
case ir_unop_bitcast_f2u:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_UINT;
+ break;
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
result_src = op[0];
+ result_src.type = GLSL_TYPE_FLOAT;
break;
case ir_unop_f2b:
emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
case ir_binop_ubo_load: {
ir_constant *uniform_block = ir->operands[0]->as_constant();
+ ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+ unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
st_src_reg index_reg = get_temp(glsl_type::uint_type);
st_src_reg cbuf;
assert(ir->type->is_vector() || ir->type->is_scalar());
- emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4));
+ if (const_offset_ir) {
+ index_reg = st_src_reg_for_int(const_offset / 16);
+ } else {
+ emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4));
+ }
cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
+ cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4);
+
cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
- emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+ if (ir->type->base_type == GLSL_TYPE_BOOL) {
+ emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
+ } else {
+ emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+ }
break;
}
+ case ir_triop_lrp:
+ /* note: we have to reorder the three args here */
+ emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
+ break;
+ case ir_triop_csel:
+ if (this->ctx->Const.NativeIntegers)
+ emit(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
+ else {
+ op[0].negate = ~op[0].negate;
+ emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
+ }
+ break;
+ case ir_triop_bitfield_extract:
+ emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
+ break;
+ case ir_quadop_bitfield_insert:
+ emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
+ break;
+ case ir_unop_bitfield_reverse:
+ emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
+ break;
+ case ir_unop_bit_count:
+ emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
+ break;
+ case ir_unop_find_msb:
+ emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
+ break;
+ case ir_unop_find_lsb:
+ emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
+ break;
+ case ir_binop_imul_high:
+ emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
+ break;
+ case ir_triop_fma:
+ /* NOTE: Perhaps there should be a special opcode that enforces fused
+ * mul-add. Just use MAD for now.
+ */
+ emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
+ break;
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ case ir_binop_pack_half_2x16_split:
+ case ir_binop_bfm:
+ case ir_triop_bfi:
case ir_quadop_vector:
- /* This operation should have already been handled.
+ case ir_binop_vector_extract:
+ case ir_triop_vector_insert:
+ case ir_binop_ldexp:
+ case ir_binop_carry:
+ case ir_binop_borrow:
+ /* This operation is not supported, or should have already been handled.
*/
- assert(!"Should not get here.");
+ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
}
ir_variable *var = ir->var;
if (!entry) {
- switch (var->mode) {
+ switch (var->data.mode) {
case ir_var_uniform:
entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
- var->location);
+ var->data.location);
this->variables.push_tail(entry);
break;
- case ir_var_in:
- case ir_var_inout:
+ case ir_var_shader_in:
/* The linker assigns locations for varyings and attributes,
* including deprecated builtins (like gl_Color), user-assign
* generic attributes (glBindVertexLocation), and
* user-defined varyings.
- *
- * FINISHME: We would hit this path for function arguments. Fix!
*/
- assert(var->location != -1);
+ assert(var->data.location != -1);
entry = new(mem_ctx) variable_storage(var,
PROGRAM_INPUT,
- var->location);
+ var->data.location);
break;
- case ir_var_out:
- assert(var->location != -1);
+ case ir_var_shader_out:
+ assert(var->data.location != -1);
entry = new(mem_ctx) variable_storage(var,
PROGRAM_OUTPUT,
- var->location + var->index);
+ var->data.location
+ + var->data.index);
break;
case ir_var_system_value:
entry = new(mem_ctx) variable_storage(var,
PROGRAM_SYSTEM_VALUE,
- var->location);
+ var->data.location);
break;
case ir_var_auto:
case ir_var_temporary:
- entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
- this->next_temp);
+ st_src_reg src = get_temp(var->type);
+
+ entry = new(mem_ctx) variable_storage(var, src.file, src.index);
this->variables.push_tail(entry);
- next_temp += type_size(var->type);
break;
}
ir_constant *index;
st_src_reg src;
int element_size = type_size(ir->type);
+ bool is_2D_input;
index = ir->array_index->constant_expression_value();
ir->array->accept(this);
src = this->result;
+ is_2D_input = this->prog->Target == GL_GEOMETRY_PROGRAM_NV &&
+ src.file == PROGRAM_INPUT &&
+ ir->array->ir_type != ir_type_dereference_array;
+
+ if (is_2D_input)
+ element_size = 1;
+
if (index) {
- src.index += index->value.i[0] * element_size;
+ if (is_2D_input) {
+ src.index2D = index->value.i[0];
+ src.has_index2 = true;
+ } else
+ src.index += index->value.i[0] * element_size;
} else {
/* Variable index array dereference. It eats the "vec4" of the
* base of the array and an index that offsets the TGSI register
/* If there was already a relative address register involved, add the
* new and the old together to get the new offset.
*/
- if (src.reladdr != NULL) {
+ if (!is_2D_input && src.reladdr != NULL) {
st_src_reg accum_reg = get_temp(native_integers ?
glsl_type::int_type : glsl_type::float_type);
index_reg = accum_reg;
}
- src.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+ if (is_2D_input) {
+ src.reladdr2 = ralloc(mem_ctx, st_src_reg);
+ memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
+ src.index2D = 0;
+ src.has_index2 = true;
+ } else {
+ src.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+ }
}
/* If the type is smaller than a vec4, replicate the last channel out. */
return switch_order;
}
+void
+glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
+ st_dst_reg *l, st_src_reg *r)
+{ /* Copies a value of the given type from *r to *l by emitting one
+   * TGSI_OPCODE_MOV per scalar/vector leaf of the type, recursing through
+   * structs, arrays and matrices.
+   *
+   * ir   - assignment being translated; handed unchanged to emit().
+   * type - type of the value (or sub-value) still to be copied.
+   * l, r - destination and source registers.  Both are modified in place:
+   *        their index is incremented after every MOV, so when the call
+   *        returns they refer to the register following the last one
+   *        copied.  That shared cursor is what lets the recursive calls
+   *        below walk consecutive registers. */
+ if (type->base_type == GLSL_TYPE_STRUCT) { /* struct: copy each field in declaration order */
+ for (unsigned int i = 0; i < type->length; i++) {
+ emit_block_mov(ir, type->fields.structure[i].type, l, r);
+ }
+ return;
+ }
+
+ if (type->is_array()) { /* array: copy type->length elements of the element type */
+ for (unsigned int i = 0; i < type->length; i++) {
+ emit_block_mov(ir, type->fields.array, l, r);
+ }
+ return;
+ }
+
+ if (type->is_matrix()) { /* matrix: copy it one column at a time */
+ const struct glsl_type *vec_type;
+
+ vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, /* NOTE(review): presumably yields the float column-vector type (vector_elements rows, 1 column) -- confirm against glsl_types.h */
+ type->vector_elements, 1);
+
+ for (int i = 0; i < type->matrix_columns; i++) {
+ emit_block_mov(ir, vec_type, l, r);
+ }
+ return;
+ }
+
+ assert(type->is_scalar() || type->is_vector()); /* only leaf types may reach this point */
+
+ r->type = type->base_type; /* tag the source with the leaf's base type before emitting the MOV */
+ emit(ir, TGSI_OPCODE_MOV, *l, *r);
+ l->index++; /* advance both cursors to the next register */
+ r->index++;
+}
+
void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
l.writemask = WRITEMASK_XYZW;
} else if (ir->lhs->type->is_scalar() &&
- ir->lhs->variable_referenced()->mode == ir_var_out) {
+ ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
/* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
* FINISHME: W component of fragment shader output zero, work correctly.
*/
if (native_integers) {
/* This is necessary because TGSI's CMP instruction expects the
* condition to be a float, and we store booleans as integers.
- * If TGSI had a UCMP instruction or similar, this extra
- * instruction would not be necessary.
+ * TODO: really want to avoid i2f path and use UCMP. Requires
+ * changes to process_move_condition though too.
*/
condition_temp = get_temp(glsl_type::vec4_type);
condition.negate = 0;
new_inst->saturate = inst->saturate;
inst->dead_mask = inst->dst.writemask;
} else {
- for (i = 0; i < type_size(ir->lhs->type); i++) {
- if (ir->rhs->type->is_array())
- r.type = ir->rhs->type->element_type()->base_type;
- else if (ir->rhs->type->is_record())
- r.type = ir->rhs->type->fields.structure[i].type->base_type;
- emit(ir, TGSI_OPCODE_MOV, l, r);
- l.index++;
- r.index++;
- }
+ emit_block_mov(ir, ir->rhs->type, &l, &r);
}
}
st_src_reg temp_base = get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
- foreach_iter(exec_list_iterator, iter, ir->components) {
- ir_constant *field_value = (ir_constant *)iter.get();
+ foreach_list(node, &ir->components) {
+ ir_constant *field_value = (ir_constant *) node;
int size = type_size(field_value->type);
assert(size > 0);
{
function_entry *entry;
- foreach_iter(exec_list_iterator, iter, this->function_signatures) {
- entry = (function_entry *)iter.get();
+ foreach_list(node, &this->function_signatures) {
+ entry = (function_entry *) node;
if (entry->sig == sig)
return entry;
entry->bgn_inst = NULL;
/* Allocate storage for all the parameters. */
- foreach_iter(exec_list_iterator, iter, sig->parameters) {
- ir_variable *param = (ir_variable *)iter.get();
+ foreach_list(node, &sig->parameters) {
+ ir_variable *param = (ir_variable *) node;
variable_storage *storage;
storage = find_variable_storage(param);
assert(!storage);
- storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
+ st_src_reg src = get_temp(param->type);
- this->next_temp += type_size(param->type);
+ storage = new(mem_ctx) variable_storage(param, src.file, src.index);
+ this->variables.push_tail(storage);
}
if (!sig->return_type->is_void()) {
int i;
/* Process in parameters. */
- exec_list_iterator sig_iter = sig->parameters.iterator();
- foreach_iter(exec_list_iterator, iter, *ir) {
- ir_rvalue *param_rval = (ir_rvalue *)iter.get();
- ir_variable *param = (ir_variable *)sig_iter.get();
+ foreach_two_lists(formal_node, &sig->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+ ir_variable *param = (ir_variable *) formal_node;
- if (param->mode == ir_var_in ||
- param->mode == ir_var_inout) {
+ if (param->data.mode == ir_var_function_in ||
+ param->data.mode == ir_var_function_inout) {
variable_storage *storage = find_variable_storage(param);
assert(storage);
r.index++;
}
}
-
- sig_iter.next();
}
- assert(!sig_iter.has_next());
/* Emit call instruction */
call_inst = emit(ir, TGSI_OPCODE_CAL);
call_inst->function = entry;
/* Process out parameters. */
- sig_iter = sig->parameters.iterator();
- foreach_iter(exec_list_iterator, iter, *ir) {
- ir_rvalue *param_rval = (ir_rvalue *)iter.get();
- ir_variable *param = (ir_variable *)sig_iter.get();
+ foreach_two_lists(formal_node, &sig->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+ ir_variable *param = (ir_variable *) formal_node;
- if (param->mode == ir_var_out ||
- param->mode == ir_var_inout) {
+ if (param->data.mode == ir_var_function_out ||
+ param->data.mode == ir_var_function_inout) {
variable_storage *storage = find_variable_storage(param);
assert(storage);
r.index++;
}
}
-
- sig_iter.next();
}
- assert(!sig_iter.has_next());
/* Process return value. */
this->result = entry->return_reg;
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
- st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset;
+ st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
st_dst_reg result_dst, coord_dst, cube_sc_dst;
glsl_to_tgsi_instruction *inst = NULL;
unsigned opcode = TGSI_OPCODE_NOP;
const glsl_type *sampler_type = ir->sampler->type;
bool is_cube_array = false;
+ unsigned i;
/* if we are a cube array sampler */
if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
*/
coord = get_temp(glsl_type::vec4_type);
coord_dst = st_dst_reg(coord);
+ coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
}
switch (ir->op) {
case ir_tex:
opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ offset[0] = this->result;
+ }
break;
case ir_txb:
opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB;
ir->lod_info.bias->accept(this);
lod_info = this->result;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ offset[0] = this->result;
+ }
break;
case ir_txl:
opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
ir->lod_info.lod->accept(this);
lod_info = this->result;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ offset[0] = this->result;
+ }
break;
case ir_txd:
opcode = TGSI_OPCODE_TXD;
dx = this->result;
ir->lod_info.grad.dPdy->accept(this);
dy = this->result;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ offset[0] = this->result;
+ }
break;
case ir_txs:
opcode = TGSI_OPCODE_TXQ;
ir->lod_info.lod->accept(this);
lod_info = this->result;
if (ir->offset) {
- ir->offset->accept(this);
- offset = this->result;
+ ir->offset->accept(this);
+ offset[0] = this->result;
}
break;
+ case ir_txf_ms:
+ opcode = TGSI_OPCODE_TXF;
+ ir->lod_info.sample_index->accept(this);
+ sample_index = this->result;
+ break;
+ case ir_tg4:
+ opcode = TGSI_OPCODE_TG4;
+ ir->lod_info.component->accept(this);
+ component = this->result;
+ if (ir->offset) {
+ ir->offset->accept(this);
+ if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
+ const glsl_type *elt_type = ir->offset->type->fields.array;
+ for (i = 0; i < ir->offset->type->length; i++) {
+ offset[i] = this->result;
+ offset[i].index += i * type_size(elt_type);
+ offset[i].type = elt_type->base_type;
+ offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
+ }
+ } else {
+ offset[0] = this->result;
+ }
+ }
+ break;
+ case ir_lod:
+ opcode = TGSI_OPCODE_LODQ;
+ break;
+ case ir_query_levels:
+ assert(!"Unexpected ir_query_levels opcode");
+ break;
}
if (ir->projector) {
}
}
- if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+ if (ir->op == ir_txf_ms) {
+ coord_dst.writemask = WRITEMASK_W;
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
+ coord_dst.writemask = WRITEMASK_XYZW;
+ } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
opcode == TGSI_OPCODE_TXF) {
/* TGSI stores LOD or LOD bias in the last channel of the coords. */
coord_dst.writemask = WRITEMASK_W;
inst = emit(ir, opcode, result_dst, coord, lod_info);
} else if (opcode == TGSI_OPCODE_TEX2) {
inst = emit(ir, opcode, result_dst, coord, cube_sc);
- } else
+ } else if (opcode == TGSI_OPCODE_TG4) {
+ if (is_cube_array && ir->shadow_comparitor) {
+ inst = emit(ir, opcode, result_dst, coord, cube_sc);
+ } else {
+ inst = emit(ir, opcode, result_dst, coord, component);
+ }
+ } else
inst = emit(ir, opcode, result_dst, coord);
if (ir->shadow_comparitor)
this->prog);
if (ir->offset) {
- inst->tex_offset_num_offset = 1;
- inst->tex_offsets[0].Index = offset.index;
- inst->tex_offsets[0].File = offset.file;
- inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
- inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
- inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
+ for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
+ inst->tex_offsets[i] = offset[i];
+ inst->tex_offset_num_offset = i;
}
switch (sampler_type->sampler_dimensionality) {
case GLSL_SAMPLER_DIM_EXTERNAL:
inst->tex_target = TEXTURE_EXTERNAL_INDEX;
break;
+ case GLSL_SAMPLER_DIM_MS:
+ inst->tex_target = (sampler_type->sampler_array)
+ ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ break;
default:
assert(!"Should not get here.");
}
if (ir->condition) {
ir->condition->accept(this);
this->result.negate = ~this->result.negate;
- emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
+ emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
} else {
- emit(ir, TGSI_OPCODE_KILP);
+ /* unconditional kil */
+ emit(ir, TGSI_OPCODE_KILL);
}
}
void
glsl_to_tgsi_visitor::visit(ir_if *ir)
{
- glsl_to_tgsi_instruction *cond_inst, *if_inst;
- glsl_to_tgsi_instruction *prev_inst;
-
- prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+ unsigned if_opcode;
+ glsl_to_tgsi_instruction *if_inst;
ir->condition->accept(this);
assert(this->result.file != PROGRAM_UNDEFINED);
- if (this->options->EmitCondCodes) {
- cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+ if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
- /* See if we actually generated any instruction for generating
- * the condition. If not, then cook up a move to a temp so we
- * have something to set cond_update on.
- */
- if (cond_inst == prev_inst) {
- st_src_reg temp = get_temp(glsl_type::bool_type);
- cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
- }
- cond_inst->cond_update = GL_TRUE;
-
- if_inst = emit(ir->condition, TGSI_OPCODE_IF);
- if_inst->dst.cond_mask = COND_NE;
- } else {
- if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
- }
+ if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
this->instructions.push_tail(if_inst);
if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
}
+
+/**
+ * Translate an ir_emit_vertex node into a single TGSI_OPCODE_EMIT
+ * instruction.
+ *
+ * Asserts that the program being built targets GL_GEOMETRY_PROGRAM_NV.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
+{
+   assert(prog->Target == GL_GEOMETRY_PROGRAM_NV);
+
+   this->emit(ir, TGSI_OPCODE_EMIT);
+}
+
+/**
+ * Translate an ir_end_primitive node into a single TGSI_OPCODE_ENDPRIM
+ * instruction.
+ *
+ * Asserts that the program being built targets GL_GEOMETRY_PROGRAM_NV.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
+{
+   assert(prog->Target == GL_GEOMETRY_PROGRAM_NV);
+
+   this->emit(ir, TGSI_OPCODE_ENDPRIM);
+}
+
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
{
result.file = PROGRAM_UNDEFINED;
next_temp = 1;
+ next_array = 0;
next_signature_id = 1;
num_immediates = 0;
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
- indirect_addr_temps = false;
indirect_addr_consts = false;
glsl_version = 0;
native_integers = false;
{
v->samplers_used = 0;
- foreach_iter(exec_list_iterator, iter, v->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &v->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
if (is_tex_instruction(inst->op)) {
v->samplers_used |= 1 << inst->sampler;
"Couldn't find uniform for initializer %s\n", name);
return;
}
- int loc = _mesa_uniform_merge_location_offset(index, offset);
+ int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset);
for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
ir_constant *element;
memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
memset(outputWrites, 0, sizeof(outputWrites));
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
unsigned prevWriteMask = 0;
/* Give up if we encounter relative addressing or flow control. */
assert(inst->dst.index < MAX_TEMPS);
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
- }
+ } else
+ continue;
/* For a CMP to be considered a conditional write, the destination
* register and source register two must be the same. */
void
glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
{
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
unsigned j;
for (j=0; j < num_inst_src_regs(inst->op); j++) {
inst->src[j].index = new_index;
}
}
+
+ for (j=0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+ inst->tex_offsets[j].index == index) {
+ inst->tex_offsets[j].index = new_index;
+ }
+ }
if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
inst->dst.index = new_index;
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
unsigned i = 0, j;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
for (j=0; j < num_inst_src_regs(inst->op); j++) {
if (inst->src[j].file == PROGRAM_TEMPORARY &&
return (depth == 0) ? i : loop_start;
}
}
+ for (j=0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+ inst->tex_offsets[j].index == index) {
+ return (depth == 0) ? i : loop_start;
+ }
+ }
if (inst->op == TGSI_OPCODE_BGNLOOP) {
if(depth++ == 0)
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
int i = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
return (depth == 0) ? i : loop_start;
int last = -1; /* index of last instruction that reads the temporary */
unsigned i = 0, j;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
for (j=0; j < num_inst_src_regs(inst->op); j++) {
if (inst->src[j].file == PROGRAM_TEMPORARY &&
last = (depth == 0) ? i : -2;
}
}
+ for (j=0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+ inst->tex_offsets[j].index == index)
+ last = (depth == 0) ? i : -2;
+ }
if (inst->op == TGSI_OPCODE_BGNLOOP)
depth++;
int last = -1; /* index of last instruction that writes to the temporary */
int i = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
last = (depth == 0) ? i : -2;
int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
int level = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
assert(inst->dst.file != PROGRAM_TEMPORARY
|| inst->dst.index < this->next_temp);
int acp_base = inst->src[r].index * 4;
if (inst->src[r].file != PROGRAM_TEMPORARY ||
- inst->src[r].reladdr)
+ inst->src[r].reladdr ||
+ inst->src[r].reladdr2)
continue;
/* See if we can find entries in the ACP consisting of MOVs
*/
inst->src[r].file = first->src[0].file;
inst->src[r].index = first->src[0].index;
+ inst->src[r].index2D = first->src[0].index2D;
+ inst->src[r].has_index2 = first->src[0].has_index2;
int swizzle = 0;
for (int i = 0; i < 4; i++) {
break;
case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
++level;
break;
/* If this is a copy, add it to the ACP. */
if (inst->op == TGSI_OPCODE_MOV &&
inst->dst.file == PROGRAM_TEMPORARY &&
+ !(inst->dst.file == inst->src[0].file &&
+ inst->dst.index == inst->src[0].index) &&
!inst->dst.reladdr &&
!inst->saturate &&
!inst->src[0].reladdr &&
+ !inst->src[0].reladdr2 &&
!inst->src[0].negate) {
for (int i = 0; i < 4; i++) {
if (inst->dst.writemask & (1 << i)) {
}
/*
- * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.
*
* The glsl_to_tgsi_visitor lazily produces code assuming that this pass
* will occur. As an example, a TXP production after copy propagation but
* and after this pass:
*
* 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
- *
- * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
- * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
- */
-void
-glsl_to_tgsi_visitor::eliminate_dead_code(void)
-{
- int i;
-
- for (i=0; i < this->next_temp; i++) {
- int last_read = get_last_temp_read(i);
- int j = 0;
-
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-
- if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
- j > last_read)
- {
- iter.remove();
- delete inst;
- }
-
- j++;
- }
- }
-}
-
-/*
- * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
- * code elimination. This is less primitive than eliminate_dead_code(), as it
- * is per-channel and can detect consecutive writes without a read between them
- * as dead code. However, there is some dead code that can be eliminated by
- * eliminate_dead_code() but not this function - for example, this function
- * cannot eliminate an instruction writing to a register that is never read and
- * is the only instruction writing to that register.
- *
- * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.
*/
int
-glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
glsl_to_tgsi_instruction *,
int level = 0;
int removed = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
assert(inst->dst.file != PROGRAM_TEMPORARY
|| inst->dst.index < this->next_temp);
break;
case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
++level;
/* fallthrough to default case to mark the condition as read */
}
}
}
+ for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
+ if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
+ /* Any temporary might be read, so no dead code elimination
+ * across this instruction.
+ */
+ memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+ } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
+ /* Clear where it's used as src. */
+ int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
+ src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
+ src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
+ src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
+
+ for (int c = 0; c < 4; c++) {
+ if (src_chans & (1 << c)) {
+ writes[4 * inst->tex_offsets[i].index + c] = NULL;
+ }
+ }
+ }
+ }
break;
}
/* Now actually remove the instructions that are completely dead and update
* the writemask of other instructions with dead channels.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
if (!inst->dead_mask || !inst->dst.writemask)
continue;
else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
- iter.remove();
+ inst->remove();
delete inst;
removed++;
} else
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
v->num_immediates = original->num_immediates;
* Get initial pixel color from the texture.
* TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
*/
- coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+ coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
src0 = v->get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
inst->sampler = 0;
inst->tex_target = TEXTURE_2D_INDEX;
- prog->InputsRead |= FRAG_BIT_TEX0;
+ prog->InputsRead |= VARYING_BIT_TEX0;
prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
v->samplers_used |= (1 << 0);
/* Now copy the instructions from the original glsl_to_tgsi_visitor into the
* new visitor. */
- foreach_iter(exec_list_iterator, iter, original->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &original->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
glsl_to_tgsi_instruction *newinst;
st_src_reg src_regs[3];
for (int i=0; i<3; i++) {
src_regs[i] = inst->src[i];
if (src_regs[i].file == PROGRAM_INPUT &&
- src_regs[i].index == FRAG_ATTRIB_COL0)
+ src_regs[i].index == VARYING_SLOT_COL0)
{
src_regs[i].file = PROGRAM_TEMPORARY;
src_regs[i].index = src0.index;
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
v->num_immediates = original->num_immediates;
/* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+ coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
src0 = v->get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
inst->sampler = samplerIndex;
inst->tex_target = TEXTURE_2D_INDEX;
- prog->InputsRead |= FRAG_BIT_TEX0;
+ prog->InputsRead |= VARYING_BIT_TEX0;
prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
v->samplers_used |= (1 << samplerIndex);
src0.negate = NEGATE_XYZW;
if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
src0.swizzle = SWIZZLE_XXXX;
- inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+ inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
/* Now copy the instructions from the original glsl_to_tgsi_visitor into the
* new visitor. */
- foreach_iter(exec_list_iterator, iter, original->instructions) {
- glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ foreach_list(node, &original->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
glsl_to_tgsi_instruction *newinst;
st_src_reg src_regs[3];
struct ureg_program *ureg;
struct ureg_dst temps[MAX_TEMPS];
+ struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
- struct ureg_dst address[1];
+ struct ureg_dst address[2];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+ struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+ unsigned array_sizes[MAX_ARRAYS];
const GLuint *inputMapping;
const GLuint *outputMapping;
/* Table of TGSI_SEMANTIC_* values with SYSTEM_VALUE_MAX slots.
 * NOTE(review): presumably indexed by Mesa's SYSTEM_VALUE_* enum, in which
 * case the entries must stay in the same order as that enum -- confirm
 * before adding or reordering entries.
 */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_FACE,
TGSI_SEMANTIC_VERTEXID,
- TGSI_SEMANTIC_INSTANCEID
+ TGSI_SEMANTIC_INSTANCEID,
+ TGSI_SEMANTIC_SAMPLEID,
+ TGSI_SEMANTIC_SAMPLEPOS,
+ TGSI_SEMANTIC_SAMPLEMASK,
+ TGSI_SEMANTIC_INVOCATIONID,
};
/**
gl_register_file file,
GLuint index)
{
+ unsigned array;
+
switch(file) {
case PROGRAM_UNDEFINED:
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
+ assert(index >= 0);
+ assert(index < (int) Elements(t->temps));
+
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
return t->temps[index];
+ case PROGRAM_ARRAY:
+ array = index >> 16;
+
+ assert(array >= 0);
+ assert(array < (int) Elements(t->arrays));
+
+ if (ureg_dst_is_undef(t->arrays[array]))
+ t->arrays[array] = ureg_DECL_array_temporary(
+ t->ureg, t->array_sizes[array], TRUE);
+
+ return ureg_dst_array_offset(t->arrays[array],
+ (int)(index & 0xFFFF) - 0x8000);
+
case PROGRAM_OUTPUT:
if (t->procType == TGSI_PROCESSOR_VERTEX)
- assert(index < VERT_RESULT_MAX);
+ assert(index < VARYING_SLOT_MAX);
else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
assert(index < FRAG_RESULT_MAX);
else
- assert(index < GEOM_RESULT_MAX);
+ assert(index < VARYING_SLOT_MAX);
assert(t->outputMapping[index] < Elements(t->outputs));
return ureg_src_undef();
case PROGRAM_TEMPORARY:
- assert(index >= 0);
- assert(index < (int) Elements(t->temps));
- if (ureg_dst_is_undef(t->temps[index]))
- t->temps[index] = ureg_DECL_local_temporary(t->ureg);
- return ureg_src(t->temps[index]);
+ case PROGRAM_ARRAY:
+ return ureg_src(dst_register(t, file, index));
- case PROGRAM_ENV_PARAM:
- case PROGRAM_LOCAL_PARAM:
case PROGRAM_UNIFORM:
assert(index >= 0);
return t->constants[index];
case TGSI_PROCESSOR_VERTEX:
/* XXX if the geometry shader is present, this must be done there
* instead of here. */
- if (dst_reg->index == VERT_RESULT_COL0 ||
- dst_reg->index == VERT_RESULT_COL1 ||
- dst_reg->index == VERT_RESULT_BFC0 ||
- dst_reg->index == VERT_RESULT_BFC1) {
+ if (dst_reg->index == VARYING_SLOT_COL0 ||
+ dst_reg->index == VARYING_SLOT_COL1 ||
+ dst_reg->index == VARYING_SLOT_BFC0 ||
+ dst_reg->index == VARYING_SLOT_BFC1) {
dst = ureg_saturate(dst);
}
break;
case TGSI_PROCESSOR_FRAGMENT:
- if (dst_reg->index >= FRAG_RESULT_COLOR) {
+ if (dst_reg->index == FRAG_RESULT_COLOR ||
+ dst_reg->index >= FRAG_RESULT_DATA0) {
dst = ureg_saturate(dst);
}
break;
}
}
- if (dst_reg->reladdr != NULL)
+ if (dst_reg->reladdr != NULL) {
+ assert(dst_reg->file != PROGRAM_TEMPORARY);
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
+ }
return dst;
}
{
struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
+ if (t->procType == TGSI_PROCESSOR_GEOMETRY && src_reg->has_index2) {
+ src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
+ if (src_reg->reladdr2)
+ src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
+ src_reg->index2D);
+ else
+ src = ureg_src_dimension(src, src_reg->index2D);
+ }
+
src = ureg_swizzle(src,
GET_SWZ(src_reg->swizzle, 0) & 0x3,
GET_SWZ(src_reg->swizzle, 1) & 0x3,
src = ureg_negate(src);
if (src_reg->reladdr != NULL) {
- /* Normally ureg_src_indirect() would be used here, but a stupid compiler
- * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
- * set the bit for src.Negate. So we have to do the operation manually
- * here to work around the compiler's problems. */
- /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
- struct ureg_src addr = ureg_src(t->address[0]);
- src.Indirect = 1;
- src.IndirectFile = addr.File;
- src.IndirectIndex = addr.Index;
- src.IndirectSwizzle = addr.SwizzleX;
-
- if (src_reg->file != PROGRAM_INPUT &&
- src_reg->file != PROGRAM_OUTPUT) {
- /* If src_reg->index was negative, it was set to zero in
- * src_register(). Reassign it now. But don't do this
- * for input/output regs since they get remapped while
- * const buffers don't.
- */
- src.Index = src_reg->index;
- }
+ assert(src_reg->file != PROGRAM_TEMPORARY);
+ src = ureg_src_indirect(src, ureg_src(t->address[0]));
}
return src;
static struct tgsi_texture_offset
translate_tex_offset(struct st_translate *t,
- const struct tgsi_texture_offset *in_offset)
+ const st_src_reg *in_offset, int idx)
{
struct tgsi_texture_offset offset;
struct ureg_src imm_src;
+ struct ureg_dst dst;
+ int array;
- assert(in_offset->File == PROGRAM_IMMEDIATE);
- imm_src = t->immediates[in_offset->Index];
-
- offset.File = imm_src.File;
- offset.Index = imm_src.Index;
- offset.SwizzleX = imm_src.SwizzleX;
- offset.SwizzleY = imm_src.SwizzleY;
- offset.SwizzleZ = imm_src.SwizzleZ;
- offset.File = TGSI_FILE_IMMEDIATE;
- offset.Padding = 0;
-
+ switch (in_offset->file) {
+ case PROGRAM_IMMEDIATE:
+ imm_src = t->immediates[in_offset->index];
+
+ offset.File = imm_src.File;
+ offset.Index = imm_src.Index;
+ offset.SwizzleX = imm_src.SwizzleX;
+ offset.SwizzleY = imm_src.SwizzleY;
+ offset.SwizzleZ = imm_src.SwizzleZ;
+ offset.Padding = 0;
+ break;
+ case PROGRAM_TEMPORARY:
+ imm_src = ureg_src(t->temps[in_offset->index]);
+ offset.File = imm_src.File;
+ offset.Index = imm_src.Index;
+ offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
+ offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
+ offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
+ offset.Padding = 0;
+ break;
+ case PROGRAM_ARRAY:
+ array = in_offset->index >> 16;
+
+ assert(array >= 0);
+ assert(array < (int) Elements(t->arrays));
+
+ dst = t->arrays[array];
+ offset.File = dst.File;
+ offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
+ offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
+ offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
+ offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
+ offset.Padding = 0;
+ break;
+ default:
+ break;
+ }
return offset;
}
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDLOOP:
case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
assert(num_dst == 0);
ureg_label_insn(ureg,
inst->op,
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL2:
+ case TGSI_OPCODE_TG4:
+ case TGSI_OPCODE_LODQ:
src[num_src++] = t->samplers[inst->sampler];
for (i = 0; i < inst->tex_offset_num_offset; i++) {
- texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
+ texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
}
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
- struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+ struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];
/* First, apply the coordinate shift: */
if (adjX || adjY[0] || adjY[1]) {
/* Use wpos_temp as position input from here on:
*/
- t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+ t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
}
{
struct ureg_program *ureg = t->ureg;
struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
- struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
+ struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
/* MOV_SAT face_temp, input[face] */
face_temp = ureg_saturate(face_temp);
ureg_MOV(ureg, face_temp, face_input);
/* Use face_temp as face input from here on: */
- t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
+ t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}
static void
emit_edgeflags(struct st_translate *t)
{
struct ureg_program *ureg = t->ureg;
- struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
+ struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
ureg_MOV(ureg, edge_dst, edge_src);
is_centroid[i]);
}
- if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+ if (proginfo->InputsRead & VARYING_BIT_POS) {
/* Must do this after setting up t->inputs, and before
* emitting constant references, below:
*/
emit_wpos(st_context(ctx), t, proginfo, ureg);
}
- if (proginfo->InputsRead & FRAG_BIT_FACE)
+ if (proginfo->InputsRead & VARYING_BIT_FACE)
emit_face_var(t);
/*
TGSI_SEMANTIC_COLOR,
outputSemanticIndex[i]);
break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_SAMPLEMASK,
+ outputSemanticIndex[i]);
+ /* TODO: If we ever support more than 32 samples, this will have
+ * to become an array.
+ */
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
+ break;
default:
assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
ret = PIPE_ERROR_BAD_INPUT;
t->outputs[i] = ureg_DECL_output(ureg,
outputSemanticName[i],
outputSemanticIndex[i]);
+ if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
+ /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
+ ureg_MOV(ureg,
+ ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
+ ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
+ }
}
if (passthrough_edgeflags)
emit_edgeflags(t);
/* Declare address register.
*/
if (program->num_address_regs > 0) {
- assert(program->num_address_regs == 1);
+ assert(program->num_address_regs <= 2);
t->address[0] = ureg_DECL_address(ureg);
+ if (program->num_address_regs == 2)
+ t->address[1] = ureg_DECL_address(ureg);
}
/* Declare misc input registers
}
}
- if (program->indirect_addr_temps) {
- /* If temps are accessed with indirect addressing, declare temporaries
- * in sequential order. Else, we declare them on demand elsewhere.
- * (Note: the number of temporaries is equal to program->next_temp)
- */
- for (i = 0; i < (unsigned)program->next_temp; i++) {
- /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
- t->temps[i] = ureg_DECL_local_temporary(t->ureg);
- }
- }
+ /* Copy over array sizes
+ */
+ memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
/* Emit constants and uniforms. TGSI uses a single index space for these,
* so we put all the translated regs in t->constants.
for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
switch (proginfo->Parameters->Parameters[i].Type) {
- case PROGRAM_ENV_PARAM:
- case PROGRAM_LOCAL_PARAM:
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
t->constants[i] = ureg_DECL_constant(ureg, i);
goto out;
}
i = 0;
- foreach_iter(exec_list_iterator, iter, program->immediates) {
- immediate_storage *imm = (immediate_storage *)iter.get();
+ foreach_list(node, &program->immediates) {
+ immediate_storage *imm = (immediate_storage *) node;
assert(i < program->num_immediates);
t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
}
assert(i == program->num_immediates);
/* texture samplers */
- for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
if (program->samplers_used & (1 << i)) {
t->samplers[i] = ureg_DECL_sampler(ureg, i);
}
/* Emit each instruction in turn:
*/
- foreach_iter(exec_list_iterator, iter, program->instructions) {
+ foreach_list(n, &program->instructions) {
set_insn_start(t, ureg_get_instruction_number(ureg));
- compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
- clamp_color);
+ compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *) n, clamp_color);
}
/* Fix up all emitted labels:
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
- for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (program->shader_program->_LinkedShaders[i] == NULL)
continue;
}
/* ----------------------------- End TGSI code ------------------------------ */
+
+/**
+ * Map a Mesa shader stage to the matching PIPE_SHADER_* value.
+ *
+ * An unrecognized stage trips an assertion and falls back to
+ * PIPE_SHADER_VERTEX.
+ */
+static unsigned
+shader_stage_to_ptarget(gl_shader_stage stage)
+{
+   unsigned ptarget;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      ptarget = PIPE_SHADER_VERTEX;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      ptarget = PIPE_SHADER_FRAGMENT;
+      break;
+   case MESA_SHADER_GEOMETRY:
+      ptarget = PIPE_SHADER_GEOMETRY;
+      break;
+   case MESA_SHADER_COMPUTE:
+      ptarget = PIPE_SHADER_COMPUTE;
+      break;
+   default:
+      /* Unknown stage: complain in debug builds, then use the fallback. */
+      assert(!"should not be reached");
+      ptarget = PIPE_SHADER_VERTEX;
+      break;
+   }
+
+   return ptarget;
+}
+
+
/**
* Convert a shader's GLSL IR into a Mesa gl_program, although without
* generating Mesa IR.
{
glsl_to_tgsi_visitor* v;
struct gl_program *prog;
- GLenum target;
- const char *target_string;
+ GLenum target = _mesa_shader_stage_to_program(shader->Stage);
bool progress;
struct gl_shader_compiler_options *options =
- &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
-
- switch (shader->Type) {
- case GL_VERTEX_SHADER:
- target = GL_VERTEX_PROGRAM_ARB;
- target_string = "vertex";
- break;
- case GL_FRAGMENT_SHADER:
- target = GL_FRAGMENT_PROGRAM_ARB;
- target_string = "fragment";
- break;
- case GL_GEOMETRY_SHADER:
- target = GL_GEOMETRY_PROGRAM_NV;
- target_string = "geometry";
- break;
- default:
- assert(!"should not be reached");
- return NULL;
- }
+ &ctx->ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
+ struct pipe_screen *pscreen = ctx->st->pipe->screen;
+ unsigned ptarget = shader_stage_to_ptarget(shader->Stage);
validate_ir_tree(shader->ir);
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
+ v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
prog->Parameters);
do {
progress = GL_FALSE;
- foreach_iter(exec_list_iterator, iter, v->function_signatures) {
- function_entry *entry = (function_entry *)iter.get();
+ foreach_list(node, &v->function_signatures) {
+ function_entry *entry = (function_entry *) node;
if (!entry->bgn_inst) {
v->current_function = entry;
/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
v->simplify_cmp();
v->copy_propagate();
- while (v->eliminate_dead_code_advanced());
+ while (v->eliminate_dead_code());
- /* FIXME: These passes to optimize temporary registers don't work when there
- * is indirect addressing of the temporary register space. We need proper
- * array support so that we don't have to give up these passes in every
- * shader that uses arrays.
- */
- if (!v->indirect_addr_temps) {
- v->eliminate_dead_code();
- v->merge_registers();
- v->renumber_registers();
- }
+ v->merge_registers();
+ v->renumber_registers();
/* Write the END instruction. */
v->emit(NULL, TGSI_OPCODE_END);
- if (ctx->Shader.Flags & GLSL_DUMP) {
+ if (ctx->_Shader->Flags & GLSL_DUMP) {
printf("\n");
- printf("GLSL IR for linked %s program %d:\n", target_string,
+ printf("GLSL IR for linked %s program %d:\n",
+ _mesa_shader_stage_to_string(shader->Stage),
shader_program->Name);
- _mesa_print_ir(shader->ir, NULL);
+ _mesa_print_ir(stdout, shader->ir, NULL);
printf("\n");
printf("\n");
fflush(stdout);
prog->Instructions = NULL;
prog->NumInstructions = 0;
- do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
+ do_set_program_inouts(shader->ir, prog, shader->Stage);
count_resources(v, prog);
_mesa_reference_program(ctx, &shader->Program, prog);
case GL_GEOMETRY_SHADER:
stgp = (struct st_geometry_program *)prog;
stgp->glsl_to_tgsi = v;
+ stgp->Base.InputType = shader_program->Geom.InputType;
+ stgp->Base.OutputType = shader_program->Geom.OutputType;
+ stgp->Base.VerticesOut = shader_program->Geom.VerticesOut;
+ stgp->Base.Invocations = shader_program->Geom.Invocations;
break;
default:
assert(!"should not be reached");
shader = rzalloc(NULL, struct gl_shader);
if (shader) {
shader->Type = type;
+ shader->Stage = _mesa_shader_enum_to_shader_stage(type);
shader->Name = name;
_mesa_init_shader(ctx, shader);
}
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ struct pipe_screen *pscreen = ctx->st->pipe->screen;
assert(prog->LinkStatus);
- for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
bool progress;
exec_list *ir = prog->_LinkedShaders[i]->ir;
const struct gl_shader_compiler_options *options =
- &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+ &ctx->ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type)];
- do {
- unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP |
- EXP_TO_EXP2 | LOG_TO_LOG2;
- if (options->EmitNoPow)
- what_to_lower |= POW_TO_EXP2;
- if (!ctx->Const.NativeIntegers)
- what_to_lower |= INT_DIV_TO_MUL_RCP;
+ /* If there are forms of indirect addressing that the driver
+ * cannot handle, perform the lowering pass.
+ */
+ if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
+ options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
+ lower_variable_index_to_cond_assign(ir,
+ options->EmitNoIndirectInput,
+ options->EmitNoIndirectOutput,
+ options->EmitNoIndirectTemp,
+ options->EmitNoIndirectUniform);
+ }
- progress = false;
+ if (ctx->Extensions.ARB_shading_language_packing) {
+ unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
+ LOWER_UNPACK_SNORM_2x16 |
+ LOWER_PACK_UNORM_2x16 |
+ LOWER_UNPACK_UNORM_2x16 |
+ LOWER_PACK_SNORM_4x8 |
+ LOWER_UNPACK_SNORM_4x8 |
+ LOWER_UNPACK_UNORM_4x8 |
+ LOWER_PACK_UNORM_4x8 |
+ LOWER_PACK_HALF_2x16 |
+ LOWER_UNPACK_HALF_2x16;
+
+ lower_packing_builtins(ir, lower_inst);
+ }
+
+ if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
+ lower_offset_arrays(ir);
+ do_mat_op_to_vec(ir);
+ lower_instructions(ir,
+ MOD_TO_FRACT |
+ DIV_TO_MUL_RCP |
+ EXP_TO_EXP2 |
+ LOG_TO_LOG2 |
+ LDEXP_TO_ARITH |
+ CARRY_TO_ARITH |
+ BORROW_TO_ARITH |
+ (options->EmitNoPow ? POW_TO_EXP2 : 0) |
+ (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
+
+ lower_ubo_reference(prog->_LinkedShaders[i], ir);
+ do_vec_index_to_cond_assign(ir);
+ lower_vector_insert(ir, true);
+ lower_quadop_vector(ir, false);
+ lower_noise(ir);
+ if (options->MaxIfDepth == 0) {
+ lower_discard(ir);
+ }
- /* Lowering */
- do_mat_op_to_vec(ir);
- lower_instructions(ir, what_to_lower);
+ do {
+ progress = false;
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
- progress = do_common_optimization(ir, true, true,
- options->MaxUnrollIterations)
+ progress = do_common_optimization(ir, true, true, options,
+ ctx->Const.NativeIntegers)
|| progress;
- progress = lower_quadop_vector(ir, false) || progress;
-
- if (options->MaxIfDepth == 0)
- progress = lower_discard(ir) || progress;
-
progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
- if (options->EmitNoNoise)
- progress = lower_noise(ir) || progress;
-
- /* If there are forms of indirect addressing that the driver
- * cannot handle, perform the lowering pass.
- */
- if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
- || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
- progress =
- lower_variable_index_to_cond_assign(ir,
- options->EmitNoIndirectInput,
- options->EmitNoIndirectOutput,
- options->EmitNoIndirectTemp,
- options->EmitNoIndirectUniform)
- || progress;
-
- progress = do_vec_index_to_cond_assign(ir) || progress;
-
- lower_ubo_reference(prog->_LinkedShaders[i], ir);
} while (progress);
validate_ir_tree(ir);
}
- for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_program *linked_prog;
if (prog->_LinkedShaders[i] == NULL)
linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
if (linked_prog) {
- static const GLenum targets[] = {
- GL_VERTEX_PROGRAM_ARB,
- GL_FRAGMENT_PROGRAM_ARB,
- GL_GEOMETRY_PROGRAM_NV
- };
-
_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
linked_prog);
- if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
+ if (!ctx->Driver.ProgramStringNotify(ctx,
+ _mesa_shader_stage_to_program(i),
+ linked_prog)) {
_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
NULL);
_mesa_reference_program(ctx, &linked_prog, NULL);