#include "tgsi/tgsi_info.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "st_glsl_types.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_format.h"
-#include "st_glsl_types.h"
#include "st_nir.h"
#include "st_shader_cache.h"
+#include "st_glsl_to_tgsi_temprename.h"
+#include "util/hash_table.h"
#include <algorithm>
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
#define MAX_GLSL_TEXTURE_OFFSET 4
-class st_src_reg;
-class st_dst_reg;
-
-static int swizzle_for_size(int size);
-
-static int swizzle_for_type(const glsl_type *type, int component = 0)
+/**
+ * Report whether a variable is flagged precise or invariant.
+ *
+ * Returns 1 when ir->data.precise or ir->data.invariant is set, and 0 when
+ * neither is set or when ir is NULL.
+ */
+static unsigned is_precise(const ir_variable *ir)
{
- unsigned num_elements = 4;
-
- if (type) {
- type = type->without_array();
- if (type->is_scalar() || type->is_vector() || type->is_matrix())
- num_elements = type->vector_elements;
- }
-
- int swizzle = swizzle_for_size(num_elements);
- assert(num_elements + component <= 4);
-
- swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1);
- return swizzle;
+   /* A missing variable is treated as neither precise nor invariant. */
+   return ir != NULL && (ir->data.precise || ir->data.invariant);
}
-/**
- * This struct is a corresponding struct to TGSI ureg_src.
- */
-class st_src_reg {
-public:
- st_src_reg(gl_register_file file, int index, const glsl_type *type,
- int component = 0, unsigned array_id = 0)
- {
- assert(file != PROGRAM_ARRAY || array_id != 0);
- this->file = file;
- this->index = index;
- this->swizzle = swizzle_for_type(type, component);
- this->negate = 0;
- this->abs = 0;
- this->index2D = 0;
- this->type = type ? type->base_type : GLSL_TYPE_ERROR;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->double_reg2 = false;
- this->array_id = array_id;
- this->is_double_vertex_input = false;
- }
-
- st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
- {
- assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
- this->type = type;
- this->file = file;
- this->index = index;
- this->index2D = 0;
- this->swizzle = SWIZZLE_XYZW;
- this->negate = 0;
- this->abs = 0;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->double_reg2 = false;
- this->array_id = 0;
- this->is_double_vertex_input = false;
- }
-
- st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D)
- {
- assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
- this->type = type;
- this->file = file;
- this->index = index;
- this->index2D = index2D;
- this->swizzle = SWIZZLE_XYZW;
- this->negate = 0;
- this->abs = 0;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->double_reg2 = false;
- this->array_id = 0;
- this->is_double_vertex_input = false;
- }
-
- st_src_reg()
- {
- this->type = GLSL_TYPE_ERROR;
- this->file = PROGRAM_UNDEFINED;
- this->index = 0;
- this->index2D = 0;
- this->swizzle = 0;
- this->negate = 0;
- this->abs = 0;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->double_reg2 = false;
- this->array_id = 0;
- this->is_double_vertex_input = false;
- }
-
- explicit st_src_reg(st_dst_reg reg);
-
- int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
- int16_t index2D;
- uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
- int negate:4; /**< NEGATE_XYZW mask from mesa */
- unsigned abs:1;
- enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
- unsigned has_index2:1;
- gl_register_file file:5; /**< PROGRAM_* from Mesa */
- /*
- * Is this the second half of a double register pair?
- * currently used for input mapping only.
- */
- unsigned double_reg2:1;
- unsigned is_double_vertex_input:1;
- unsigned array_id:10;
-
- /** Register index should be offset by the integer in this reg. */
- st_src_reg *reladdr;
- st_src_reg *reladdr2;
-
- st_src_reg get_abs()
- {
- st_src_reg reg = *this;
- reg.negate = 0;
- reg.abs = 1;
- return reg;
- }
-};
+class variable_storage {
+ DECLARE_RZALLOC_CXX_OPERATORS(variable_storage)
-class st_dst_reg {
-public:
- st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index)
- {
- assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
- this->file = file;
- this->index = index;
- this->index2D = 0;
- this->writemask = writemask;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->type = type;
- this->array_id = 0;
- }
-
- st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type)
- {
- assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
- this->file = file;
- this->index = 0;
- this->index2D = 0;
- this->writemask = writemask;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->type = type;
- this->array_id = 0;
- }
-
- st_dst_reg()
- {
- this->type = GLSL_TYPE_ERROR;
- this->file = PROGRAM_UNDEFINED;
- this->index = 0;
- this->index2D = 0;
- this->writemask = 0;
- this->reladdr = NULL;
- this->reladdr2 = NULL;
- this->has_index2 = false;
- this->array_id = 0;
- }
-
- explicit st_dst_reg(st_src_reg reg);
-
- int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
- int16_t index2D;
- gl_register_file file:5; /**< PROGRAM_* from Mesa */
- unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */
- enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
- unsigned has_index2:1;
- unsigned array_id:10;
-
- /** Register index should be offset by the integer in this reg. */
- st_src_reg *reladdr;
- st_src_reg *reladdr2;
-};
-
-st_src_reg::st_src_reg(st_dst_reg reg)
-{
- this->type = reg.type;
- this->file = reg.file;
- this->index = reg.index;
- this->swizzle = SWIZZLE_XYZW;
- this->negate = 0;
- this->abs = 0;
- this->reladdr = reg.reladdr;
- this->index2D = reg.index2D;
- this->reladdr2 = reg.reladdr2;
- this->has_index2 = reg.has_index2;
- this->double_reg2 = false;
- this->array_id = reg.array_id;
- this->is_double_vertex_input = false;
-}
-
-st_dst_reg::st_dst_reg(st_src_reg reg)
-{
- this->type = reg.type;
- this->file = reg.file;
- this->index = reg.index;
- this->writemask = WRITEMASK_XYZW;
- this->reladdr = reg.reladdr;
- this->index2D = reg.index2D;
- this->reladdr2 = reg.reladdr2;
- this->has_index2 = reg.has_index2;
- this->array_id = reg.array_id;
-}
-
-class glsl_to_tgsi_instruction : public exec_node {
-public:
- DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)
-
- st_dst_reg dst[2];
- st_src_reg src[4];
- st_src_reg resource; /**< sampler or buffer register */
- st_src_reg *tex_offsets;
-
- /** Pointer to the ir source this tree came from for debugging */
- ir_instruction *ir;
-
- unsigned op:8; /**< TGSI opcode */
- unsigned saturate:1;
- unsigned is_64bit_expanded:1;
- unsigned sampler_base:5;
- unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
- unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
- glsl_base_type tex_type:5;
- unsigned tex_shadow:1;
- unsigned image_format:9;
- unsigned tex_offset_num_offset:3;
- unsigned dead_mask:4; /**< Used in dead code elimination */
- unsigned buffer_access:3; /**< buffer access type */
-
- const struct tgsi_opcode_info *info;
-};
-
-class variable_storage : public exec_node {
public:
variable_storage(ir_variable *var, gl_register_file file, int index,
unsigned array_id = 0)
return GLSL_TYPE_ERROR;
}
-struct rename_reg_pair {
- int old_reg;
- int new_reg;
-};
-
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
glsl_to_tgsi_visitor();
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
- int buffers_used;
int images_used;
int image_targets[PIPE_MAX_SHADER_IMAGES];
unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
bool have_fma;
bool use_shared_memory;
bool has_tex_txf_lz;
+ bool precise;
variable_storage *find_variable_storage(ir_variable *var);
st_src_reg st_src_reg_for_double(double val);
st_src_reg st_src_reg_for_float(float val);
st_src_reg st_src_reg_for_int(int val);
+ st_src_reg st_src_reg_for_int64(int64_t val);
st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);
/**
st_src_reg result;
/** List of variable_storage */
- exec_list variables;
+ struct hash_table *variables;
/** List of immediate_storage */
exec_list immediates;
void simplify_cmp(void);
- void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
+ void rename_temp_registers(struct rename_reg_pair *renames);
void get_first_temp_read(int *first_reads);
void get_first_temp_write(int *first_writes);
void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
prog->data->LinkStatus = linking_failure;
}
-static int
+int
swizzle_for_size(int size)
{
static const int size_swizzles[4] = {
return size_swizzles[size - 1];
}
-static bool
-is_resource_instruction(unsigned opcode)
-{
- switch (opcode) {
- case TGSI_OPCODE_RESQ:
- case TGSI_OPCODE_LOAD:
- case TGSI_OPCODE_ATOMUADD:
- case TGSI_OPCODE_ATOMXCHG:
- case TGSI_OPCODE_ATOMCAS:
- case TGSI_OPCODE_ATOMAND:
- case TGSI_OPCODE_ATOMOR:
- case TGSI_OPCODE_ATOMXOR:
- case TGSI_OPCODE_ATOMUMIN:
- case TGSI_OPCODE_ATOMUMAX:
- case TGSI_OPCODE_ATOMIMIN:
- case TGSI_OPCODE_ATOMIMAX:
- return true;
- default:
- return false;
- }
-}
-
-static unsigned
-num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
-{
- return op->info->num_dst;
-}
-
-static unsigned
-num_inst_src_regs(const glsl_to_tgsi_instruction *op)
-{
- return op->info->is_tex || is_resource_instruction(op->op) ?
- op->info->num_src - 1 : op->info->num_src;
-}
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
STATIC_ASSERT(TGSI_OPCODE_LAST <= 255);
inst->op = op;
+ inst->precise = this->precise;
inst->info = tgsi_get_opcode_info(op);
inst->dst[0] = dst;
inst->dst[1] = dst1;
if (swz > 1) {
dinst->src[j].double_reg2 = true;
dinst->src[j].index++;
- }
+ }
if (swz & 1)
dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
else \
op = TGSI_OPCODE_##f; \
break;
-#define case5(c, f, i, u, d) \
- case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_DOUBLE) \
- op = TGSI_OPCODE_##d; \
- else if (type == GLSL_TYPE_INT) \
- op = TGSI_OPCODE_##i; \
- else if (type == GLSL_TYPE_UINT) \
- op = TGSI_OPCODE_##u; \
- else \
- op = TGSI_OPCODE_##f; \
- break;
-
-#define case4(c, f, i, u) \
- case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_INT) \
- op = TGSI_OPCODE_##i; \
- else if (type == GLSL_TYPE_UINT) \
- op = TGSI_OPCODE_##u; \
- else \
- op = TGSI_OPCODE_##f; \
- break;
-
-#define case3(f, i, u) case4(f, f, i, u)
-#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64)
-#define case3fid(f, i, d) case5(f, f, i, i, d)
-#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64)
-#define case2fi(f, i) case4(f, f, i, i)
-#define case2iu(i, u) case4(i, LAST, i, u)
-
-#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64)
-#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64)
#define casecomp(c, f, i, u, d, i64, ui64) \
case TGSI_OPCODE_##c: \
break;
switch(op) {
- case3fid64(ADD, UADD, DADD, U64ADD);
- case3fid64(MUL, UMUL, DMUL, U64MUL);
- case3fid(MAD, UMAD, DMAD);
- case3fid(FMA, UMAD, DFMA);
- case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV);
- case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
- case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
- case4iu64(MOD, UMOD, I64MOD, U64MOD);
+ /* Some instructions are initially selected without considering the type.
+ * This fixes the type:
+ *
+ * INIT FLOAT SINT UINT DOUBLE SINT64 UINT64
+ */
+ case7(ADD, ADD, UADD, UADD, DADD, U64ADD, U64ADD);
+ case7(CEIL, CEIL, LAST, LAST, DCEIL, LAST, LAST);
+ case7(DIV, DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV);
+ case7(FMA, FMA, UMAD, UMAD, DFMA, LAST, LAST);
+ case7(FLR, FLR, LAST, LAST, DFLR, LAST, LAST);
+ case7(FRC, FRC, LAST, LAST, DFRAC, LAST, LAST);
+ case7(MUL, MUL, UMUL, UMUL, DMUL, U64MUL, U64MUL);
+ case7(MAD, MAD, UMAD, UMAD, DMAD, LAST, LAST);
+ case7(MAX, MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
+ case7(MIN, MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
+ case7(RCP, RCP, LAST, LAST, DRCP, LAST, LAST);
+ case7(ROUND, ROUND,LAST, LAST, DROUND, LAST, LAST);
+ case7(RSQ, RSQ, LAST, LAST, DRSQ, LAST, LAST);
+ case7(SQRT, SQRT, LAST, LAST, DSQRT, LAST, LAST);
+ case7(SSG, SSG, ISSG, ISSG, DSSG, I64SSG, I64SSG);
+ case7(TRUNC, TRUNC,LAST, LAST, DTRUNC, LAST, LAST);
+
+ case7(MOD, LAST, MOD, UMOD, LAST, I64MOD, U64MOD);
+ case7(SHL, LAST, SHL, SHL, LAST, U64SHL, U64SHL);
+ case7(IBFE, LAST, IBFE, UBFE, LAST, LAST, LAST);
+ case7(IMSB, LAST, IMSB, UMSB, LAST, LAST, LAST);
+ case7(IMUL_HI, LAST, IMUL_HI, UMUL_HI, LAST, LAST, LAST);
+ case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR);
+ case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST);
+ case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST);
casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
- case2iu64(SHL, U64SHL);
- case4iu64(ISHR, USHR, I64SHR, U64SHR);
-
- case3fid64(SSG, ISSG, DSSG, I64SSG);
-
- case2iu(IBFE, UBFE);
- case2iu(IMSB, UMSB);
- case2iu(IMUL_HI, UMUL_HI);
-
- case3fid(SQRT, SQRT, DSQRT);
-
- case3fid(RCP, RCP, DRCP);
- case3fid(RSQ, RSQ, DRSQ);
-
- case3fid(FRC, FRC, DFRAC);
- case3fid(TRUNC, TRUNC, DTRUNC);
- case3fid(CEIL, CEIL, DCEIL);
- case3fid(FLR, FLR, DFLR);
- case3fid(ROUND, ROUND, DROUND);
-
- case2iu(ATOMIMAX, ATOMUMAX);
- case2iu(ATOMIMIN, ATOMUMIN);
-
default: break;
}
return src;
}
+/**
+ * Build a PROGRAM_IMMEDIATE source register of type GLSL_TYPE_INT64 that
+ * holds val.
+ *
+ * The 64-bit value is copied bit-for-bit into two gl_constant_value words
+ * and registered through add_constant() as a single GL_DOUBLE-typed
+ * constant. The returned register reads it with an XYXY swizzle.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val)
+{
+   union gl_constant_value words[2];
+   st_src_reg imm(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64);
+
+   memcpy(words, &val, sizeof(words));
+   imm.index = add_constant(imm.file, words, 1, GL_DOUBLE, &imm.swizzle);
+
+   /* Whatever swizzle add_constant() reported is replaced by XYXY. */
+   imm.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+   return imm;
+}
+
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
{
static int
attrib_type_size(const struct glsl_type *type, bool is_vs_input)
{
- return st_glsl_attrib_type_size(type, is_vs_input);
+ return type->count_attribute_slots(is_vs_input); /* size in attribute slots; is_vs_input is passed through unchanged */
}
static int
type_size(const struct glsl_type *type)
{
- return st_glsl_type_size(type);
+ return type->count_attribute_slots(false); /* same count as attrib_type_size(type, false) */
+}
+
+static void
+add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf,
+ exec_list *instructions, ir_constant *access)
+{
+ /**
+ * Set inst->resource to *buf and, when access is non-NULL, set
+ * inst->buffer_access to access->value.u[0].
+ *
+ * emit_asm() might have actually split the op into pieces, e.g. for
+ * double stores. We have to go back and fix up all the generated ops.
+ *
+ * The loop therefore walks backwards through the instruction list,
+ * starting at inst. It stops at the list head, or at the first earlier
+ * instruction whose opcode differs from inst's or whose resource file
+ * is no longer PROGRAM_UNDEFINED. At each step one TGSI_OPCODE_UADD is
+ * skipped without being modified; presumably that is the address
+ * increment emitted between the pieces -- TODO confirm against
+ * emit_asm().
+ */
+ unsigned op = inst->op;
+ do {
+ inst->resource = *buf;
+ if (access)
+ inst->buffer_access = access->value.u[0];
+
+ if (inst == instructions->get_head_raw())
+ break;
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+
+ if (inst->op == TGSI_OPCODE_UADD) {
+ if (inst == instructions->get_head_raw())
+ break;
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ }
+ } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
}
/**
variable_storage *
glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
{
- foreach_in_list(variable_storage, entry, &this->variables) {
- if (entry->var == var)
- return entry;
- }
- return NULL;
+   /* Storage records are looked up in the hash table with the ir_variable
+    * as key.
+    */
+   struct hash_entry *const found =
+      _mesa_hash_table_search(this->variables, var);
+
+   /* No entry means no storage has been recorded for this variable. */
+   return found ? (variable_storage *)found->data : NULL;
}
void
if (i == ir->get_num_state_slots()) {
/* We'll set the index later. */
storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
- this->variables.push_tail(storage);
+
+ _mesa_hash_table_insert(this->variables, ir, storage);
dst = undef_dst;
} else {
storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index,
dst.array_id);
- this->variables.push_tail(storage);
+ _mesa_hash_table_insert(this->variables, ir, storage);
}
/* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
*/
- if (ir->operation == ir_binop_add) {
+ if (!this->precise && ir->operation == ir_binop_add) {
if (try_emit_mad(ir, 1))
return;
if (try_emit_mad(ir, 0))
if (ir->operation == ir_quadop_vector)
assert(!"ir_quadop_vector should have been lowered");
- for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) {
+ for (unsigned int operand = 0; operand < ir->num_operands; operand++) {
this->result.file = PROGRAM_UNDEFINED;
ir->operands[operand]->accept(this);
if (this->result.file == PROGRAM_UNDEFINED) {
break;
case ir_binop_ubo_load: {
- ir_constant *const_uniform_block = ir->operands[0]->as_constant();
- ir_constant *const_offset_ir = ir->operands[1]->as_constant();
- unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
- unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
- st_src_reg index_reg = get_temp(glsl_type::uint_type);
- st_src_reg cbuf;
-
- cbuf.type = ir->type->base_type;
- cbuf.file = PROGRAM_CONSTANT;
- cbuf.index = 0;
- cbuf.reladdr = NULL;
- cbuf.negate = 0;
- cbuf.abs = 0;
-
- assert(ir->type->is_vector() || ir->type->is_scalar());
-
- if (const_offset_ir) {
- /* Constant index into constant buffer */
- cbuf.reladdr = NULL;
- cbuf.index = const_offset / 16;
- }
- else {
- ir_expression *offset_expr = ir->operands[1]->as_expression();
- st_src_reg offset = op[1];
-
- /* The OpenGL spec is written in such a way that accesses with
- * non-constant offset are almost always vec4-aligned. The only
- * exception to this are members of structs in arrays of structs:
- * each struct in an array of structs is at least vec4-aligned,
- * but single-element and [ui]vec2 members of the struct may be at
- * an offset that is not a multiple of 16 bytes.
- *
- * Here, we extract that offset, relying on previous passes to always
- * generate offset expressions of the form (+ expr constant_offset).
- *
- * Note that the std430 layout, which allows more cases of alignment
- * less than vec4 in arrays, is not supported for uniform blocks, so
- * we do not have to deal with it here.
- */
- if (offset_expr && offset_expr->operation == ir_binop_add) {
- const_offset_ir = offset_expr->operands[1]->as_constant();
- if (const_offset_ir) {
- const_offset = const_offset_ir->value.u[0];
- cbuf.index = const_offset / 16;
- offset_expr->operands[0]->accept(this);
- offset = this->result;
- }
- }
+ if (ctx->Const.UseSTD430AsDefaultPacking) {
+ ir_rvalue *block = ir->operands[0];
+ ir_rvalue *offset = ir->operands[1];
+ ir_constant *const_block = block->as_constant();
- /* Relative/variable index into constant buffer */
- emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
- st_src_reg_for_int(4));
- cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
- }
+ st_src_reg cbuf(PROGRAM_CONSTANT,
+ (const_block ? const_block->value.u[0] + 1 : 1),
+ ir->type->base_type);
- if (const_uniform_block) {
- /* Constant constant buffer */
- cbuf.reladdr2 = NULL;
- cbuf.index2D = const_block;
cbuf.has_index2 = true;
- }
- else {
- /* Relative/variable constant buffer */
- cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
- cbuf.index2D = 1;
- memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
- cbuf.has_index2 = true;
- }
- cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
- if (glsl_base_type_is_64bit(cbuf.type))
- cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
- const_offset % 16 / 8,
- const_offset % 16 / 8,
- const_offset % 16 / 8);
- else
- cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4);
+ if (!const_block) {
+ block->accept(this);
+ cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
+ *cbuf.reladdr = this->result;
+ emit_arl(ir, sampler_reladdr, this->result);
+ }
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ glsl_to_tgsi_instruction *inst =
+ emit_asm(ir, TGSI_OPCODE_LOAD, result_dst, off);
- if (ir->type->is_boolean()) {
- emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
+ if (result_dst.type == GLSL_TYPE_BOOL)
+ emit_asm(ir, TGSI_OPCODE_USNE, result_dst, st_src_reg(result_dst),
+ st_src_reg_for_int(0));
+
+ add_buffer_to_load_and_stores(inst, &cbuf, &this->instructions,
+ NULL);
} else {
- emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+ ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+ ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+ unsigned const_offset = const_offset_ir ?
+ const_offset_ir->value.u[0] : 0;
+ unsigned const_block = const_uniform_block ?
+ const_uniform_block->value.u[0] + 1 : 1;
+ st_src_reg index_reg = get_temp(glsl_type::uint_type);
+ st_src_reg cbuf;
+
+ cbuf.type = ir->type->base_type;
+ cbuf.file = PROGRAM_CONSTANT;
+ cbuf.index = 0;
+ cbuf.reladdr = NULL;
+ cbuf.negate = 0;
+ cbuf.abs = 0;
+ cbuf.index2D = const_block;
+
+ assert(ir->type->is_vector() || ir->type->is_scalar());
+
+ if (const_offset_ir) {
+ /* Constant index into constant buffer */
+ cbuf.reladdr = NULL;
+ cbuf.index = const_offset / 16;
+ } else {
+ ir_expression *offset_expr = ir->operands[1]->as_expression();
+ st_src_reg offset = op[1];
+
+ /* The OpenGL spec is written in such a way that accesses with
+ * non-constant offset are almost always vec4-aligned. The only
+ * exception to this are members of structs in arrays of structs:
+ * each struct in an array of structs is at least vec4-aligned,
+ * but single-element and [ui]vec2 members of the struct may be at
+ * an offset that is not a multiple of 16 bytes.
+ *
+ * Here, we extract that offset, relying on previous passes to
+ * always generate offset expressions of the form
+ * (+ expr constant_offset).
+ *
+ * Note that the std430 layout, which allows more cases of
+ * alignment less than vec4 in arrays, is not supported for
+ * uniform blocks, so we do not have to deal with it here.
+ */
+ if (offset_expr && offset_expr->operation == ir_binop_add) {
+ const_offset_ir = offset_expr->operands[1]->as_constant();
+ if (const_offset_ir) {
+ const_offset = const_offset_ir->value.u[0];
+ cbuf.index = const_offset / 16;
+ offset_expr->operands[0]->accept(this);
+ offset = this->result;
+ }
+ }
+
+ /* Relative/variable index into constant buffer */
+ emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
+ st_src_reg_for_int(4));
+ cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
+ }
+
+ if (const_uniform_block) {
+ /* Constant constant buffer */
+ cbuf.reladdr2 = NULL;
+ } else {
+ /* Relative/variable constant buffer */
+ cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
+ memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
+ }
+ cbuf.has_index2 = true;
+
+ cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
+ if (glsl_base_type_is_64bit(cbuf.type))
+ cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
+ const_offset % 16 / 8,
+ const_offset % 16 / 8,
+ const_offset % 16 / 8);
+ else
+ cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4);
+
+ if (ir->type->is_boolean()) {
+ emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf,
+ st_src_reg_for_int(0));
+ } else {
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+ }
}
break;
}
case ir_unop_pack_int_2x32:
case ir_unop_unpack_uint_2x32:
case ir_unop_pack_uint_2x32:
+ case ir_unop_unpack_sampler_2x32:
+ case ir_unop_pack_sampler_2x32:
+ case ir_unop_unpack_image_2x32:
+ case ir_unop_pack_image_2x32:
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
st_src_reg temp = get_temp(glsl_type::uvec4_type);
st_dst_reg temp_dst = st_dst_reg(temp);
unsigned orig_swz = op[0].swizzle;
- /*
+ /*
* To convert unsigned to 64-bit:
* zero Y channel, copy X channel.
*/
break;
}
case ir_unop_i642b:
- emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0));
break;
case ir_unop_i642f:
emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_4x8:
- case ir_unop_unpack_sampler_2x32:
- case ir_unop_pack_sampler_2x32:
- case ir_unop_unpack_image_2x32:
- case ir_unop_pack_image_2x32:
-
case ir_quadop_vector:
case ir_binop_vector_extract:
case ir_triop_vector_insert:
case ir_var_uniform:
entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
var->data.param_index);
- this->variables.push_tail(entry);
+ _mesa_hash_table_insert(this->variables, var, entry);
break;
case ir_var_shader_in: {
/* The linker assigns locations for varyings and attributes,
decl->array_id);
entry->component = component;
- this->variables.push_tail(entry);
+ _mesa_hash_table_insert(this->variables, var, entry);
+
break;
}
case ir_var_shader_out: {
}
entry->component = component;
- this->variables.push_tail(entry);
+ _mesa_hash_table_insert(this->variables, var, entry);
+
break;
}
case ir_var_system_value:
entry = new(mem_ctx) variable_storage(var, src.file, src.index,
src.array_id);
- this->variables.push_tail(entry);
+ _mesa_hash_table_insert(this->variables, var, entry);
break;
}
this->result = st_src_reg(entry->file, entry->index, var->type,
entry->component, entry->array_id);
- if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double())
+ if (this->shader->Stage == MESA_SHADER_VERTEX &&
+ var->data.mode == ir_var_shader_in &&
+ var->type->without_array()->is_double())
this->result.is_double_vertex_input = true;
if (!native_integers)
this->result.type = GLSL_TYPE_FLOAT;
{
ir_constant *index;
st_src_reg src;
- int element_size = type_size(ir->type);
bool is_2D = false;
+ ir_variable *var = ir->variable_referenced();
+
+ /* We only need the logic provided by st_glsl_storage_type_size()
+ * for arrays of structs. Indirect sampler and image indexing is handled
+ * elsewhere.
+ */
+ int element_size = ir->type->without_array()->is_record() ?
+ st_glsl_storage_type_size(ir->type, var->data.bindless) :
+ type_size(ir->type);
- index = ir->array_index->constant_expression_value();
+ index = ir->array_index->constant_expression_value(ralloc_parent(ir));
ir->array->accept(this);
src = this->result;
- if (ir->array->ir_type != ir_type_dereference_array) {
+ if (!src.has_index2) {
switch (this->prog->Target) {
case GL_TESS_CONTROL_PROGRAM_NV:
is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
if (index) {
if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- src.file == PROGRAM_INPUT)
- element_size = attrib_type_size(ir->type, true);
+ src.file == PROGRAM_INPUT)
+ element_size = attrib_type_size(ir->type, true);
if (is_2D) {
src.index2D = index->value.i[0];
src.has_index2 = true;
{
unsigned int i;
const glsl_type *struct_type = ir->record->type;
+ ir_variable *var = ir->record->variable_referenced();
int offset = 0;
ir->record->accept(this);
+ assert(ir->field_idx >= 0);
+ assert(var);
for (i = 0; i < struct_type->length; i++) {
- if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+ if (i == (unsigned) ir->field_idx)
break;
- offset += type_size(struct_type->fields.structure[i].type);
+ const glsl_type *member_type = struct_type->fields.structure[i].type;
+ offset += st_glsl_storage_type_size(member_type, var->data.bindless);
}
/* If the type is smaller than a vec4, replicate the last channel out. */
ir_expression *const expr = ir->as_expression();
if (native_integers) {
- if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+ if ((expr != NULL) && (expr->num_operands == 2)) {
enum glsl_base_type type = expr->operands[0]->type->base_type;
if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
type == GLSL_TYPE_BOOL) {
return switch_order;
}
- if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+ if ((expr != NULL) && (expr->num_operands == 2)) {
bool zero_on_left = false;
if (expr->operands[0]->is_zero()) {
if (type->is_dual_slot()) {
l->index++;
if (r->is_double_vertex_input == false)
- r->index++;
+ r->index++;
}
}
st_dst_reg l;
st_src_reg r;
+ /* all generated instructions need to be flagged as precise */
+ this->precise = is_precise(ir->lhs->variable_referenced());
ir->rhs->accept(this);
r = this->result;
inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
new_inst->saturate = inst->saturate;
+ new_inst->resource = inst->resource;
inst->dead_mask = inst->dst[0].writemask;
} else {
emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
}
+ this->precise = 0;
}
st_src_reg temp_base = get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
- foreach_in_list(ir_constant, field_value, &ir->components) {
+ for (i = 0; i < ir->type->length; i++) {
+ ir_constant *const field_value = ir->get_record_field(i);
int size = type_size(field_value->type);
assert(size > 0);
in_array++;
for (i = 0; i < ir->type->length; i++) {
- ir->array_elements[i]->accept(this);
+ ir->const_elements[i]->accept(this);
src = this->result;
for (int j = 0; j < size; j++) {
emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
assert(access);
}
- /* The emit_asm() might have actually split the op into pieces, e.g. for
- * double stores. We have to go back and fix up all the generated ops.
- */
- unsigned op = inst->op;
- do {
- inst->resource = buffer;
- if (access)
- inst->buffer_access = access->value.u[0];
-
- if (inst == this->instructions.get_head_raw())
- break;
- inst = (glsl_to_tgsi_instruction *)inst->get_prev();
-
- if (inst->op == TGSI_OPCODE_UADD) {
- if (inst == this->instructions.get_head_raw())
- break;
- inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- }
- } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
+ add_buffer_to_load_and_stores(inst, &buffer, &this->instructions, access);
}
void
}
}
+/**
+ * Look up the image type and qualifiers that apply to an image dereference.
+ *
+ * - record dereference: taken from the referenced struct field (field type
+ *   with arrays stripped, memory_coherent, memory_volatile, memory_restrict
+ *   and image_format);
+ * - array dereference: taken from whatever the array operand resolves to,
+ *   via a recursive call;
+ * - variable dereference: taken from the variable's type (arrays stripped)
+ *   and from ir_variable::data.
+ *
+ * All five results are returned through the pointer arguments. For any
+ * other node kind they are left untouched, and debug builds assert.
+ */
+static void
+get_image_qualifiers(ir_dereference *ir, const glsl_type **type,
+                     bool *memory_coherent, bool *memory_volatile,
+                     bool *memory_restrict, unsigned *image_format)
+{
+   switch (ir->ir_type) {
+   case ir_type_dereference_record: {
+      ir_dereference_record *deref_record = ir->as_dereference_record();
+      const glsl_type *struct_type = deref_record->record->type;
+      const int field_idx = deref_record->field_idx;
+
+      /* field_idx indexes the field array directly, so it must be valid. */
+      assert(field_idx >= 0);
+
+      *type = struct_type->fields.structure[field_idx].type->without_array();
+      *memory_coherent =
+         struct_type->fields.structure[field_idx].memory_coherent;
+      *memory_volatile =
+         struct_type->fields.structure[field_idx].memory_volatile;
+      *memory_restrict =
+         struct_type->fields.structure[field_idx].memory_restrict;
+      *image_format =
+         struct_type->fields.structure[field_idx].image_format;
+      break;
+   }
+
+   case ir_type_dereference_array: {
+      ir_dereference_array *deref_arr = ir->as_dereference_array();
+
+      /* NOTE(review): unchecked downcast; assumes the array operand is
+       * itself a dereference -- confirm against callers.
+       */
+      get_image_qualifiers((ir_dereference *)deref_arr->array, type,
+                           memory_coherent, memory_volatile, memory_restrict,
+                           image_format);
+      break;
+   }
+
+   case ir_type_dereference_variable: {
+      ir_variable *var = ir->variable_referenced();
+
+      *type = var->type->without_array();
+      *memory_coherent = var->data.memory_coherent;
+      *memory_volatile = var->data.memory_volatile;
+      *memory_restrict = var->data.memory_restrict;
+      *image_format = var->data.image_format;
+      break;
+   }
+
+   default:
+      /* Returning silently would hand the caller an unset *type, which it
+       * dereferences afterwards; fail loudly in debug builds instead.
+       */
+      assert(!"Unexpected dereference type for image qualifiers");
+      break;
+   }
+}
+
void
glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
{
ir_dereference *img = (ir_dereference *)param;
const ir_variable *imgvar = img->variable_referenced();
- const glsl_type *type = imgvar->type->without_array();
unsigned sampler_array_size = 1, sampler_base = 0;
+ bool memory_coherent = false, memory_volatile = false, memory_restrict = false;
+ unsigned image_format = 0;
+ const glsl_type *type = NULL;
+
+ get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile,
+ &memory_restrict, &image_format);
st_src_reg reladdr;
st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
-
+ uint16_t index = 0;
get_deref_offsets(img, &sampler_array_size, &sampler_base,
- (uint16_t*)&image.index, &reladdr, true);
+ &index, &reladdr, !imgvar->contains_bindless());
+ image.index = index;
if (reladdr.file != PROGRAM_UNDEFINED) {
image.reladdr = ralloc(mem_ctx, st_src_reg);
*image.reladdr = reladdr;
inst->dst[0].writemask = WRITEMASK_XYZW;
}
- inst->resource = image;
- inst->sampler_array_size = sampler_array_size;
- inst->sampler_base = sampler_base;
+ if (imgvar->contains_bindless()) {
+ img->accept(this);
+ inst->resource = this->result;
+ inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_X, SWIZZLE_Y);
+ } else {
+ inst->resource = image;
+ inst->sampler_array_size = sampler_array_size;
+ inst->sampler_base = sampler_base;
+ }
inst->tex_target = type->sampler_index();
inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
- _mesa_get_shader_image_format(imgvar->data.image_format));
+ _mesa_get_shader_image_format(image_format));
- if (imgvar->data.memory_coherent)
+ if (memory_coherent)
inst->buffer_access |= TGSI_MEMORY_COHERENT;
- if (imgvar->data.memory_restrict)
+ if (memory_restrict)
inst->buffer_access |= TGSI_MEMORY_RESTRICT;
- if (imgvar->data.memory_volatile)
+ if (memory_volatile)
inst->buffer_access |= TGSI_MEMORY_VOLATILE;
}
ir->return_deref->accept(this);
st_dst_reg dst = st_dst_reg(this->result);
+ dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements);
+
st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src };
unsigned num_src = 0;
foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
case ir_type_dereference_record: {
ir_dereference_record *deref_record = tail->as_dereference_record();
const glsl_type *struct_type = deref_record->record->type;
- int field_index = deref_record->record->type->field_index(deref_record->field);
+ int field_index = deref_record->field_idx;
calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location);
case ir_type_dereference_array: {
ir_dereference_array *deref_arr = tail->as_dereference_array();
- ir_constant *array_index = deref_arr->array_index->constant_expression_value();
+
+ void *mem_ctx = ralloc_parent(deref_arr);
+ ir_constant *array_index =
+ deref_arr->array_index->constant_expression_value(mem_ctx);
if (!array_index) {
st_src_reg temp_reg;
unsigned opcode = TGSI_OPCODE_NOP;
const glsl_type *sampler_type = ir->sampler->type;
unsigned sampler_array_size = 1, sampler_base = 0;
- uint16_t sampler_index = 0;
bool is_cube_array = false, is_cube_shadow = false;
+ ir_variable *var = ir->sampler->variable_referenced();
unsigned i;
/* if we are a cube array sampler or a cube shadow */
coord_dst.writemask = WRITEMASK_XYZW;
}
+ st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT);
+
+ uint16_t index = 0;
get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
- &sampler_index, &reladdr, true);
- if (reladdr.file != PROGRAM_UNDEFINED)
+ &index, &reladdr, !var->contains_bindless());
+
+ sampler.index = index;
+ if (reladdr.file != PROGRAM_UNDEFINED) {
+ sampler.reladdr = ralloc(mem_ctx, st_src_reg);
+ *sampler.reladdr = reladdr;
emit_arl(ir, sampler_reladdr, reladdr);
+ }
if (opcode == TGSI_OPCODE_TXD)
inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
if (ir->shadow_comparator)
inst->tex_shadow = GL_TRUE;
- inst->resource.index = sampler_index;
- inst->sampler_array_size = sampler_array_size;
- inst->sampler_base = sampler_base;
-
- if (reladdr.file != PROGRAM_UNDEFINED) {
- inst->resource.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr));
+ if (var->contains_bindless()) {
+ ir->sampler->accept(this);
+ inst->resource = this->result;
+ inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_X, SWIZZLE_Y);
+ } else {
+ inst->resource = sampler;
+ inst->sampler_array_size = sampler_array_size;
+ inst->sampler_base = sampler_base;
}
if (ir->offset) {
num_immediates = 0;
num_address_regs = 0;
samplers_used = 0;
- buffers_used = 0;
images_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
mem_ctx = ralloc_context(NULL);
ctx = NULL;
prog = NULL;
+ precise = 0;
shader_program = NULL;
shader = NULL;
options = NULL;
have_fma = false;
use_shared_memory = false;
has_tex_txf_lz = false;
+ variables = NULL;
+}
+
+/**
+ * Hash-table entry destructor: deletes the variable_storage object stored
+ * in the entry's data pointer.
+ */
+static void var_destroy(struct hash_entry *entry)
+{
+   delete (variable_storage *)entry->data;
}
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
{
+ _mesa_hash_table_destroy(variables, var_destroy); /* var_destroy is the per-entry callback; it deletes the variable_storage in entry->data */
free(array_sizes);
ralloc_free(mem_ctx);
}
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
- v->buffers_used = 0;
v->images_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->resource.file != PROGRAM_UNDEFINED && (
is_resource_instruction(inst->op) ||
inst->op == TGSI_OPCODE_STORE)) {
- if (inst->resource.file == PROGRAM_BUFFER) {
- v->buffers_used |= 1 << inst->resource.index;
- } else if (inst->resource.file == PROGRAM_MEMORY) {
+ if (inst->resource.file == PROGRAM_MEMORY) {
v->use_shared_memory = true;
- } else {
- assert(inst->resource.file == PROGRAM_IMAGE);
+ } else if (inst->resource.file == PROGRAM_IMAGE) {
for (int i = 0; i < inst->sampler_array_size; i++) {
unsigned idx = inst->sampler_base + i;
v->images_used |= 1 << idx;
/* Replaces all references to a temporary register index with another index. */
void
-glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
+glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames)
{
+ /* renames is indexed by the old temporary register index: an entry whose
+  * valid flag is set redirects that register to new_reg, any other entry
+  * leaves the register index unchanged.
+  */
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
unsigned j;
- int k;
for (j = 0; j < num_inst_src_regs(inst); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY)
- for (k = 0; k < num_renames; k++)
- if (inst->src[j].index == renames[k].old_reg)
- inst->src[j].index = renames[k].new_reg;
+ if (inst->src[j].file == PROGRAM_TEMPORARY) {
+ int old_idx = inst->src[j].index;
+ if (renames[old_idx].valid)
+ inst->src[j].index = renames[old_idx].new_reg;
+ }
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
- for (k = 0; k < num_renames; k++)
- if (inst->tex_offsets[j].index == renames[k].old_reg)
- inst->tex_offsets[j].index = renames[k].new_reg;
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
+ int old_idx = inst->tex_offsets[j].index;
+ if (renames[old_idx].valid)
+ inst->tex_offsets[j].index = renames[old_idx].new_reg;
+ }
+ }
+
+ /* The resource operand can itself be a temporary register; remap it too. */
+ if (inst->resource.file == PROGRAM_TEMPORARY) {
+ int old_idx = inst->resource.index;
+ if (renames[old_idx].valid)
+ inst->resource.index = renames[old_idx].new_reg;
}
for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY)
- for (k = 0; k < num_renames; k++)
- if (inst->dst[j].index == renames[k].old_reg)
- inst->dst[j].index = renames[k].new_reg;
+ if (inst->dst[j].file == PROGRAM_TEMPORARY) {
+ int old_idx = inst->dst[j].index;
+ if (renames[old_idx].valid)
+ inst->dst[j].index = renames[old_idx].new_reg;
+ }
}
}
}
}
}
}
+
+ if (inst->resource.file == PROGRAM_TEMPORARY) {
+ int src_chans;
+
+ src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0);
+ src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1);
+ src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2);
+ src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3);
+
+ for (int c = 0; c < 4; c++) {
+ if (src_chans & (1 << c))
+ writes[4 * inst->resource.index + c] = NULL;
+ }
+ }
+
break;
}
void
glsl_to_tgsi_visitor::merge_registers(void)
{
- int *last_reads = ralloc_array(mem_ctx, int, this->next_temp);
- int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
- struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
- int i, j;
- int num_renames = 0;
- /* Read the indices of the last read and first write to each temp register
- * into an array so that we don't have to traverse the instruction list as
- * much. */
- for (i = 0; i < this->next_temp; i++) {
- last_reads[i] = -1;
- first_writes[i] = -1;
- }
- get_last_temp_read_first_temp_write(last_reads, first_writes);
+ struct lifetime *lifetimes =
+ rzalloc_array(mem_ctx, struct lifetime, this->next_temp); /* zero-filled, one slot per temporary register */
- /* Start looking for registers with non-overlapping usages that can be
- * merged together. */
- for (i = 0; i < this->next_temp; i++) {
- /* Don't touch unused registers. */
- if (last_reads[i] < 0 || first_writes[i] < 0) continue;
-
- for (j = 0; j < this->next_temp; j++) {
- /* Don't touch unused registers. */
- if (last_reads[j] < 0 || first_writes[j] < 0) continue;
-
- /* We can merge the two registers if the first write to j is after or
- * in the same instruction as the last read from i. Note that the
- * register at index i will always be used earlier or at the same time
- * as the register at index j. */
- if (first_writes[i] <= first_writes[j] &&
- last_reads[i] <= first_writes[j]) {
- renames[num_renames].old_reg = j;
- renames[num_renames].new_reg = i;
- num_renames++;
-
- /* Update the first_writes and last_reads arrays with the new
- * values for the merged register index, and mark the newly unused
- * register index as such. */
- assert(last_reads[j] >= last_reads[i]);
- last_reads[i] = last_reads[j];
- first_writes[j] = -1;
- last_reads[j] = -1;
- }
- }
+ if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions,
+ this->next_temp, lifetimes)) { /* remap only when the lifetime query returns true */
+ struct rename_reg_pair *renames =
+ rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
+ get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames); /* NOTE(review): presumably fills renames from lifetimes; helper is defined elsewhere */
+ rename_temp_registers(renames); /* renames is looked up by old temporary index */
+ ralloc_free(renames);
}
- rename_temp_registers(num_renames, renames);
- ralloc_free(renames);
- ralloc_free(last_reads);
- ralloc_free(first_writes);
+ ralloc_free(lifetimes);
}
/* Reassign indices to temporary registers by reusing unused indices created
int new_index = 0;
int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
- int num_renames = 0;
for (i = 0; i < this->next_temp; i++) {
first_writes[i] = -1;
for (i = 0; i < this->next_temp; i++) {
if (first_writes[i] < 0) continue;
if (i != new_index) {
- renames[num_renames].old_reg = i;
- renames[num_renames].new_reg = new_index;
- num_renames++;
+ renames[i].new_reg = new_index;
+ renames[i].valid = true;
}
new_index++;
}
- rename_temp_registers(num_renames, renames);
+ rename_temp_registers(renames);
this->next_temp = new_index;
ralloc_free(renames);
ralloc_free(first_writes);
}
/**
- * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
+ * Create a TGSI ureg_dst register from an st_dst_reg.
+ */
+static struct ureg_dst
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
+{
+   /* Start from the plain register, then layer the modifiers on top in
+    * order: writemask, saturate, indirect addressing, second dimension.
+    */
+   struct ureg_dst result = dst_register(t, dst_reg->file, dst_reg->index,
+                                         dst_reg->array_id);
+
+   /* A register in the NULL file is handed back without any modifiers. */
+   if (result.File == TGSI_FILE_NULL)
+      return result;
+
+   result = ureg_writemask(result, dst_reg->writemask);
+
+   if (saturate)
+      result = ureg_saturate(result);
+
+   if (dst_reg->reladdr != NULL) {
+      assert(dst_reg->file != PROGRAM_TEMPORARY);
+      result = ureg_dst_indirect(result, ureg_src(t->address[0]));
+   }
+
+   if (!dst_reg->has_index2)
+      return result;
+
+   /* Second dimension: relative through address[1] or a fixed index. */
+   if (dst_reg->reladdr2)
+      return ureg_dst_dimension_indirect(result, ureg_src(t->address[1]),
+                                         dst_reg->index2D);
+
+   return ureg_dst_dimension(result, dst_reg->index2D);
+}
+
+/**
+ * Create a TGSI ureg_src register from an st_src_reg.
*/
static struct ureg_src
-src_register(struct st_translate *t, const st_src_reg *reg)
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
{
- int index = reg->index;
- int double_reg2 = reg->double_reg2 ? 1 : 0;
+ struct ureg_src src;
+ int index = src_reg->index;
+ int double_reg2 = src_reg->double_reg2 ? 1 : 0;
- switch(reg->file) {
+ switch(src_reg->file) {
case PROGRAM_UNDEFINED:
- return ureg_imm4f(t->ureg, 0, 0, 0, 0);
+ src = ureg_imm4f(t->ureg, 0, 0, 0, 0);
+ break;
case PROGRAM_TEMPORARY:
case PROGRAM_ARRAY:
- return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));
+ src = ureg_src(dst_register(t, src_reg->file, src_reg->index, src_reg->array_id));
+ break;
case PROGRAM_OUTPUT: {
- struct ureg_dst dst = dst_register(t, reg->file, reg->index, reg->array_id);
+ struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, src_reg->array_id);
assert(dst.WriteMask != 0);
unsigned shift = ffs(dst.WriteMask) - 1;
- return ureg_swizzle(ureg_src(dst),
- shift,
- MIN2(shift + 1, 3),
- MIN2(shift + 2, 3),
- MIN2(shift + 3, 3));
+ src = ureg_swizzle(ureg_src(dst),
+ shift,
+ MIN2(shift + 1, 3),
+ MIN2(shift + 2, 3),
+ MIN2(shift + 3, 3));
+ break;
}
case PROGRAM_UNIFORM:
- assert(reg->index >= 0);
- return reg->index < t->num_constants ?
- t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
+ assert(src_reg->index >= 0);
+ src = src_reg->index < t->num_constants ?
+ t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
+ break;
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT: /* ie, immediate */
- if (reg->has_index2)
- return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
+ if (src_reg->has_index2)
+ src = ureg_src_register(TGSI_FILE_CONSTANT, src_reg->index);
else
- return reg->index >= 0 && reg->index < t->num_constants ?
- t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
+ src = src_reg->index >= 0 && src_reg->index < t->num_constants ?
+ t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
+ break;
case PROGRAM_IMMEDIATE:
- assert(reg->index >= 0 && reg->index < t->num_immediates);
- return t->immediates[reg->index];
+ assert(src_reg->index >= 0 && src_reg->index < t->num_immediates);
+ src = t->immediates[src_reg->index];
+ break;
case PROGRAM_INPUT:
/* GLSL inputs are 64-bit containers, so we have to
* map back to the original index and add the offset after
* mapping. */
index -= double_reg2;
- if (!reg->array_id) {
+ if (!src_reg->array_id) {
assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
- return t->inputs[t->inputMapping[index] + double_reg2];
+ src = t->inputs[t->inputMapping[index] + double_reg2];
}
else {
- struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, reg->array_id);
+ struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls,
+ src_reg->array_id);
unsigned mesa_index = decl->mesa_index;
int slot = t->inputMapping[mesa_index];
assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
- struct ureg_src src = t->inputs[slot];
- src.ArrayID = reg->array_id;
- return ureg_src_array_offset(src, index + double_reg2 - mesa_index);
+ src = t->inputs[slot];
+ src.ArrayID = src_reg->array_id;
+ src = ureg_src_array_offset(src, index + double_reg2 - mesa_index);
}
+ break;
case PROGRAM_ADDRESS:
- return ureg_src(t->address[reg->index]);
+ src = ureg_src(t->address[src_reg->index]);
+ break;
case PROGRAM_SYSTEM_VALUE:
- assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
- return t->systemValues[reg->index];
+ assert(src_reg->index < (int) ARRAY_SIZE(t->systemValues));
+ src = t->systemValues[src_reg->index];
+ break;
default:
assert(!"unknown src register file");
return ureg_src_undef();
}
-}
-
-/**
- * Create a TGSI ureg_dst register from an st_dst_reg.
- */
-static struct ureg_dst
-translate_dst(struct st_translate *t,
- const st_dst_reg *dst_reg,
- bool saturate)
-{
- struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
- dst_reg->array_id);
-
- if (dst.File == TGSI_FILE_NULL)
- return dst;
-
- dst = ureg_writemask(dst, dst_reg->writemask);
-
- if (saturate)
- dst = ureg_saturate(dst);
-
- if (dst_reg->reladdr != NULL) {
- assert(dst_reg->file != PROGRAM_TEMPORARY);
- dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
- }
-
- if (dst_reg->has_index2) {
- if (dst_reg->reladdr2)
- dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
- dst_reg->index2D);
- else
- dst = ureg_dst_dimension(dst, dst_reg->index2D);
- }
-
- return dst;
-}
-
-/**
- * Create a TGSI ureg_src register from an st_src_reg.
- */
-static struct ureg_src
-translate_src(struct st_translate *t, const st_src_reg *src_reg)
-{
- struct ureg_src src = src_register(t, src_reg);
if (src_reg->has_index2) {
/* 2D indexes occur with geometry shader inputs (attrib, vertex)
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
assert(num_dst == 0);
- ureg_insn(ureg, inst->op, NULL, 0, src, num_src);
+ ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise);
return;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TG4:
case TGSI_OPCODE_LODQ:
- src[num_src] = t->samplers[inst->resource.index];
+ if (inst->resource.file == PROGRAM_SAMPLER) {
+ src[num_src] = t->samplers[inst->resource.index];
+ } else {
+ /* Bindless samplers. */
+ src[num_src] = translate_src(t, &inst->resource);
+ }
assert(src[num_src].File != TGSI_FILE_NULL);
if (inst->resource.reladdr)
src[num_src] =
inst->op,
dst, num_dst,
tex_target,
+ st_translate_texture_type(inst->tex_type),
texoffsets, inst->tex_offset_num_offset,
src, num_src);
return;
src[0] = t->shared_memory;
} else if (inst->resource.file == PROGRAM_BUFFER) {
src[0] = t->buffers[inst->resource.index];
+ } else if (inst->resource.file == PROGRAM_CONSTANT) {
+ assert(inst->resource.has_index2);
+ src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index);
} else {
- src[0] = t->images[inst->resource.index];
+ assert(inst->resource.file != PROGRAM_UNDEFINED);
+ if (inst->resource.file == PROGRAM_IMAGE) {
+ src[0] = t->images[inst->resource.index];
+ } else {
+ /* Bindless images. */
+ src[0] = translate_src(t, &inst->resource);
+ }
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
}
if (inst->resource.reladdr)
} else if (inst->resource.file == PROGRAM_BUFFER) {
dst[0] = ureg_dst(t->buffers[inst->resource.index]);
} else {
- dst[0] = ureg_dst(t->images[inst->resource.index]);
+ if (inst->resource.file == PROGRAM_IMAGE) {
+ dst[0] = ureg_dst(t->images[inst->resource.index]);
+ } else {
+ /* Bindless images. */
+ dst[0] = ureg_dst(translate_src(t, &inst->resource));
+ }
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
}
dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
tex_target, inst->image_format);
break;
- case TGSI_OPCODE_SCS:
- dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
- ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
- break;
-
default:
ureg_insn(ureg,
inst->op,
dst, num_dst,
- src, num_src);
+ src, num_src, inst->precise);
break;
}
}
}
if (procType == PIPE_SHADER_FRAGMENT) {
- if (program->shader->Program->info.fs.early_fragment_tests)
+ if (program->shader->Program->info.fs.early_fragment_tests ||
+ program->shader->Program->info.fs.post_depth_coverage) {
ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
+ if (program->shader->Program->info.fs.post_depth_coverage)
+ ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
+ }
+
if (proginfo->info.inputs_read & VARYING_BIT_POS) {
/* Must do this after setting up t->inputs. */
emit_wpos(st_context(ctx), t, proginfo, ureg,
/* texture samplers */
for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1u << i)) {
- unsigned type;
+ unsigned type = st_translate_texture_type(program->sampler_types[i]);
t->samplers[i] = ureg_DECL_sampler(ureg, i);
- switch (program->sampler_types[i]) {
- case GLSL_TYPE_INT:
- type = TGSI_RETURN_TYPE_SINT;
- break;
- case GLSL_TYPE_UINT:
- type = TGSI_RETURN_TYPE_UINT;
- break;
- case GLSL_TYPE_FLOAT:
- type = TGSI_RETURN_TYPE_FLOAT;
- break;
- default:
- unreachable("not reached");
- }
-
ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
type, type, type, type );
}
}
- for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
- if (program->buffers_used & (1 << i)) {
- t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
+ /* Declare atomic and shader storage buffers. */
+ {
+ struct gl_program *prog = program->prog;
+
+ for (i = 0; i < prog->info.num_abos; i++) {
+ unsigned index = prog->sh.AtomicBuffers[i]->Binding;
+ assert(index < frag_const->MaxAtomicBuffers);
+ t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
}
- }
- for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
- i++) {
- if (program->buffers_used & (1 << i)) {
- t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
+ assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
+ for (i = 0; i < prog->info.num_ssbos; i++) {
+ unsigned index = frag_const->MaxAtomicBuffers + i;
+ t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
}
}
for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
if (program->shader_program->_LinkedShaders[i]) {
- unsigned next;
-
- switch (i) {
- case MESA_SHADER_TESS_CTRL:
- next = PIPE_SHADER_TESS_CTRL;
- break;
- case MESA_SHADER_TESS_EVAL:
- next = PIPE_SHADER_TESS_EVAL;
- break;
- case MESA_SHADER_GEOMETRY:
- next = PIPE_SHADER_GEOMETRY;
- break;
- case MESA_SHADER_FRAGMENT:
- next = PIPE_SHADER_FRAGMENT;
- break;
- default:
- assert(0);
- continue;
- }
-
- ureg_set_next_shader_processor(ureg, next);
+ ureg_set_next_shader_processor(
+ ureg, pipe_shader_type_from_mesa((gl_shader_stage)i));
break;
}
}
struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[shader->Stage];
struct pipe_screen *pscreen = ctx->st->pipe->screen;
- enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
+ enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(shader->Stage);
unsigned skip_merge_registers;
validate_ir_tree(shader->ir);
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
v->has_tex_txf_lz = pscreen->get_param(pscreen,
PIPE_CAP_TGSI_TEX_TXF_LZ);
+
+ v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
skip_merge_registers =
pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS);
- _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
+ _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
prog->Parameters);
/* Remove reads from output registers. */
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
- _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters,
- true);
+ _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
if (!shader_program->data->LinkStatus) {
free_glsl_to_tgsi_visitor(v);
_mesa_reference_program(ctx, &shader->Program, NULL);
struct st_vertex_program *stvp;
struct st_fragment_program *stfp;
- struct st_common_program *stgp;
- struct st_common_program *sttcp;
- struct st_common_program *sttep;
+ struct st_common_program *stp;
struct st_compute_program *stcp;
switch (shader->Stage) {
stfp = (struct st_fragment_program *)prog;
stfp->glsl_to_tgsi = v;
break;
- case MESA_SHADER_GEOMETRY:
- stgp = (struct st_common_program *)prog;
- stgp->glsl_to_tgsi = v;
- break;
case MESA_SHADER_TESS_CTRL:
- sttcp = (struct st_common_program *)prog;
- sttcp->glsl_to_tgsi = v;
- break;
case MESA_SHADER_TESS_EVAL:
- sttep = (struct st_common_program *)prog;
- sttep->glsl_to_tgsi = v;
+ case MESA_SHADER_GEOMETRY:
+ stp = st_common_program(prog);
+ stp->glsl_to_tgsi = v;
break;
case MESA_SHADER_COMPUTE:
stcp = (struct st_compute_program *)prog;
gl_shader_stage stage = shader->Stage;
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage];
- enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
+ enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
continue;
enum pipe_shader_type ptarget =
- st_shader_stage_to_ptarget(shader->Stage);
+ pipe_shader_type_from_mesa(shader->Stage);
enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_PREFERRED_IR);