X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=de7fe7837a40551dca893cb00886880c7e6f0516;hb=cacc823c39044307e6befe12c3f51317f09973e2;hp=81ea233a8ecfbd02bb98d14bc67df3925bed10cc;hpb=ecd6fce2611e88ff8468a354cff8eda39f260a31;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 81ea233a8ec..de7fe7837a4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -54,7 +54,9 @@ #include "st_format.h" #include "st_glsl_types.h" #include "st_nir.h" +#include "st_shader_cache.h" +#include #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ (1 << PROGRAM_CONSTANT) | \ @@ -67,38 +69,57 @@ class st_dst_reg; static int swizzle_for_size(int size); +static int swizzle_for_type(const glsl_type *type, int component = 0) +{ + unsigned num_elements = 4; + + if (type) { + type = type->without_array(); + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + num_elements = type->vector_elements; + } + + int swizzle = swizzle_for_size(num_elements); + assert(num_elements + component <= 4); + + swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1); + return swizzle; +} + /** * This struct is a corresponding struct to TGSI ureg_src. */ class st_src_reg { public: - st_src_reg(gl_register_file file, int index, const glsl_type *type) + st_src_reg(gl_register_file file, int index, const glsl_type *type, + int component = 0, unsigned array_id = 0) { + assert(file != PROGRAM_ARRAY || array_id != 0); this->file = file; this->index = index; - if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) - this->swizzle = swizzle_for_size(type->vector_elements); - else - this->swizzle = SWIZZLE_XYZW; + this->swizzle = swizzle_for_type(type, component); this->negate = 0; + this->abs = 0; this->index2D = 0; this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; - this->array_id = 0; + this->array_id = array_id; this->is_double_vertex_input = false; } st_src_reg(gl_register_file file, int index, enum glsl_base_type type) { + assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ this->type = type; this->file = file; this->index = index; this->index2D = 0; this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->abs = 0; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -109,12 +130,14 @@ public: st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D) { + assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ this->type = type; this->file = file; this->index = index; this->index2D = index2D; this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->abs = 0; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -131,6 +154,7 @@ public: this->index2D = 0; this->swizzle = 0; this->negate = 0; + this->abs = 0; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -141,29 +165,40 @@ public: explicit st_src_reg(st_dst_reg reg); - gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - int index2D; - GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ - int negate; /**< NEGATE_XYZW mask from mesa */ - enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ - /** Register index should be offset by the integer in this reg. */ - st_src_reg *reladdr; - st_src_reg *reladdr2; - bool has_index2; + int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int16_t index2D; + uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ + int negate:4; /**< NEGATE_XYZW mask from mesa */ + unsigned abs:1; + enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + unsigned has_index2:1; + gl_register_file file:5; /**< PROGRAM_* from Mesa */ /* * Is this the second half of a double register pair? * currently used for input mapping only. */ - bool double_reg2; - unsigned array_id; - bool is_double_vertex_input; + unsigned double_reg2:1; + unsigned is_double_vertex_input:1; + unsigned array_id:10; + + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; + st_src_reg *reladdr2; + + st_src_reg get_abs() + { + st_src_reg reg = *this; + reg.negate = 0; + reg.abs = 1; + return reg; + } }; class st_dst_reg { public: st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index) { + assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ this->file = file; this->index = index; this->index2D = 0; @@ -177,6 +212,7 @@ public: st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type) { + assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ this->file = file; this->index = 0; this->index2D = 0; @@ -203,16 +239,17 @@ public: explicit st_dst_reg(st_src_reg reg); - gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - int index2D; - int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ - enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int16_t index2D; + gl_register_file file:5; /**< PROGRAM_* from Mesa */ + unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */ + enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + unsigned has_index2:1; + unsigned array_id:10; + /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; st_src_reg *reladdr2; - bool has_index2; - unsigned array_id; }; st_src_reg::st_src_reg(st_dst_reg reg) @@ -222,6 +259,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->abs = 0; this->reladdr = reg.reladdr; this->index2D = reg.index2D; this->reladdr2 = reg.reladdr2; @@ -248,29 +286,27 @@ class glsl_to_tgsi_instruction : public exec_node { public: DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction) - unsigned op; st_dst_reg dst[2]; st_src_reg src[4]; + st_src_reg resource; /**< sampler or buffer register */ + st_src_reg *tex_offsets; + /** Pointer to the ir source this tree came from for debugging */ ir_instruction *ir; - GLboolean cond_update; - bool saturate; - st_src_reg sampler; /**< sampler register */ - int sampler_base; - int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */ - int tex_target; /**< One of TEXTURE_*_INDEX */ - glsl_base_type tex_type; - GLboolean tex_shadow; - unsigned image_format; - - st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; - unsigned tex_offset_num_offset; - int dead_mask; /**< Used in dead code elimination */ - - st_src_reg buffer; /**< buffer register */ - unsigned buffer_access; /**< buffer access type */ - - class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ + + unsigned op:8; /**< TGSI opcode */ + unsigned saturate:1; + unsigned is_64bit_expanded:1; + unsigned sampler_base:5; + unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */ + unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */ + glsl_base_type tex_type:5; + unsigned tex_shadow:1; + unsigned image_format:9; + unsigned tex_offset_num_offset:3; + unsigned dead_mask:4; /**< Used in dead code elimination */ + unsigned buffer_access:3; /**< buffer access type */ + const struct tgsi_opcode_info *info; }; @@ -278,13 +314,19 @@ class variable_storage : public exec_node { public: variable_storage(ir_variable *var, gl_register_file file, int index, unsigned array_id = 0) - : file(file), index(index), var(var), array_id(array_id) + : file(file), index(index), component(0), var(var), array_id(array_id) { - /* empty */ + assert(file != PROGRAM_ARRAY || array_id != 0); } gl_register_file file; int index; + + /* Explicit component location. This is given in terms of the GLSL-style + * swizzles where each double is a single component, i.e. for 64-bit types + * it can only be 0 or 1. + */ + int component; ir_variable *var; /* variable that maps to this, if any */ unsigned array_id; }; @@ -304,60 +346,44 @@ public: int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ }; -class function_entry : public exec_node { -public: - ir_function_signature *sig; - - /** - * identifier of this function signature used by the program. - * - * At the point that TGSI instructions for function calls are - * generated, we don't know the address of the first instruction of - * the function body. So we make the BranchTarget that is called a - * small integer and rewrite them during set_branchtargets(). - */ - int sig_id; - - /** - * Pointer to first instruction of the function body. - * - * Set during function body emits after main() is processed. - */ - glsl_to_tgsi_instruction *bgn_inst; - - /** - * Index of the first instruction of the function body in actual TGSI. - * - * Set after conversion from glsl_to_tgsi_instruction to TGSI. - */ - int inst; - - /** Storage for the return value. */ - st_src_reg return_reg; -}; - static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -struct array_decl { +struct inout_decl { unsigned mesa_index; - unsigned array_id; - unsigned array_size; - enum glsl_base_type array_type; + unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */ + unsigned size; + unsigned interp_loc; + unsigned gs_out_streams; + enum glsl_interp_mode interp; + enum glsl_base_type base_type; + ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ }; -static enum glsl_base_type -find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id) +static struct inout_decl * +find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id) { - unsigned i; + assert(array_id != 0); - for (i = 0; i < count; i++) { - struct array_decl *decl = &arrays[i]; + for (unsigned i = 0; i < count; i++) { + struct inout_decl *decl = &decls[i]; if (array_id == decl->array_id) { - return decl->array_type; + return decl; } } + + return NULL; +} + +static enum glsl_base_type +find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) +{ + if (!array_id) + return GLSL_TYPE_ERROR; + struct inout_decl *decl = find_inout_array(decls, count, array_id); + if (decl) + return decl->base_type; return GLSL_TYPE_ERROR; } @@ -371,8 +397,6 @@ public: glsl_to_tgsi_visitor(); ~glsl_to_tgsi_visitor(); - function_entry *current_function; - struct gl_context *ctx; struct gl_program *prog; struct gl_shader_program *shader_program; @@ -385,9 +409,11 @@ public: unsigned max_num_arrays; unsigned next_array; - struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS]; + struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS]; + unsigned num_inputs; unsigned num_input_arrays; - struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS]; + struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS]; + unsigned num_outputs; unsigned num_output_arrays; int num_address_regs; @@ -406,13 +432,12 @@ public: bool have_sqrt; bool have_fma; bool use_shared_memory; + bool has_tex_txf_lz; variable_storage *find_variable_storage(ir_variable *var); int add_constant(gl_register_file file, gl_constant_value values[8], - int size, int datatype, GLuint *swizzle_out); - - function_entry *get_function_signature(ir_function_signature *sig); + int size, int datatype, uint16_t *swizzle_out); st_src_reg get_temp(const glsl_type *type); void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); @@ -469,10 +494,6 @@ public: exec_list immediates; unsigned num_immediates; - /** List of function_entry */ - exec_list function_signatures; - int next_signature_id; - /** List of glsl_to_tgsi_instruction */ exec_list instructions; @@ -490,7 +511,7 @@ public: st_src_reg src2 = undef_src, st_src_reg src3 = undef_src); - unsigned get_opcode(ir_instruction *ir, unsigned op, + unsigned get_opcode(unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); @@ -514,15 +535,15 @@ public: void get_deref_offsets(ir_dereference *ir, unsigned *array_size, unsigned *base, - unsigned *index, - st_src_reg *reladdr); - void calc_deref_offsets(ir_dereference *head, - ir_dereference *tail, + uint16_t *index, + st_src_reg *reladdr, + bool opaque); + void calc_deref_offsets(ir_dereference *tail, unsigned *array_elements, - unsigned *base, - unsigned *index, + uint16_t *index, st_src_reg *indirect, unsigned *location); + st_src_reg canonicalize_gather_offset(st_src_reg offset); bool try_emit_mad(ir_expression *ir, int mul_operand); @@ -566,10 +587,10 @@ fail_link(struct gl_shader_program *prog, const char *fmt, ...) { va_list args; va_start(args, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, args); + ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args); va_end(args); - prog->LinkStatus = GL_FALSE; + prog->data->LinkStatus = linking_failure; } static int @@ -631,7 +652,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, int num_reladdr = 0, i, j; bool dst_is_64bit[2]; - op = get_opcode(ir, op, dst, src0, src1); + op = get_opcode(op, dst, src0, src1); /* If we have to do relative addressing, we want to load the ARL * reg directly for one of the regs, and preload the other reladdr @@ -662,6 +683,9 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } assert(num_reladdr == 0); + /* inst->op has only 8 bits. */ + STATIC_ASSERT(TGSI_OPCODE_LAST <= 255); + inst->op = op; inst->info = tgsi_get_opcode_info(op); inst->dst[0] = dst; @@ -670,15 +694,18 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, inst->src[1] = src1; inst->src[2] = src2; inst->src[3] = src3; + inst->is_64bit_expanded = false; inst->ir = ir; inst->dead_mask = 0; + inst->tex_offsets = NULL; + inst->tex_offset_num_offset = 0; + inst->saturate = 0; + inst->tex_shadow = 0; /* default to float, for paths where this is not initialized * (since 0==UINT which is likely wrong): */ inst->tex_type = GLSL_TYPE_FLOAT; - inst->function = NULL; - /* Update indirect addressing status used by TGSI */ if (dst.reladdr || dst.reladdr2) { switch(dst.file) { @@ -734,7 +761,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, for (j = 0; j < 2; j++) { dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type); if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) { - enum glsl_base_type type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id); + enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id); if (glsl_base_type_is_64bit(type)) dst_is_64bit[j] = true; } @@ -768,9 +795,9 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, int i = u_bit_scan(&writemask); - /* before emitting the instruction, see if we have to adjust store + /* before emitting the instruction, see if we have to adjust load / store * address */ - if (i > 1 && inst->op == TGSI_OPCODE_STORE && + if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) && addr.file == PROGRAM_UNDEFINED) { /* We have to advance the buffer address by 16 */ addr = get_temp(glsl_type::uint_type); @@ -778,7 +805,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, inst->src[0], st_src_reg_for_int(16)); } - /* first time use previous instruction */ if (dinst == NULL) { dinst = inst; @@ -790,6 +816,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, dinst->prev = NULL; } this->instructions.push_tail(dinst); + dinst->is_64bit_expanded = true; /* modify the destination if we are splitting */ for (j = 0; j < 2; j++) { @@ -797,11 +824,10 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; dinst->dst[j].index = initial_dst_idx[j]; if (i > 1) { - if (dinst->op == TGSI_OPCODE_STORE) { + if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE) dinst->src[0] = addr; - } else { + if (dinst->op != TGSI_OPCODE_STORE) dinst->dst[j].index++; - } } } else { /* if we aren't writing to a double, just get the bit of the initial writemask @@ -828,8 +854,9 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } else { /* some opcodes are special case in what they use as sources - - F2D is a float src0, DLDEXP is integer src1 */ - if (op == TGSI_OPCODE_F2D || + - [FUI]2D/[UI]2I64 is a float/[u]int src0, DLDEXP is integer src1 */ + if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D || + op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 || op == TGSI_OPCODE_DLDEXP || (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) { dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz); @@ -860,7 +887,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * based on the operands and input opcode, then emits the result. */ unsigned -glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, +glsl_to_tgsi_visitor::get_opcode(unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { @@ -876,6 +903,10 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, if (is_resource_instruction(op)) type = src1.type; + else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64) + type = GLSL_TYPE_INT64; + else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64) + type = GLSL_TYPE_UINT64; else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) type = GLSL_TYPE_DOUBLE; else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) @@ -883,6 +914,21 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, else if (native_integers) type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; +#define case7(c, f, i, u, d, i64, ui64) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_UINT64) \ + op = TGSI_OPCODE_##ui64; \ + else if (type == GLSL_TYPE_INT64) \ + op = TGSI_OPCODE_##i64; \ + else if (type == GLSL_TYPE_DOUBLE) \ + op = TGSI_OPCODE_##d; \ + else if (type == GLSL_TYPE_INT) \ + op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) \ + op = TGSI_OPCODE_##u; \ + else \ + op = TGSI_OPCODE_##f; \ + break; #define case5(c, f, i, u, d) \ case TGSI_OPCODE_##c: \ if (type == GLSL_TYPE_DOUBLE) \ @@ -906,14 +952,22 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, break; #define case3(f, i, u) case4(f, f, i, u) -#define case4d(f, i, u, d) case5(f, f, i, u, d) +#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64) #define case3fid(f, i, d) case5(f, f, i, i, d) +#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64) #define case2fi(f, i) case4(f, f, i, i) #define case2iu(i, u) case4(i, LAST, i, u) -#define casecomp(c, f, i, u, d) \ +#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64) +#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64) + +#define casecomp(c, f, i, u, d, i64, ui64) \ case TGSI_OPCODE_##c: \ - if (type == GLSL_TYPE_DOUBLE) \ + if (type == GLSL_TYPE_INT64) \ + op = TGSI_OPCODE_##i64; \ + else if (type == GLSL_TYPE_UINT64) \ + op = TGSI_OPCODE_##ui64; \ + else if (type == GLSL_TYPE_DOUBLE) \ op = TGSI_OPCODE_##d; \ else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \ op = TGSI_OPCODE_##i; \ @@ -926,24 +980,24 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, break; switch(op) { - case3fid(ADD, UADD, DADD); - case3fid(MUL, UMUL, DMUL); + case3fid64(ADD, UADD, DADD, U64ADD); + case3fid64(MUL, UMUL, DMUL, U64MUL); case3fid(MAD, UMAD, DMAD); case3fid(FMA, UMAD, DFMA); - case3(DIV, IDIV, UDIV); - case4d(MAX, IMAX, UMAX, DMAX); - case4d(MIN, IMIN, UMIN, DMIN); - case2iu(MOD, UMOD); + case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV); + case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX); + case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN); + case4iu64(MOD, UMOD, I64MOD, U64MOD); - casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ); - casecomp(SNE, FSNE, USNE, USNE, DSNE); - casecomp(SGE, FSGE, ISGE, USGE, DSGE); - casecomp(SLT, FSLT, ISLT, USLT, DSLT); + casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); + casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); + casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE); + casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT); - case2iu(ISHR, USHR); + case2iu64(SHL, U64SHL); + case4iu64(ISHR, USHR, I64SHR, U64SHR); - case3fid(SSG, ISSG, DSSG); - case3fid(ABS, IABS, DABS); + case3fid64(SSG, ISSG, DSSG, I64SSG); case2iu(IBFE, UBFE); case2iu(IMSB, UMSB); @@ -1064,18 +1118,24 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, int glsl_to_tgsi_visitor::add_constant(gl_register_file file, gl_constant_value values[8], int size, int datatype, - GLuint *swizzle_out) + uint16_t *swizzle_out) { if (file == PROGRAM_CONSTANT) { - return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, - size, datatype, swizzle_out); + GLuint swizzle = swizzle_out ? *swizzle_out : 0; + int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, &swizzle); + if (swizzle_out) + *swizzle_out = swizzle; + return result; } assert(file == PROGRAM_IMMEDIATE); int index = 0; immediate_storage *entry; - int size32 = size * (datatype == GL_DOUBLE ? 2 : 1); + int size32 = size * ((datatype == GL_DOUBLE || + datatype == GL_INT64_ARB || + datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1); int i; /* Search immediate storage to see if we already have an identical @@ -1213,6 +1273,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; src.reladdr = NULL; src.negate = 0; + src.abs = 0; if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) { if (next_array >= max_num_arrays) { @@ -1222,7 +1283,8 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) } src.file = PROGRAM_ARRAY; - src.index = next_array << 16 | 0x8000; + src.index = 0; + src.array_id = next_array + 1; array_sizes[next_array] = type_size(type); ++next_array; @@ -1257,10 +1319,8 @@ void glsl_to_tgsi_visitor::visit(ir_variable *ir) { if (strcmp(ir->name, "gl_FragCoord") == 0) { - struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - - fp->OriginUpperLeft = ir->data.origin_upper_left; - fp->PixelCenterInteger = ir->data.pixel_center_integer; + this->prog->OriginUpperLeft = ir->data.origin_upper_left; + this->prog->PixelCenterInteger = ir->data.pixel_center_integer; } if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { @@ -1297,7 +1357,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) dst = st_dst_reg(get_temp(ir->type)); - storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index); + storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index, + dst.array_id); this->variables.push_tail(storage); } @@ -1565,7 +1626,9 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) } break; case ir_unop_neg: - if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) + if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) + emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]); + else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else if (result_dst.type == GLSL_TYPE_DOUBLE) emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); @@ -1578,7 +1641,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; case ir_unop_abs: - emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + if (result_dst.type == GLSL_TYPE_FLOAT) + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs()); + else if (result_dst.type == GLSL_TYPE_DOUBLE) + emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]); + else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) + emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]); + else + emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]); break; case ir_unop_sign: emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); @@ -1591,8 +1661,10 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); break; case ir_unop_exp: + assert(!"not reached: should be handled by exp_to_exp2"); + break; case ir_unop_log: - assert(!"not reached: should be handled by ir_explog_to_explog2"); + assert(!"not reached: should be handled by log_to_log2"); break; case ir_unop_log2: emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); @@ -1668,17 +1740,15 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + op[1].negate = ~op[1].negate; + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_mul: emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: - if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE) - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); - else - emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); break; case ir_binop_mod: if (result_dst.type == GLSL_TYPE_FLOAT) @@ -1904,8 +1974,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ * behavior. */ - emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]); - emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs()); emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); } break; @@ -1926,6 +1995,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) /* fallthrough to next case otherwise */ case ir_unop_i2u: case ir_unop_u2i: + case ir_unop_i642u64: + case ir_unop_u642i64: /* Converting between signed and unsigned integers is a no-op. */ result_src = op[0]; result_src.type = result_dst.type; @@ -1959,7 +2030,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_unop_bitcast_f2i: case ir_unop_bitcast_f2u: /* Make sure we don't propagate the negate modifier to integer opcodes. */ - if (op[0].negate) + if (op[0].negate || op[0].abs) emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); else result_src = op[0]; @@ -1983,6 +2054,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) else emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; + case ir_unop_bitcast_u642d: + case ir_unop_bitcast_i642d: + result_src = op[0]; + result_src.type = GLSL_TYPE_DOUBLE; + break; + case ir_unop_bitcast_d2i64: + result_src = op[0]; + result_src.type = GLSL_TYPE_INT64; + break; + case ir_unop_bitcast_d2u64: + result_src = op[0]; + result_src.type = GLSL_TYPE_UINT64; + break; case ir_unop_trunc: emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; @@ -2020,13 +2104,23 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; } case ir_binop_lshift: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); - break; - } case ir_binop_rshift: if (native_integers) { - emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); + unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL + : TGSI_OPCODE_ISHR; + st_src_reg count; + + if (glsl_base_type_is_64bit(op[0].type)) { + /* GLSL shift operations have 32-bit shift counts, but TGSI uses + * 64 bits. + */ + count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components())); + emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]); + } else { + count = op[1]; + } + + emit_asm(ir, opcode, result_dst, op[0], count); break; } case ir_binop_bit_and: @@ -2061,6 +2155,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) cbuf.index = 0; cbuf.reladdr = NULL; cbuf.negate = 0; + cbuf.abs = 0; assert(ir->type->is_vector() || ir->type->is_scalar()); @@ -2070,8 +2165,35 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) cbuf.index = const_offset / 16; } else { + ir_expression *offset_expr = ir->operands[1]->as_expression(); + st_src_reg offset = op[1]; + + /* The OpenGL spec is written in such a way that accesses with + * non-constant offset are almost always vec4-aligned. The only + * exception to this are members of structs in arrays of structs: + * each struct in an array of structs is at least vec4-aligned, + * but single-element and [ui]vec2 members of the struct may be at + * an offset that is not a multiple of 16 bytes. + * + * Here, we extract that offset, relying on previous passes to always + * generate offset expressions of the form (+ expr constant_offset). + * + * Note that the std430 layout, which allows more cases of alignment + * less than vec4 in arrays, is not supported for uniform blocks, so + * we do not have to deal with it here. + */ + if (offset_expr && offset_expr->operation == ir_binop_add) { + const_offset_ir = offset_expr->operands[1]->as_constant(); + if (const_offset_ir) { + const_offset = const_offset_ir->value.u[0]; + cbuf.index = const_offset / 16; + offset_expr->operands[0]->accept(this); + offset = this->result; + } + } + /* Relative/variable index into constant buffer */ - emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], + emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset, st_src_reg_for_int(4)); cbuf.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); @@ -2103,7 +2225,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) const_offset % 16 / 4, const_offset % 16 / 4); - if (ir->type->base_type == GLSL_TYPE_BOOL) { + if (ir->type->is_boolean()) { emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); } else { emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); @@ -2200,11 +2322,15 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; case ir_unop_unpack_double_2x32: case ir_unop_pack_double_2x32: + case ir_unop_unpack_int_2x32: + case ir_unop_pack_int_2x32: + case ir_unop_unpack_uint_2x32: + case ir_unop_pack_uint_2x32: emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; case ir_binop_ldexp: - if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) { + if (ir->operands[0]->type->is_double()) { emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); } else { assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); @@ -2230,7 +2356,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) *buffer.reladdr = op[0]; emit_arl(ir, sampler_reladdr, op[0]); } - emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer; + emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer; break; } @@ -2243,7 +2369,129 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_unop_vote_eq: emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]); break; - + case ir_unop_ballot: + emit_asm(ir, TGSI_OPCODE_BALLOT, result_dst, op[0]); + break; + case ir_unop_read_first_invocation: + emit_asm(ir, TGSI_OPCODE_READ_FIRST, result_dst, op[0]); + break; + case ir_binop_read_invocation: + emit_asm(ir, TGSI_OPCODE_READ_INVOC, result_dst, op[0], op[1]); + break; + case ir_unop_u2i64: + case ir_unop_u2u64: + case ir_unop_b2i64: { + st_src_reg temp = get_temp(glsl_type::uvec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + unsigned orig_swz = op[0].swizzle; + /* + * To convert unsigned to 64-bit: + * zero Y channel, copy X channel. + */ + temp_dst.writemask = WRITEMASK_Y; + if (vector_elements > 1) + temp_dst.writemask |= WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); + temp_dst.writemask = WRITEMASK_X; + if (vector_elements > 1) + temp_dst.writemask |= WRITEMASK_Z; + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0), + GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1)); + if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); + else + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); + result_src = temp; + result_src.type = GLSL_TYPE_UINT64; + if (vector_elements > 2) { + /* Subtle: We rely on the fact that get_temp here returns the next + * TGSI temporary register directly after the temp register used for + * the first two components, so that the result gets picked up + * automatically. + */ + st_src_reg temp = get_temp(glsl_type::uvec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + temp_dst.writemask = WRITEMASK_Y; + if (vector_elements > 3) + temp_dst.writemask |= WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); + + temp_dst.writemask = WRITEMASK_X; + if (vector_elements > 3) + temp_dst.writemask |= WRITEMASK_Z; + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2), + GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3)); + if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); + else + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); + } + break; + } + case ir_unop_i642i: + case ir_unop_u642i: + case ir_unop_u642u: + case ir_unop_i642u: { + st_src_reg temp = get_temp(glsl_type::uvec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + unsigned orig_swz = op[0].swizzle; + unsigned orig_idx = op[0].index; + int el; + temp_dst.writemask = WRITEMASK_X; + + for (el = 0; el < vector_elements; el++) { + unsigned swz = GET_SWZ(orig_swz, el); + if (swz & 1) + op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z); + else + op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + if (swz > 2) + op[0].index = orig_idx + 1; + op[0].type = GLSL_TYPE_UINT; + temp_dst.writemask = WRITEMASK_X << el; + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); + } + result_src = temp; + if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u) + result_src.type = GLSL_TYPE_UINT; + else + result_src.type = GLSL_TYPE_INT; + break; + } + case ir_unop_i642b: + emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0)); + break; + case ir_unop_i642f: + emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]); + break; + case ir_unop_u642f: + emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]); + break; + case ir_unop_i642d: + emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]); + break; + case ir_unop_u642d: + emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]); + break; + case ir_unop_i2i64: + emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); + break; + case ir_unop_f2i64: + emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]); + break; + case ir_unop_d2i64: + emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]); + break; + case ir_unop_i2u64: + emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); + break; + case ir_unop_f2u64: + emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]); + break; + case ir_unop_d2u64: + emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]); + break; + /* these might be needed */ case ir_unop_pack_snorm_2x16: case ir_unop_pack_unorm_2x16: case ir_unop_pack_snorm_4x8: @@ -2321,16 +2569,16 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir) * for patch inputs), so only the array element type is considered. */ static bool -is_inout_array(unsigned stage, ir_variable *var, bool *is_2d) +is_inout_array(unsigned stage, ir_variable *var, bool *remove_array) { const glsl_type *type = var->type; + *remove_array = false; + if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) || (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out)) return false; - *is_2d = false; - if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) || (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) || stage == MESA_SHADER_TESS_CTRL) && @@ -2339,18 +2587,29 @@ is_inout_array(unsigned stage, ir_variable *var, bool *is_2d) return false; /* a system value probably */ type = var->type->fields.array; - *is_2d = true; + *remove_array = true; } return type->is_array() || type->is_matrix(); } +static unsigned +st_translate_interp_loc(ir_variable *var) +{ + if (var->data.centroid) + return TGSI_INTERPOLATE_LOC_CENTROID; + else if (var->data.sample) + return TGSI_INTERPOLATE_LOC_SAMPLE; + else + return TGSI_INTERPOLATE_LOC_CENTER; +} + void glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) { variable_storage *entry = find_variable_storage(ir->var); ir_variable *var = ir->var; - bool is_2d; + bool remove_array; if (!entry) { switch (var->data.mode) { @@ -2359,7 +2618,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) var->data.param_index); this->variables.push_tail(entry); break; - case ir_var_shader_in: + case ir_var_shader_in: { /* The linker assigns locations for varyings and attributes, * including deprecated builtins (like gl_Color), user-assign * generic attributes (glBindVertexLocation), and @@ -2367,63 +2626,104 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) */ assert(var->data.location != -1); - if (is_inout_array(shader->Stage, var, &is_2d)) { - struct array_decl *decl = &input_arrays[num_input_arrays]; + const glsl_type *type_without_array = var->type->without_array(); + struct inout_decl *decl = &inputs[num_inputs]; + unsigned component = var->data.location_frac; + unsigned num_components; + num_inputs++; + + if (type_without_array->is_64bit()) + component = component / 2; + if (type_without_array->vector_elements) + num_components = type_without_array->vector_elements; + else + num_components = 4; + + decl->mesa_index = var->data.location; + decl->interp = (glsl_interp_mode) var->data.interpolation; + decl->interp_loc = st_translate_interp_loc(var); + decl->base_type = type_without_array->base_type; + decl->usage_mask = u_bit_consecutive(component, num_components); - decl->mesa_index = var->data.location; + if (is_inout_array(shader->Stage, var, &remove_array)) { decl->array_id = num_input_arrays + 1; - if (is_2d) { - decl->array_size = type_size(var->type->fields.array); - decl->array_type = var->type->fields.array->without_array()->base_type; - } else { - decl->array_size = type_size(var->type); - decl->array_type = var->type->without_array()->base_type; - } num_input_arrays++; - - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location, - decl->array_id); - } - else { - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location); + } else { + decl->array_id = 0; } + + if (remove_array) + decl->size = type_size(var->type->fields.array); + else + decl->size = type_size(var->type); + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + decl->mesa_index, + decl->array_id); + entry->component = component; + this->variables.push_tail(entry); break; - case ir_var_shader_out: + } + case ir_var_shader_out: { assert(var->data.location != -1); - if (is_inout_array(shader->Stage, var, &is_2d)) { - struct array_decl *decl = &output_arrays[num_output_arrays]; + const glsl_type *type_without_array = var->type->without_array(); + struct inout_decl *decl = &outputs[num_outputs]; + unsigned component = var->data.location_frac; + unsigned num_components; + num_outputs++; - decl->mesa_index = var->data.location; + if (type_without_array->is_64bit()) + component = component / 2; + if (type_without_array->vector_elements) + num_components = type_without_array->vector_elements; + else + num_components = 4; + + decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; + decl->base_type = type_without_array->base_type; + decl->usage_mask = u_bit_consecutive(component, num_components); + if (var->data.stream & (1u << 31)) { + decl->gs_out_streams = var->data.stream & ~(1u << 31); + } else { + assert(var->data.stream < 4); + decl->gs_out_streams = 0; + for (unsigned i = 0; i < num_components; ++i) + decl->gs_out_streams |= var->data.stream << (2 * (component + i)); + } + + if (is_inout_array(shader->Stage, var, &remove_array)) { decl->array_id = num_output_arrays + 1; - if (is_2d) { - decl->array_size = type_size(var->type->fields.array); - decl->array_type = var->type->fields.array->without_array()->base_type; - } else { - decl->array_size = type_size(var->type); - decl->array_type = var->type->without_array()->base_type; - } num_output_arrays++; + } else { + decl->array_id = 0; + } + if (remove_array) + decl->size = type_size(var->type->fields.array); + else + decl->size = type_size(var->type); + + if (var->data.fb_fetch_output) { + st_dst_reg dst = st_dst_reg(get_temp(var->type)); + st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index, + var->type, component, decl->array_id); + emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src); + entry = new(mem_ctx) variable_storage(var, dst.file, dst.index, + dst.array_id); + } else { entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, - var->data.location, + decl->mesa_index, decl->array_id); } - else { - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location - + FRAG_RESULT_MAX * - var->data.index); - } + entry->component = component; + this->variables.push_tail(entry); break; + } case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, PROGRAM_SYSTEM_VALUE, @@ -2433,7 +2733,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) case ir_var_temporary: st_src_reg src = get_temp(var->type); - entry = new(mem_ctx) variable_storage(var, src.file, src.index); + entry = new(mem_ctx) variable_storage(var, src.file, src.index, + src.array_id); this->variables.push_tail(entry); break; @@ -2445,8 +2746,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } } - this->result = st_src_reg(entry->file, entry->index, var->type); - this->result.array_id = entry->array_id; + this->result = st_src_reg(entry->file, entry->index, var->type, + entry->component, entry->array_id); if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double()) this->result.is_double_vertex_input = true; if (!native_integers) @@ -2454,10 +2755,10 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } static void -shrink_array_declarations(struct array_decl *arrays, unsigned count, - GLbitfield64 usage_mask, +shrink_array_declarations(struct inout_decl *decls, unsigned count, + GLbitfield64* usage_mask, GLbitfield64 double_usage_mask, - GLbitfield patch_usage_mask) + GLbitfield* patch_usage_mask) { unsigned i; int j; @@ -2466,42 +2767,60 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count, * of the arrays. For example, mat4[3] where only mat[1] is used. */ for (i = 0; i < count; i++) { - struct array_decl *decl = &arrays[i]; + struct inout_decl *decl = &decls[i]; + if (!decl->array_id) + continue; /* Shrink the beginning. */ - for (j = 0; j < (int)decl->array_size; j++) { + for (j = 0; j < (int)decl->size; j++) { if (decl->mesa_index >= VARYING_SLOT_PATCH0) { - if (patch_usage_mask & + if (*patch_usage_mask & BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) break; } else { - if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) break; if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) break; } decl->mesa_index++; - decl->array_size--; + decl->size--; j--; } /* Shrink the end. */ - for (j = decl->array_size-1; j >= 0; j--) { + for (j = decl->size-1; j >= 0; j--) { if (decl->mesa_index >= VARYING_SLOT_PATCH0) { - if (patch_usage_mask & + if (*patch_usage_mask & BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) break; } else { - if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) break; if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) break; } - decl->array_size--; + decl->size--; + } + + /* When not all entries of an array are accessed, we mark them as used + * here anyway, to ensure that the input/output mapping logic doesn't get + * confused. + * + * TODO This happens when an array isn't used via indirect access, which + * some game ports do (at least eON-based). There is an optimization + * opportunity here by replacing the array declaration with non-array + * declarations of those slots that are actually used. + */ + for (j = 1; j < (int)decl->size; ++j) { + if (decl->mesa_index >= VARYING_SLOT_PATCH0) + *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); + else + *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); } } } @@ -2591,12 +2910,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } } - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - src.swizzle = swizzle_for_size(ir->type->vector_elements); - else - src.swizzle = SWIZZLE_NOOP; - /* Change the register type to the element type of the array. */ src.type = ir->type->base_type; @@ -2634,7 +2947,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) * ir_dereference handler. */ static st_dst_reg -get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component) { /* The LHS must be a dereference. If the LHS is a variable indexed array * access of a vector, it must be separated into a series conditional moves @@ -2646,10 +2959,12 @@ get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) assert(!deref_array->array->type->is_vector()); } - /* Use the rvalue deref handler for the most part. We'll ignore - * swizzles in it and write swizzles using writemask, though. + /* Use the rvalue deref handler for the most part. We write swizzles using + * the writemask, but we do extract the base component for enhanced layouts + * from the source swizzle. */ ir->accept(v); + *component = GET_SWZ(v->result.swizzle, 0); return st_dst_reg(v->result); } @@ -2776,7 +3091,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * st_dst_reg *l, st_src_reg *r, st_src_reg *cond, bool cond_swap) { - if (type->base_type == GLSL_TYPE_STRUCT) { + if (type->is_record()) { for (unsigned int i = 0; i < type->length; i++) { emit_block_mov(ir, type->fields.structure[i].type, l, r, cond, cond_swap); @@ -2805,6 +3120,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * assert(type->is_scalar() || type->is_vector()); + l->type = type->base_type; r->type = type->base_type; if (cond) { st_src_reg l_src = st_src_reg(*l); @@ -2834,53 +3150,48 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * void glsl_to_tgsi_visitor::visit(ir_assignment *ir) { + int dst_component; st_dst_reg l; st_src_reg r; ir->rhs->accept(this); r = this->result; - l = get_assignment_lhs(ir->lhs, this); + l = get_assignment_lhs(ir->lhs, this, &dst_component); - /* FINISHME: This should really set to the correct maximal writemask for each - * FINISHME: component written (in the loops below). This case can only - * FINISHME: occur for matrices, arrays, and structures. - */ - if (ir->write_mask == 0) { - assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); - - if (ir->lhs->type->is_array() || ir->lhs->type->without_array()->is_matrix()) { - if (ir->lhs->type->without_array()->is_64bit()) { - switch (ir->lhs->type->without_array()->vector_elements) { - case 1: - l.writemask = WRITEMASK_X; - break; - case 2: - l.writemask = WRITEMASK_XY; - break; - case 3: - l.writemask = WRITEMASK_XYZ; - break; - case 4: - l.writemask = WRITEMASK_XYZW; - break; - } - } else - l.writemask = WRITEMASK_XYZW; - } - } else if (ir->lhs->type->is_scalar() && - !ir->lhs->type->is_64bit() && - ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) { - /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the - * FINISHME: W component of fragment shader output zero, work correctly. - */ - l.writemask = WRITEMASK_XYZW; - } else { + { int swizzles[4]; int first_enabled_chan = 0; int rhs_chan = 0; + ir_variable *variable = ir->lhs->variable_referenced(); + + if (shader->Stage == MESA_SHADER_FRAGMENT && + variable->data.mode == ir_var_shader_out && + (variable->data.location == FRAG_RESULT_DEPTH || + variable->data.location == FRAG_RESULT_STENCIL)) { + assert(ir->lhs->type->is_scalar()); + assert(ir->write_mask == WRITEMASK_X); + + if (variable->data.location == FRAG_RESULT_DEPTH) + l.writemask = WRITEMASK_Z; + else { + assert(variable->data.location == FRAG_RESULT_STENCIL); + l.writemask = WRITEMASK_Y; + } + } else if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); - l.writemask = ir->write_mask; + unsigned num_elements = ir->lhs->type->without_array()->vector_elements; + + if (num_elements) { + l.writemask = u_bit_consecutive(0, num_elements); + } else { + /* The type is a struct or an array of (array of) structs. */ + l.writemask = WRITEMASK_XYZW; + } + } else { + l.writemask = ir->write_mask; + } for (int i = 0; i < 4; i++) { if (l.writemask & (1 << i)) { @@ -2889,6 +3200,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } } + l.writemask = l.writemask << dst_component; + /* Swizzle a small RHS vector into the channels being written. * * glsl ir treats write_mask as dictating how many channels are @@ -2916,6 +3229,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->rhs->as_expression() && this->instructions.get_tail() && ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded && type_size(ir->lhs->type) == 1 && l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) { /* To avoid emitting an extra MOV when assigning an expression to a @@ -2950,7 +3264,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. */ - if (ir->type->base_type == GLSL_TYPE_STRUCT) { + if (ir->type->is_record()) { st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); @@ -3069,6 +3383,18 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) memcpy(&values[i * 2], &ir->value.d[i], sizeof(double)); } break; + case GLSL_TYPE_INT64: + gl_type = GL_INT64_ARB; + for (i = 0; i < ir->type->vector_elements; i++) { + memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t)); + } + break; + case GLSL_TYPE_UINT64: + gl_type = GL_UNSIGNED_INT64_ARB; + for (i = 0; i < ir->type->vector_elements; i++) { + memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t)); + } + break; case GLSL_TYPE_UINT: gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { @@ -3105,46 +3431,9 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) &this->result.swizzle); } -function_entry * -glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) -{ - foreach_in_list_use_after(function_entry, entry, &this->function_signatures) { - if (entry->sig == sig) - return entry; - } - - entry = ralloc(mem_ctx, function_entry); - entry->sig = sig; - entry->sig_id = this->next_signature_id++; - entry->bgn_inst = NULL; - - /* Allocate storage for all the parameters. */ - foreach_in_list(ir_variable, param, &sig->parameters) { - variable_storage *storage; - - storage = find_variable_storage(param); - assert(!storage); - - st_src_reg src = get_temp(param->type); - - storage = new(mem_ctx) variable_storage(param, src.file, src.index); - this->variables.push_tail(storage); - } - - if (!sig->return_type->is_void()) { - entry->return_reg = get_temp(sig->return_type); - } else { - entry->return_reg = undef_src; - } - - this->function_signatures.push_tail(entry); - return entry; -} - void glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) { - const char *callee = ir->callee->function_name(); exec_node *param = ir->actual_parameters.get_head(); ir_dereference *deref = static_cast(param); ir_variable *location = deref->variable_referenced(); @@ -3154,9 +3443,10 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) /* Calculate the surface offset */ st_src_reg offset; - unsigned array_size = 0, base = 0, index = 0; + unsigned array_size = 0, base = 0; + uint16_t index = 0; - get_deref_offsets(deref, &array_size, &base, &index, &offset); + get_deref_offsets(deref, &array_size, &base, &index, &offset, false); if (offset.file != PROGRAM_UNDEFINED) { emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), @@ -3173,12 +3463,12 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) glsl_to_tgsi_instruction *inst; - if (!strcmp("__intrinsic_atomic_read", callee)) { + if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) { inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); - } else if (!strcmp("__intrinsic_atomic_increment", callee)) { + } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) { inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, st_src_reg_for_int(1)); - } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { + } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) { inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, st_src_reg_for_int(-1)); emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); @@ -3189,34 +3479,37 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) st_src_reg data = this->result, data2 = undef_src; unsigned opcode; - if (!strcmp("__intrinsic_atomic_add", callee)) + switch (ir->callee->intrinsic_id) { + case ir_intrinsic_atomic_counter_add: opcode = TGSI_OPCODE_ATOMUADD; - else if (!strcmp("__intrinsic_atomic_min", callee)) + break; + case ir_intrinsic_atomic_counter_min: opcode = TGSI_OPCODE_ATOMIMIN; - else if (!strcmp("__intrinsic_atomic_max", callee)) + break; + case ir_intrinsic_atomic_counter_max: opcode = TGSI_OPCODE_ATOMIMAX; - else if (!strcmp("__intrinsic_atomic_and", callee)) + break; + case ir_intrinsic_atomic_counter_and: opcode = TGSI_OPCODE_ATOMAND; - else if (!strcmp("__intrinsic_atomic_or", callee)) + break; + case ir_intrinsic_atomic_counter_or: opcode = TGSI_OPCODE_ATOMOR; - else if (!strcmp("__intrinsic_atomic_xor", callee)) + break; + case ir_intrinsic_atomic_counter_xor: opcode = TGSI_OPCODE_ATOMXOR; - else if (!strcmp("__intrinsic_atomic_exchange", callee)) + break; + case ir_intrinsic_atomic_counter_exchange: opcode = TGSI_OPCODE_ATOMXCHG; - else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) { + break; + case ir_intrinsic_atomic_counter_comp_swap: { opcode = TGSI_OPCODE_ATOMCAS; param = param->get_next(); val = ((ir_instruction *)param)->as_rvalue(); val->accept(this); data2 = this->result; - } else if (!strcmp("__intrinsic_atomic_sub", callee)) { - opcode = TGSI_OPCODE_ATOMUADD; - st_src_reg res = get_temp(glsl_type::uvec4_type); - st_dst_reg dstres = st_dst_reg(res); - dstres.writemask = dst.writemask; - emit_asm(ir, TGSI_OPCODE_INEG, dstres, data); - data = res; - } else { + break; + } + default: assert(!"Unexpected intrinsic"); return; } @@ -3224,13 +3517,12 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) inst = emit_asm(ir, opcode, dst, offset, data, data2); } - inst->buffer = buffer; + inst->resource = buffer; } void glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) { - const char *callee = ir->callee->function_name(); exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); @@ -3266,11 +3558,11 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) glsl_to_tgsi_instruction *inst; - if (!strcmp("__intrinsic_load_ssbo", callee)) { + if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) { inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); if (dst.type == GLSL_TYPE_BOOL) emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0)); - } else if (!strcmp("__intrinsic_store_ssbo", callee)) { + } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) { param = param->get_next(); ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); val->accept(this); @@ -3289,27 +3581,36 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) st_src_reg data = this->result, data2 = undef_src; unsigned opcode; - if (!strcmp("__intrinsic_atomic_add_ssbo", callee)) + switch (ir->callee->intrinsic_id) { + case ir_intrinsic_ssbo_atomic_add: opcode = TGSI_OPCODE_ATOMUADD; - else if (!strcmp("__intrinsic_atomic_min_ssbo", callee)) + break; + case ir_intrinsic_ssbo_atomic_min: opcode = TGSI_OPCODE_ATOMIMIN; - else if (!strcmp("__intrinsic_atomic_max_ssbo", callee)) + break; + case ir_intrinsic_ssbo_atomic_max: opcode = TGSI_OPCODE_ATOMIMAX; - else if (!strcmp("__intrinsic_atomic_and_ssbo", callee)) + break; + case ir_intrinsic_ssbo_atomic_and: opcode = TGSI_OPCODE_ATOMAND; - else if (!strcmp("__intrinsic_atomic_or_ssbo", callee)) + break; + case ir_intrinsic_ssbo_atomic_or: opcode = TGSI_OPCODE_ATOMOR; - else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee)) + break; + case ir_intrinsic_ssbo_atomic_xor: opcode = TGSI_OPCODE_ATOMXOR; - else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee)) + break; + case ir_intrinsic_ssbo_atomic_exchange: opcode = TGSI_OPCODE_ATOMXCHG; - else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) { + break; + case ir_intrinsic_ssbo_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; param = param->get_next(); val = ((ir_instruction *)param)->as_rvalue(); val->accept(this); data2 = this->result; - } else { + break; + default: assert(!"Unexpected intrinsic"); return; } @@ -3329,53 +3630,65 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) */ unsigned op = inst->op; do { - inst->buffer = buffer; + inst->resource = buffer; if (access) inst->buffer_access = access->value.u[0]; + + if (inst == this->instructions.get_head_raw()) + break; inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - if (inst->op == TGSI_OPCODE_UADD) + + if (inst->op == TGSI_OPCODE_UADD) { + if (inst == this->instructions.get_head_raw()) + break; inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - } while (inst && inst->op == op && inst->buffer.file == PROGRAM_UNDEFINED); + } + } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); } void glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) { - const char *callee = ir->callee->function_name(); - - if (!strcmp("__intrinsic_memory_barrier", callee)) + switch (ir->callee->intrinsic_id) { + case ir_intrinsic_memory_barrier: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | TGSI_MEMBAR_SHADER_IMAGE | TGSI_MEMBAR_SHARED)); - else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee)) + break; + case ir_intrinsic_memory_barrier_atomic_counter: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); - else if (!strcmp("__intrinsic_memory_barrier_buffer", callee)) + break; + case ir_intrinsic_memory_barrier_buffer: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); - else if (!strcmp("__intrinsic_memory_barrier_image", callee)) + break; + case ir_intrinsic_memory_barrier_image: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); - else if (!strcmp("__intrinsic_memory_barrier_shared", callee)) + break; + case ir_intrinsic_memory_barrier_shared: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_SHARED)); - else if (!strcmp("__intrinsic_group_memory_barrier", callee)) + break; + case ir_intrinsic_group_memory_barrier: emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | TGSI_MEMBAR_SHADER_IMAGE | TGSI_MEMBAR_SHARED | TGSI_MEMBAR_THREAD_GROUP)); - else + break; + default: assert(!"Unexpected memory barrier intrinsic"); + } } void glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) { - const char *callee = ir->callee->function_name(); exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); @@ -3395,10 +3708,10 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) glsl_to_tgsi_instruction *inst; - if (!strcmp("__intrinsic_load_shared", callee)) { + if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) { inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); - inst->buffer = buffer; - } else if (!strcmp("__intrinsic_store_shared", callee)) { + inst->resource = buffer; + } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) { param = param->get_next(); ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); val->accept(this); @@ -3410,7 +3723,7 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) dst.type = this->result.type; inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); - inst->buffer = buffer; + inst->resource = buffer; } else { param = param->get_next(); ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); @@ -3418,40 +3731,48 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) st_src_reg data = this->result, data2 = undef_src; unsigned opcode; - if (!strcmp("__intrinsic_atomic_add_shared", callee)) + switch (ir->callee->intrinsic_id) { + case ir_intrinsic_shared_atomic_add: opcode = TGSI_OPCODE_ATOMUADD; - else if (!strcmp("__intrinsic_atomic_min_shared", callee)) + break; + case ir_intrinsic_shared_atomic_min: opcode = TGSI_OPCODE_ATOMIMIN; - else if (!strcmp("__intrinsic_atomic_max_shared", callee)) + break; + case ir_intrinsic_shared_atomic_max: opcode = TGSI_OPCODE_ATOMIMAX; - else if (!strcmp("__intrinsic_atomic_and_shared", callee)) + break; + case ir_intrinsic_shared_atomic_and: opcode = TGSI_OPCODE_ATOMAND; - else if (!strcmp("__intrinsic_atomic_or_shared", callee)) + break; + case ir_intrinsic_shared_atomic_or: opcode = TGSI_OPCODE_ATOMOR; - else if (!strcmp("__intrinsic_atomic_xor_shared", callee)) + break; + case ir_intrinsic_shared_atomic_xor: opcode = TGSI_OPCODE_ATOMXOR; - else if (!strcmp("__intrinsic_atomic_exchange_shared", callee)) + break; + case ir_intrinsic_shared_atomic_exchange: opcode = TGSI_OPCODE_ATOMXCHG; - else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + break; + case ir_intrinsic_shared_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; param = param->get_next(); val = ((ir_instruction *)param)->as_rvalue(); val->accept(this); data2 = this->result; - } else { + break; + default: assert(!"Unexpected intrinsic"); return; } inst = emit_asm(ir, opcode, dst, off, data, data2); - inst->buffer = buffer; + inst->resource = buffer; } } void glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) { - const char *callee = ir->callee->function_name(); exec_node *param = ir->actual_parameters.get_head(); ir_dereference *img = (ir_dereference *)param; @@ -3463,7 +3784,8 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); get_deref_offsets(img, &sampler_array_size, &sampler_base, - (unsigned int *)&image.index, &reladdr); + (uint16_t*)&image.index, &reladdr, true); + if (reladdr.file != PROGRAM_UNDEFINED) { image.reladdr = ralloc(mem_ctx, st_src_reg); *image.reladdr = reladdr; @@ -3479,10 +3801,10 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) glsl_to_tgsi_instruction *inst; - if (!strcmp("__intrinsic_image_size", callee)) { + if (ir->callee->intrinsic_id == ir_intrinsic_image_size) { dst.writemask = WRITEMASK_XYZ; inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); - } else if (!strcmp("__intrinsic_image_samples", callee)) { + } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) { st_src_reg res = get_temp(glsl_type::ivec4_type); st_dst_reg dstres = st_dst_reg(res); dstres.writemask = WRITEMASK_W; @@ -3534,27 +3856,38 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) assert(param->is_tail_sentinel()); unsigned opcode; - if (!strcmp("__intrinsic_image_load", callee)) + switch (ir->callee->intrinsic_id) { + case ir_intrinsic_image_load: opcode = TGSI_OPCODE_LOAD; - else if (!strcmp("__intrinsic_image_store", callee)) + break; + case ir_intrinsic_image_store: opcode = TGSI_OPCODE_STORE; - else if (!strcmp("__intrinsic_image_atomic_add", callee)) + break; + case ir_intrinsic_image_atomic_add: opcode = TGSI_OPCODE_ATOMUADD; - else if (!strcmp("__intrinsic_image_atomic_min", callee)) + break; + case ir_intrinsic_image_atomic_min: opcode = TGSI_OPCODE_ATOMIMIN; - else if (!strcmp("__intrinsic_image_atomic_max", callee)) + break; + case ir_intrinsic_image_atomic_max: opcode = TGSI_OPCODE_ATOMIMAX; - else if (!strcmp("__intrinsic_image_atomic_and", callee)) + break; + case ir_intrinsic_image_atomic_and: opcode = TGSI_OPCODE_ATOMAND; - else if (!strcmp("__intrinsic_image_atomic_or", callee)) + break; + case ir_intrinsic_image_atomic_or: opcode = TGSI_OPCODE_ATOMOR; - else if (!strcmp("__intrinsic_image_atomic_xor", callee)) + break; + case ir_intrinsic_image_atomic_xor: opcode = TGSI_OPCODE_ATOMXOR; - else if (!strcmp("__intrinsic_image_atomic_exchange", callee)) + break; + case ir_intrinsic_image_atomic_exchange: opcode = TGSI_OPCODE_ATOMXCHG; - else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee)) + break; + case ir_intrinsic_image_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; - else { + break; + default: assert(!"Unexpected intrinsic"); return; } @@ -3564,43 +3897,11 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) inst->dst[0].writemask = WRITEMASK_XYZW; } - inst->buffer = image; + inst->resource = image; inst->sampler_array_size = sampler_array_size; inst->sampler_base = sampler_base; - switch (type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - inst->tex_target = (type->sampler_array) - ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - break; - case GLSL_SAMPLER_DIM_2D: - inst->tex_target = (type->sampler_array) - ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - break; - case GLSL_SAMPLER_DIM_3D: - inst->tex_target = TEXTURE_3D_INDEX; - break; - case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = (type->sampler_array) - ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; - break; - case GLSL_SAMPLER_DIM_RECT: - inst->tex_target = TEXTURE_RECT_INDEX; - break; - case GLSL_SAMPLER_DIM_BUF: - inst->tex_target = TEXTURE_BUFFER_INDEX; - break; - case GLSL_SAMPLER_DIM_EXTERNAL: - inst->tex_target = TEXTURE_EXTERNAL_INDEX; - break; - case GLSL_SAMPLER_DIM_MS: - inst->tex_target = (type->sampler_array) - ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; - break; - default: - assert(!"Should not get here."); - } - + inst->tex_target = type->sampler_index(); inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), _mesa_get_shader_image_format(imgvar->data.image_format)); @@ -3615,155 +3916,103 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) void glsl_to_tgsi_visitor::visit(ir_call *ir) { - glsl_to_tgsi_instruction *call_inst; ir_function_signature *sig = ir->callee; - const char *callee = sig->function_name(); - function_entry *entry; - int i; /* Filter out intrinsics */ - if (!strcmp("__intrinsic_atomic_read", callee) || - !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee) || - !strcmp("__intrinsic_atomic_add", callee) || - !strcmp("__intrinsic_atomic_sub", callee) || - !strcmp("__intrinsic_atomic_min", callee) || - !strcmp("__intrinsic_atomic_max", callee) || - !strcmp("__intrinsic_atomic_and", callee) || - !strcmp("__intrinsic_atomic_or", callee) || - !strcmp("__intrinsic_atomic_xor", callee) || - !strcmp("__intrinsic_atomic_exchange", callee) || - !strcmp("__intrinsic_atomic_comp_swap", callee)) { + switch (sig->intrinsic_id) { + case ir_intrinsic_atomic_counter_read: + case ir_intrinsic_atomic_counter_increment: + case ir_intrinsic_atomic_counter_predecrement: + case ir_intrinsic_atomic_counter_add: + case ir_intrinsic_atomic_counter_min: + case ir_intrinsic_atomic_counter_max: + case ir_intrinsic_atomic_counter_and: + case ir_intrinsic_atomic_counter_or: + case ir_intrinsic_atomic_counter_xor: + case ir_intrinsic_atomic_counter_exchange: + case ir_intrinsic_atomic_counter_comp_swap: visit_atomic_counter_intrinsic(ir); return; - } - if (!strcmp("__intrinsic_load_ssbo", callee) || - !strcmp("__intrinsic_store_ssbo", callee) || - !strcmp("__intrinsic_atomic_add_ssbo", callee) || - !strcmp("__intrinsic_atomic_min_ssbo", callee) || - !strcmp("__intrinsic_atomic_max_ssbo", callee) || - !strcmp("__intrinsic_atomic_and_ssbo", callee) || - !strcmp("__intrinsic_atomic_or_ssbo", callee) || - !strcmp("__intrinsic_atomic_xor_ssbo", callee) || - !strcmp("__intrinsic_atomic_exchange_ssbo", callee) || - !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) { + case ir_intrinsic_ssbo_load: + case ir_intrinsic_ssbo_store: + case ir_intrinsic_ssbo_atomic_add: + case ir_intrinsic_ssbo_atomic_min: + case ir_intrinsic_ssbo_atomic_max: + case ir_intrinsic_ssbo_atomic_and: + case ir_intrinsic_ssbo_atomic_or: + case ir_intrinsic_ssbo_atomic_xor: + case ir_intrinsic_ssbo_atomic_exchange: + case ir_intrinsic_ssbo_atomic_comp_swap: visit_ssbo_intrinsic(ir); return; - } - if (!strcmp("__intrinsic_memory_barrier", callee) || - !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) || - !strcmp("__intrinsic_memory_barrier_buffer", callee) || - !strcmp("__intrinsic_memory_barrier_image", callee) || - !strcmp("__intrinsic_memory_barrier_shared", callee) || - !strcmp("__intrinsic_group_memory_barrier", callee)) { + case ir_intrinsic_memory_barrier: + case ir_intrinsic_memory_barrier_atomic_counter: + case ir_intrinsic_memory_barrier_buffer: + case ir_intrinsic_memory_barrier_image: + case ir_intrinsic_memory_barrier_shared: + case ir_intrinsic_group_memory_barrier: visit_membar_intrinsic(ir); return; - } - if (!strcmp("__intrinsic_load_shared", callee) || - !strcmp("__intrinsic_store_shared", callee) || - !strcmp("__intrinsic_atomic_add_shared", callee) || - !strcmp("__intrinsic_atomic_min_shared", callee) || - !strcmp("__intrinsic_atomic_max_shared", callee) || - !strcmp("__intrinsic_atomic_and_shared", callee) || - !strcmp("__intrinsic_atomic_or_shared", callee) || - !strcmp("__intrinsic_atomic_xor_shared", callee) || - !strcmp("__intrinsic_atomic_exchange_shared", callee) || - !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + case ir_intrinsic_shared_load: + case ir_intrinsic_shared_store: + case ir_intrinsic_shared_atomic_add: + case ir_intrinsic_shared_atomic_min: + case ir_intrinsic_shared_atomic_max: + case ir_intrinsic_shared_atomic_and: + case ir_intrinsic_shared_atomic_or: + case ir_intrinsic_shared_atomic_xor: + case ir_intrinsic_shared_atomic_exchange: + case ir_intrinsic_shared_atomic_comp_swap: visit_shared_intrinsic(ir); return; - } - if (!strcmp("__intrinsic_image_load", callee) || - !strcmp("__intrinsic_image_store", callee) || - !strcmp("__intrinsic_image_atomic_add", callee) || - !strcmp("__intrinsic_image_atomic_min", callee) || - !strcmp("__intrinsic_image_atomic_max", callee) || - !strcmp("__intrinsic_image_atomic_and", callee) || - !strcmp("__intrinsic_image_atomic_or", callee) || - !strcmp("__intrinsic_image_atomic_xor", callee) || - !strcmp("__intrinsic_image_atomic_exchange", callee) || - !strcmp("__intrinsic_image_atomic_comp_swap", callee) || - !strcmp("__intrinsic_image_size", callee) || - !strcmp("__intrinsic_image_samples", callee)) { + case ir_intrinsic_image_load: + case ir_intrinsic_image_store: + case ir_intrinsic_image_atomic_add: + case ir_intrinsic_image_atomic_min: + case ir_intrinsic_image_atomic_max: + case ir_intrinsic_image_atomic_and: + case ir_intrinsic_image_atomic_or: + case ir_intrinsic_image_atomic_xor: + case ir_intrinsic_image_atomic_exchange: + case ir_intrinsic_image_atomic_comp_swap: + case ir_intrinsic_image_size: + case ir_intrinsic_image_samples: visit_image_intrinsic(ir); return; - } - - entry = get_function_signature(sig); - /* Process in parameters. */ - foreach_two_lists(formal_node, &sig->parameters, - actual_node, &ir->actual_parameters) { - ir_rvalue *param_rval = (ir_rvalue *) actual_node; - ir_variable *param = (ir_variable *) formal_node; - if (param->data.mode == ir_var_function_in || - param->data.mode == ir_var_function_inout) { - variable_storage *storage = find_variable_storage(param); - assert(storage); - - param_rval->accept(this); - st_src_reg r = this->result; + case ir_intrinsic_shader_clock: { + ir->return_deref->accept(this); - st_dst_reg l; - l.file = storage->file; - l.index = storage->index; - l.reladdr = NULL; - l.writemask = WRITEMASK_XYZW; + st_dst_reg dst = st_dst_reg(this->result); + dst.writemask = TGSI_WRITEMASK_XY; - for (i = 0; i < type_size(param->type); i++) { - emit_asm(ir, TGSI_OPCODE_MOV, l, r); - l.index++; - r.index++; - } - } + emit_asm(ir, TGSI_OPCODE_CLOCK, dst); + return; } - /* Emit call instruction */ - call_inst = emit_asm(ir, TGSI_OPCODE_CAL); - call_inst->function = entry; - - /* Process out parameters. */ - foreach_two_lists(formal_node, &sig->parameters, - actual_node, &ir->actual_parameters) { - ir_rvalue *param_rval = (ir_rvalue *) actual_node; - ir_variable *param = (ir_variable *) formal_node; - - if (param->data.mode == ir_var_function_out || - param->data.mode == ir_var_function_inout) { - variable_storage *storage = find_variable_storage(param); - assert(storage); - - st_src_reg r; - r.file = storage->file; - r.index = storage->index; - r.reladdr = NULL; - r.swizzle = SWIZZLE_NOOP; - r.negate = 0; - - param_rval->accept(this); - st_dst_reg l = st_dst_reg(this->result); - - for (i = 0; i < type_size(param->type); i++) { - emit_asm(ir, TGSI_OPCODE_MOV, l, r); - l.index++; - r.index++; - } - } + case ir_intrinsic_invalid: + case ir_intrinsic_generic_load: + case ir_intrinsic_generic_store: + case ir_intrinsic_generic_atomic_add: + case ir_intrinsic_generic_atomic_and: + case ir_intrinsic_generic_atomic_or: + case ir_intrinsic_generic_atomic_xor: + case ir_intrinsic_generic_atomic_min: + case ir_intrinsic_generic_atomic_max: + case ir_intrinsic_generic_atomic_exchange: + case ir_intrinsic_generic_atomic_comp_swap: + unreachable("Invalid intrinsic"); } - - /* Process return value. */ - this->result = entry->return_reg; } void -glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head, - ir_dereference *tail, +glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, unsigned *array_elements, - unsigned *base, - unsigned *index, + uint16_t *index, st_src_reg *indirect, unsigned *location) { @@ -3773,7 +4022,7 @@ glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head, const glsl_type *struct_type = deref_record->record->type; int field_index = deref_record->record->type->field_index(deref_record->field); - calc_deref_offsets(head, deref_record->record->as_dereference(), array_elements, base, index, indirect, location); + calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location); assert(field_index >= 0); *location += struct_type->record_location_offset(field_index); @@ -3810,7 +4059,7 @@ glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head, *array_elements *= deref_arr->array->type->length; - calc_deref_offsets(head, deref_arr->array->as_dereference(), array_elements, base, index, indirect, location); + calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location); break; } default: @@ -3822,8 +4071,9 @@ void glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, unsigned *array_size, unsigned *base, - unsigned *index, - st_src_reg *reladdr) + uint16_t *index, + st_src_reg *reladdr, + bool opaque) { GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); unsigned location = 0; @@ -3837,7 +4087,7 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, assert(var); location = var->data.location; - calc_deref_offsets(ir, ir, array_size, base, index, reladdr, &location); + calc_deref_offsets(ir, array_size, index, reladdr, &location); /* * If we end up with no indirect then adjust the base to the index, @@ -3848,12 +4098,27 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, *array_size = 1; } - if (location != 0xffffffff) { - *base += this->shader_program->UniformStorage[location].opaque[shader].index; - *index += this->shader_program->UniformStorage[location].opaque[shader].index; + if (opaque) { + assert(location != 0xffffffff); + *base += this->shader_program->data->UniformStorage[location].opaque[shader].index; + *index += this->shader_program->data->UniformStorage[location].opaque[shader].index; } } +st_src_reg +glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) +{ + if (offset.reladdr || offset.reladdr2) { + st_src_reg tmp = get_temp(glsl_type::ivec2_type); + st_dst_reg tmp_dst = st_dst_reg(tmp); + tmp_dst.writemask = WRITEMASK_XY; + emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset); + return tmp; + } + + return offset; +} + void glsl_to_tgsi_visitor::visit(ir_texture *ir) { @@ -3864,14 +4129,15 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) glsl_to_tgsi_instruction *inst = NULL; unsigned opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; - unsigned sampler_array_size = 1, sampler_index = 0, sampler_base = 0; - bool is_cube_array = false; + unsigned sampler_array_size = 1, sampler_base = 0; + uint16_t sampler_index = 0; + bool is_cube_array = false, is_cube_shadow = false; unsigned i; - /* if we are a cube array sampler */ - if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - sampler_type->sampler_array)) { - is_cube_array = true; + /* if we are a cube array sampler or a cube shadow */ + if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { + is_cube_array = sampler_type->sampler_array; + is_cube_shadow = sampler_type->sampler_shadow; } if (ir->coordinate) { @@ -3898,18 +4164,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ result_src = get_temp(ir->type); result_dst = st_dst_reg(result_src); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; switch (ir->op) { case ir_tex: - opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; + opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; if (ir->offset) { ir->offset->accept(this); offset[0] = this->result; } break; case ir_txb: - if (is_cube_array || - sampler_type == glsl_type::samplerCubeShadow_type) { + if (is_cube_array || is_cube_shadow) { opcode = TGSI_OPCODE_TXB2; } else { @@ -3923,9 +4189,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } break; case ir_txl: - opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; - ir->lod_info.lod->accept(this); - lod_info = this->result; + if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { + opcode = TGSI_OPCODE_TEX_LZ; + } else { + opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + } if (ir->offset) { ir->offset->accept(this); offset[0] = this->result; @@ -3953,9 +4223,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) levels_src = get_temp(ir->type); break; case ir_txf: - opcode = TGSI_OPCODE_TXF; - ir->lod_info.lod->accept(this); - lod_info = this->result; + if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { + opcode = TGSI_OPCODE_TXF_LZ; + } else { + opcode = TGSI_OPCODE_TXF; + ir->lod_info.lod->accept(this); + lod_info = this->result; + } if (ir->offset) { ir->offset->accept(this); offset[0] = this->result; @@ -3972,16 +4246,17 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) component = this->result; if (ir->offset) { ir->offset->accept(this); - if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) { + if (ir->offset->type->is_array()) { const glsl_type *elt_type = ir->offset->type->fields.array; for (i = 0; i < ir->offset->type->length; i++) { offset[i] = this->result; offset[i].index += i * type_size(elt_type); offset[i].type = elt_type->base_type; offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); + offset[i] = canonicalize_gather_offset(offset[i]); } } else { - offset[0] = this->result; + offset[0] = canonicalize_gather_offset(this->result); } } break; @@ -4017,11 +4292,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * the shadow comparator value must also be projected. */ st_src_reg tmp_src = coord; - if (ir->shadow_comparitor) { + if (ir->shadow_comparator) { /* Slot the shadow value in as the second to last component of the * coord. */ - ir->shadow_comparitor->accept(this); + ir->shadow_comparator->accept(this); tmp_src = get_temp(glsl_type::vec4_type); st_dst_reg tmp_dst = st_dst_reg(tmp_src); @@ -4048,11 +4323,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * comparator was put in the correct place (and projected) by the code, * above, that handles by-hand projection. */ - if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { + if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { /* Slot the shadow value in as the second to last component of the * coord. */ - ir->shadow_comparitor->accept(this); + ir->shadow_comparator->accept(this); if (is_cube_array) { cube_sc = get_temp(glsl_type::float_type); @@ -4087,7 +4362,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, - &sampler_index, &reladdr); + &sampler_index, &reladdr, true); if (reladdr.file != PROGRAM_UNDEFINED) emit_arl(ir, sampler_reladdr, reladdr); @@ -4104,14 +4379,12 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) inst = emit_asm(ir, opcode, result_dst, lod_info); } else if (opcode == TGSI_OPCODE_TXQS) { inst = emit_asm(ir, opcode, result_dst); - } else if (opcode == TGSI_OPCODE_TXF) { - inst = emit_asm(ir, opcode, result_dst, coord); } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { inst = emit_asm(ir, opcode, result_dst, coord, lod_info); } else if (opcode == TGSI_OPCODE_TEX2) { inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else if (opcode == TGSI_OPCODE_TG4) { - if (is_cube_array && ir->shadow_comparitor) { + if (is_cube_array && ir->shadow_comparator) { inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else { inst = emit_asm(ir, opcode, result_dst, coord, component); @@ -4119,57 +4392,28 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } else inst = emit_asm(ir, opcode, result_dst, coord); - if (ir->shadow_comparitor) + if (ir->shadow_comparator) inst->tex_shadow = GL_TRUE; - inst->sampler.index = sampler_index; + inst->resource.index = sampler_index; inst->sampler_array_size = sampler_array_size; inst->sampler_base = sampler_base; if (reladdr.file != PROGRAM_UNDEFINED) { - inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(inst->sampler.reladdr, &reladdr, sizeof(reladdr)); + inst->resource.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr)); } if (ir->offset) { + if (!inst->tex_offsets) + inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET); + for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++) inst->tex_offsets[i] = offset[i]; inst->tex_offset_num_offset = i; } - switch (sampler_type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - break; - case GLSL_SAMPLER_DIM_2D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - break; - case GLSL_SAMPLER_DIM_3D: - inst->tex_target = TEXTURE_3D_INDEX; - break; - case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; - break; - case GLSL_SAMPLER_DIM_RECT: - inst->tex_target = TEXTURE_RECT_INDEX; - break; - case GLSL_SAMPLER_DIM_BUF: - inst->tex_target = TEXTURE_BUFFER_INDEX; - break; - case GLSL_SAMPLER_DIM_EXTERNAL: - inst->tex_target = TEXTURE_EXTERNAL_INDEX; - break; - case GLSL_SAMPLER_DIM_MS: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; - break; - default: - assert(!"Should not get here."); - } - + inst->tex_target = sampler_type->sampler_index(); inst->tex_type = ir->type->base_type; this->result = result_src; @@ -4178,23 +4422,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) void glsl_to_tgsi_visitor::visit(ir_return *ir) { - if (ir->get_value()) { - st_dst_reg l; - int i; - - assert(current_function); - - ir->get_value()->accept(this); - st_src_reg r = this->result; - - l = st_dst_reg(current_function->return_reg); - - for (i = 0; i < type_size(current_function->sig->return_type); i++) { - emit_asm(ir, TGSI_OPCODE_MOV, l, r); - l.index++; - r.index++; - } - } + assert(!ir->get_value()); emit_asm(ir, TGSI_OPCODE_RET); } @@ -4284,11 +4512,11 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() array_sizes = NULL; max_num_arrays = 0; next_array = 0; + num_inputs = 0; + num_outputs = 0; num_input_arrays = 0; num_output_arrays = 0; - next_signature_id = 1; num_immediates = 0; - current_function = NULL; num_address_regs = 0; samplers_used = 0; buffers_used = 0; @@ -4306,6 +4534,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() have_sqrt = false; have_fma = false; use_shared_memory = false; + has_tex_txf_lz = false; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() @@ -4343,23 +4572,23 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) st_translate_texture_target(inst->tex_target, inst->tex_shadow); if (inst->tex_shadow) { - prog->ShadowSamplers |= 1 << (inst->sampler.index + i); + prog->ShadowSamplers |= 1 << (inst->resource.index + i); } } } if (inst->tex_target == TEXTURE_EXTERNAL_INDEX) - prog->ExternalSamplersUsed |= 1 << inst->sampler.index; + prog->ExternalSamplersUsed |= 1 << inst->resource.index; - if (inst->buffer.file != PROGRAM_UNDEFINED && ( + if (inst->resource.file != PROGRAM_UNDEFINED && ( is_resource_instruction(inst->op) || inst->op == TGSI_OPCODE_STORE)) { - if (inst->buffer.file == PROGRAM_BUFFER) { - v->buffers_used |= 1 << inst->buffer.index; - } else if (inst->buffer.file == PROGRAM_MEMORY) { + if (inst->resource.file == PROGRAM_BUFFER) { + v->buffers_used |= 1 << inst->resource.index; + } else if (inst->resource.file == PROGRAM_MEMORY) { v->use_shared_memory = true; } else { - assert(inst->buffer.file == PROGRAM_IMAGE); + assert(inst->resource.file == PROGRAM_IMAGE); for (int i = 0; i < inst->sampler_array_size; i++) { unsigned idx = inst->sampler_base + i; v->images_used |= 1 << idx; @@ -4435,10 +4664,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || inst->dst[1].reladdr || inst->dst[1].reladdr2 || tgsi_get_opcode_info(inst->op)->is_branch || - inst->op == TGSI_OPCODE_BGNSUB || inst->op == TGSI_OPCODE_CONT || inst->op == TGSI_OPCODE_END || - inst->op == TGSI_OPCODE_ENDSUB || inst->op == TGSI_OPCODE_RET) { break; } @@ -4802,7 +5029,8 @@ glsl_to_tgsi_visitor::copy_propagate(void) inst->src[0].file != PROGRAM_ARRAY && !inst->src[0].reladdr && !inst->src[0].reladdr2 && - !inst->src[0].negate) { + !inst->src[0].negate && + !inst->src[0].abs) { for (int i = 0; i < 4; i++) { if (inst->dst[0].writemask & (1 << i)) { acp[4 * inst->dst[0].index + i] = inst; @@ -5126,10 +5354,6 @@ glsl_to_tgsi_visitor::renumber_registers(void) } /* ------------------------- TGSI conversion stuff -------------------------- */ -struct label { - unsigned branch_target; - unsigned token; -}; /** * Intermediate state used during shader translation. @@ -5154,33 +5378,16 @@ struct st_translate { struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct ureg_src shared_memory; - struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; - struct array_decl *input_arrays; - struct array_decl *output_arrays; + struct inout_decl *input_decls; + unsigned num_input_decls; + struct inout_decl *output_decls; + unsigned num_output_decls; const GLuint *inputMapping; const GLuint *outputMapping; - /* For every instruction that contains a label (eg CALL), keep - * details so that we can go back afterwards and emit the correct - * tgsi instruction number for each label. - */ - struct label *labels; - unsigned labels_size; - unsigned labels_count; - - /* Keep a record of the tgsi instruction number that each mesa - * instruction starts at, will be used to fix up labels after - * translation. - */ - unsigned *insn; - unsigned insn_size; - unsigned insn_count; - unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ - - boolean error; }; /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ @@ -5239,6 +5446,24 @@ _mesa_sysval_to_semantic(unsigned sysval) return TGSI_SEMANTIC_BLOCK_ID; case SYSTEM_VALUE_NUM_WORK_GROUPS: return TGSI_SEMANTIC_GRID_SIZE; + case SYSTEM_VALUE_LOCAL_GROUP_SIZE: + return TGSI_SEMANTIC_BLOCK_SIZE; + + /* ARB_shader_ballot */ + case SYSTEM_VALUE_SUBGROUP_SIZE: + return TGSI_SEMANTIC_SUBGROUP_SIZE; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return TGSI_SEMANTIC_SUBGROUP_INVOCATION; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + return TGSI_SEMANTIC_SUBGROUP_EQ_MASK; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + return TGSI_SEMANTIC_SUBGROUP_GE_MASK; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + return TGSI_SEMANTIC_SUBGROUP_GT_MASK; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + return TGSI_SEMANTIC_SUBGROUP_LE_MASK; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: + return TGSI_SEMANTIC_SUBGROUP_LT_MASK; /* Unhandled */ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: @@ -5250,53 +5475,6 @@ _mesa_sysval_to_semantic(unsigned sysval) } } - -/** - * Make note of a branch to a label in the TGSI code. - * After we've emitted all instructions, we'll go over the list - * of labels built here and patch the TGSI code with the actual - * location of each label. - */ -static unsigned *get_label(struct st_translate *t, unsigned branch_target) -{ - unsigned i; - - if (t->labels_count + 1 >= t->labels_size) { - t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); - t->labels = (struct label *)realloc(t->labels, - t->labels_size * sizeof(struct label)); - if (t->labels == NULL) { - static unsigned dummy; - t->error = TRUE; - return &dummy; - } - } - - i = t->labels_count++; - t->labels[i].branch_target = branch_target; - return &t->labels[i].token; -} - -/** - * Called prior to emitting the TGSI code for each instruction. - * Allocate additional space for instructions if needed. - * Update the insn[] array so the next glsl_to_tgsi_instruction points to - * the next TGSI instruction. - */ -static void set_insn_start(struct st_translate *t, unsigned start) -{ - if (t->insn_count + 1 >= t->insn_size) { - t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); - t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); - if (t->insn == NULL) { - t->error = TRUE; - return; - } - } - - t->insn[t->insn_count++] = start; -} - /** * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. */ @@ -5313,6 +5491,10 @@ emit_immediate(struct st_translate *t, return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_DOUBLE: return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size); + case GL_INT64_ARB: + return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size); + case GL_UNSIGNED_INT64_ARB: + return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size); case GL_INT: return ureg_DECL_immediate_int(ureg, &values[0].i, size); case GL_UNSIGNED_INT: @@ -5358,16 +5540,14 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, return t->temps[index]; case PROGRAM_ARRAY: - array = index >> 16; - - assert(array < t->num_temp_arrays); + assert(array_id && array_id <= t->num_temp_arrays); + array = array_id - 1; if (ureg_dst_is_undef(t->arrays[array])) t->arrays[array] = ureg_DECL_array_temporary( t->ureg, t->array_sizes[array], TRUE); - return ureg_dst_array_offset(t->arrays[array], - (int)(index & 0xFFFF) - 0x8000); + return ureg_dst_array_offset(t->arrays[array], index); case PROGRAM_OUTPUT: if (!array_id) { @@ -5384,13 +5564,15 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, return t->outputs[t->outputMapping[index]]; } else { - struct array_decl *decl = &t->output_arrays[array_id-1]; + struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id); unsigned mesa_index = decl->mesa_index; int slot = t->outputMapping[mesa_index]; assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT); - assert(t->outputs[slot].ArrayID == array_id); - return ureg_dst_array_offset(t->outputs[slot], index - mesa_index); + + struct ureg_dst dst = t->outputs[slot]; + dst.ArrayID = array_id; + return ureg_dst_array_offset(dst, index - mesa_index); } case PROGRAM_ADDRESS: @@ -5417,9 +5599,19 @@ src_register(struct st_translate *t, const st_src_reg *reg) case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: - case PROGRAM_OUTPUT: return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id)); + case PROGRAM_OUTPUT: { + struct ureg_dst dst = dst_register(t, reg->file, reg->index, reg->array_id); + assert(dst.WriteMask != 0); + unsigned shift = ffs(dst.WriteMask) - 1; + return ureg_swizzle(ureg_src(dst), + shift, + MIN2(shift + 1, 3), + MIN2(shift + 2, 3), + MIN2(shift + 3, 3)); + } + case PROGRAM_UNIFORM: assert(reg->index >= 0); return reg->index < t->num_constants ? @@ -5447,13 +5639,15 @@ src_register(struct st_translate *t, const st_src_reg *reg) return t->inputs[t->inputMapping[index] + double_reg2]; } else { - struct array_decl *decl = &t->input_arrays[reg->array_id-1]; + struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, reg->array_id); unsigned mesa_index = decl->mesa_index; int slot = t->inputMapping[mesa_index]; assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT); - assert(t->inputs[slot].ArrayID == reg->array_id); - return ureg_src_array_offset(t->inputs[slot], index + double_reg2 - mesa_index); + + struct ureg_src src = t->inputs[slot]; + src.ArrayID = reg->array_id; + return ureg_src_array_offset(src, index + double_reg2 - mesa_index); } case PROGRAM_ADDRESS: @@ -5529,6 +5723,9 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) GET_SWZ(src_reg->swizzle, 2) & 0x3, GET_SWZ(src_reg->swizzle, 3) & 0x3); + if (src_reg->abs) + src = ureg_abs(src); + if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); @@ -5542,60 +5739,24 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) static struct tgsi_texture_offset translate_tex_offset(struct st_translate *t, - const st_src_reg *in_offset, int idx) + const st_src_reg *in_offset) { struct tgsi_texture_offset offset; - struct ureg_src imm_src; - struct ureg_dst dst; - int array; - - switch (in_offset->file) { - case PROGRAM_IMMEDIATE: - assert(in_offset->index >= 0 && in_offset->index < t->num_immediates); - imm_src = t->immediates[in_offset->index]; + struct ureg_src src = translate_src(t, in_offset); - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = imm_src.SwizzleX; - offset.SwizzleY = imm_src.SwizzleY; - offset.SwizzleZ = imm_src.SwizzleZ; - offset.Padding = 0; - break; - case PROGRAM_INPUT: - imm_src = t->inputs[t->inputMapping[in_offset->index]]; - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); - offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); - offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); - offset.Padding = 0; - break; - case PROGRAM_TEMPORARY: - imm_src = ureg_src(t->temps[in_offset->index]); - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); - offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); - offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); - offset.Padding = 0; - break; - case PROGRAM_ARRAY: - array = in_offset->index >> 16; + offset.File = src.File; + offset.Index = src.Index; + offset.SwizzleX = src.SwizzleX; + offset.SwizzleY = src.SwizzleY; + offset.SwizzleZ = src.SwizzleZ; + offset.Padding = 0; - assert(array >= 0); - assert(array < (int)t->num_temp_arrays); + assert(!src.Indirect); + assert(!src.DimIndirect); + assert(!src.Dimension); + assert(!src.Absolute); /* those shouldn't be used with integers anyway */ + assert(!src.Negate); - dst = t->arrays[array]; - offset.File = dst.File; - offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000; - offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); - offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); - offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); - offset.Padding = 0; - break; - default: - break; - } return offset; } @@ -5626,20 +5787,16 @@ compile_tgsi_instruction(struct st_translate *t, switch(inst->op) { case TGSI_OPCODE_BGNLOOP: - case TGSI_OPCODE_CAL: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: assert(num_dst == 0); - ureg_label_insn(ureg, - inst->op, - src, num_src, - get_label(t, - inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); + ureg_insn(ureg, inst->op, NULL, 0, src, num_src); return; case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TEX_LZ: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: @@ -5647,19 +5804,20 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXQS: case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_TEX2: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TG4: case TGSI_OPCODE_LODQ: - src[num_src] = t->samplers[inst->sampler.index]; + src[num_src] = t->samplers[inst->resource.index]; assert(src[num_src].File != TGSI_FILE_NULL); - if (inst->sampler.reladdr) + if (inst->resource.reladdr) src[num_src] = ureg_src_indirect(src[num_src], ureg_src(t->address[2])); num_src++; for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { - texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i); + texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); @@ -5686,15 +5844,15 @@ compile_tgsi_instruction(struct st_translate *t, for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; num_src++; - if (inst->buffer.file == PROGRAM_MEMORY) { + if (inst->resource.file == PROGRAM_MEMORY) { src[0] = t->shared_memory; - } else if (inst->buffer.file == PROGRAM_BUFFER) { - src[0] = t->buffers[inst->buffer.index]; + } else if (inst->resource.file == PROGRAM_BUFFER) { + src[0] = t->buffers[inst->resource.index]; } else { - src[0] = t->images[inst->buffer.index]; + src[0] = t->images[inst->resource.index]; tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); } - if (inst->buffer.reladdr) + if (inst->resource.reladdr) src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2])); assert(src[0].File != TGSI_FILE_NULL); ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, @@ -5703,16 +5861,16 @@ compile_tgsi_instruction(struct st_translate *t, break; case TGSI_OPCODE_STORE: - if (inst->buffer.file == PROGRAM_MEMORY) { + if (inst->resource.file == PROGRAM_MEMORY) { dst[0] = ureg_dst(t->shared_memory); - } else if (inst->buffer.file == PROGRAM_BUFFER) { - dst[0] = ureg_dst(t->buffers[inst->buffer.index]); + } else if (inst->resource.file == PROGRAM_BUFFER) { + dst[0] = ureg_dst(t->buffers[inst->resource.index]); } else { - dst[0] = ureg_dst(t->images[inst->buffer.index]); + dst[0] = ureg_dst(t->images[inst->resource.index]); tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); } dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); - if (inst->buffer.reladdr) + if (inst->resource.reladdr) dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2])); assert(dst[0].File != TGSI_FILE_NULL); ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, @@ -5825,8 +5983,6 @@ emit_wpos(struct st_context *st, struct ureg_program *ureg, int wpos_transform_const) { - const struct gl_fragment_program *fp = - (const struct gl_fragment_program *) program; struct pipe_screen *pscreen = st->pipe->screen; GLfloat adjX = 0.0f; GLfloat adjY[2] = { 0.0f, 0.0f }; @@ -5859,7 +6015,7 @@ emit_wpos(struct st_context *st, * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 */ - if (fp->OriginUpperLeft) { + if (program->OriginUpperLeft) { /* Fragment shader wants origin in upper-left */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { /* the driver supports upper-left origin */ @@ -5886,7 +6042,7 @@ emit_wpos(struct st_context *st, assert(0); } - if (fp->PixelCenterInteger) { + if (program->PixelCenterInteger) { /* Fragment shader wants pixel center integer */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer */ @@ -5948,37 +6104,58 @@ emit_face_var(struct gl_context *ctx, struct st_translate *t) t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); } -static bool -find_array(unsigned attr, struct array_decl *arrays, unsigned count, - unsigned *array_id, unsigned *array_size) -{ - unsigned i; - - for (i = 0; i < count; i++) { - struct array_decl *decl = &arrays[i]; - - if (attr == decl->mesa_index) { - *array_id = decl->array_id; - *array_size = decl->array_size; - assert(*array_size); - return true; - } - } - return false; -} - static void -emit_compute_block_size(const struct gl_program *program, +emit_compute_block_size(const struct gl_program *prog, struct ureg_program *ureg) { - const struct gl_compute_program *cp = - (const struct gl_compute_program *)program; - ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, - cp->LocalSize[0]); + prog->info.cs.local_size[0]); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, - cp->LocalSize[1]); + prog->info.cs.local_size[1]); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, - cp->LocalSize[2]); + prog->info.cs.local_size[2]); +} + +struct sort_inout_decls { + bool operator()(const struct inout_decl &a, const struct inout_decl &b) const { + return mapping[a.mesa_index] < mapping[b.mesa_index]; + } + + const GLuint *mapping; +}; + +/* Sort the given array of decls by the corresponding slot (TGSI file index). + * + * This is for the benefit of older drivers which are broken when the + * declarations aren't sorted in this way. + */ +static void +sort_inout_decls_by_slot(struct inout_decl *decls, + unsigned count, + const GLuint mapping[]) +{ + sort_inout_decls sorter; + sorter.mapping = mapping; + std::sort(decls, decls + count, sorter); +} + +static unsigned +st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying) +{ + switch (glsl_qual) { + case INTERP_MODE_NONE: + if (varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1) + return TGSI_INTERPOLATE_COLOR; + return TGSI_INTERPOLATE_PERSPECTIVE; + case INTERP_MODE_SMOOTH: + return TGSI_INTERPOLATE_PERSPECTIVE; + case INTERP_MODE_FLAT: + return TGSI_INTERPOLATE_CONSTANT; + case INTERP_MODE_NOPERSPECTIVE: + return TGSI_INTERPOLATE_LINEAR; + default: + assert(0 && "unexpected interp mode in st_translate_interp()"); + return TGSI_INTERPOLATE_PERSPECTIVE; + } } /** @@ -5991,7 +6168,6 @@ emit_compute_block_size(const struct gl_program *program, * \param inputSemanticIndex the semantic index (ex: which texcoord) for * each input * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input - * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input * \param numOutputs number of output registers used * \param outputMapping maps Mesa fragment program outputs to TGSI * generic outputs @@ -6014,7 +6190,6 @@ st_translate_program( const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], - const GLuint interpLocation[], GLuint numOutputs, const GLuint outputMapping[], const GLuint outputSlotToAttr[], @@ -6050,48 +6225,49 @@ st_translate_program( */ switch (procType) { case PIPE_SHADER_FRAGMENT: - for (i = 0; i < numInputs; i++) { - unsigned array_id = 0; - unsigned array_size; - - if (find_array(inputSlotToAttr[i], program->input_arrays, - program->num_input_arrays, &array_id, &array_size)) { - /* We've found an array. Declare it so. */ - t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, - inputSemanticName[i], inputSemanticIndex[i], - interpMode[i], 0, interpLocation[i], - array_id, array_size); - - GLuint base_attr = inputSlotToAttr[i]; - while (i + 1 < numInputs && - inputSlotToAttr[i + 1] < base_attr + array_size) - ++i; - } - else { - t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, - inputSemanticName[i], inputSemanticIndex[i], - interpMode[i], 0, interpLocation[i], 0, 1); - } - } - break; case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_TESS_EVAL: case PIPE_SHADER_TESS_CTRL: - for (i = 0; i < numInputs; i++) { - unsigned array_id = 0; - unsigned array_size; - - if (find_array(inputSlotToAttr[i], program->input_arrays, - program->num_input_arrays, &array_id, &array_size)) { - /* We've found an array. Declare it so. */ - t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], - inputSemanticIndex[i], - array_id, array_size); - i += array_size - 1; + sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping); + + for (i = 0; i < program->num_inputs; ++i) { + struct inout_decl *decl = &program->inputs[i]; + unsigned slot = inputMapping[decl->mesa_index]; + struct ureg_src src; + ubyte tgsi_usage_mask = decl->usage_mask; + + if (glsl_base_type_is_64bit(decl->base_type)) { + if (tgsi_usage_mask == 1) + tgsi_usage_mask = TGSI_WRITEMASK_XY; + else if (tgsi_usage_mask == 2) + tgsi_usage_mask = TGSI_WRITEMASK_ZW; + else + tgsi_usage_mask = TGSI_WRITEMASK_XYZW; } - else { - t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], - inputSemanticIndex[i], 0, 1); + + unsigned interp_mode = 0; + unsigned interp_location = 0; + if (procType == PIPE_SHADER_FRAGMENT) { + assert(interpMode); + interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ? + interpMode[slot] : + st_translate_interp(decl->interp, inputSlotToAttr[slot]); + + interp_location = decl->interp_loc; + } + + src = ureg_DECL_fs_input_cyl_centroid_layout(ureg, + inputSemanticName[slot], inputSemanticIndex[slot], + interp_mode, 0, interp_location, slot, tgsi_usage_mask, + decl->array_id, decl->size); + + for (unsigned j = 0; j < decl->size; ++j) { + if (t->inputs[slot + j].File != TGSI_FILE_INPUT) { + /* The ArrayID is set up in dst_register */ + t->inputs[slot + j] = src; + t->inputs[slot + j].ArrayID = 0; + t->inputs[slot + j].Index += j; + } } } break; @@ -6117,23 +6293,35 @@ st_translate_program( case PIPE_SHADER_TESS_EVAL: case PIPE_SHADER_TESS_CTRL: case PIPE_SHADER_VERTEX: - for (i = 0; i < numOutputs; i++) { - unsigned array_id = 0; - unsigned array_size; - - if (find_array(outputSlotToAttr[i], program->output_arrays, - program->num_output_arrays, &array_id, &array_size)) { - /* We've found an array. Declare it so. */ - t->outputs[i] = ureg_DECL_output_array(ureg, - outputSemanticName[i], - outputSemanticIndex[i], - array_id, array_size); - i += array_size - 1; + sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping); + + for (i = 0; i < program->num_outputs; ++i) { + struct inout_decl *decl = &program->outputs[i]; + unsigned slot = outputMapping[decl->mesa_index]; + struct ureg_dst dst; + ubyte tgsi_usage_mask = decl->usage_mask; + + if (glsl_base_type_is_64bit(decl->base_type)) { + if (tgsi_usage_mask == 1) + tgsi_usage_mask = TGSI_WRITEMASK_XY; + else if (tgsi_usage_mask == 2) + tgsi_usage_mask = TGSI_WRITEMASK_ZW; + else + tgsi_usage_mask = TGSI_WRITEMASK_XYZW; } - else { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); + + dst = ureg_DECL_output_layout(ureg, + outputSemanticName[slot], outputSemanticIndex[slot], + decl->gs_out_streams, + slot, tgsi_usage_mask, decl->array_id, decl->size); + + for (unsigned j = 0; j < decl->size; ++j) { + if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) { + /* The ArrayID is set up in dst_register */ + t->outputs[slot + j] = dst; + t->outputs[slot + j].ArrayID = 0; + t->outputs[slot + j].Index += j; + } } } break; @@ -6142,16 +6330,16 @@ st_translate_program( } if (procType == PIPE_SHADER_FRAGMENT) { - if (program->shader->info.EarlyFragmentTests) + if (program->shader->Program->info.fs.early_fragment_tests) ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); - if (proginfo->InputsRead & VARYING_BIT_POS) { + if (proginfo->info.inputs_read & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. */ emit_wpos(st_context(ctx), t, proginfo, ureg, program->wpos_transform_const); } - if (proginfo->InputsRead & VARYING_BIT_FACE) + if (proginfo->info.inputs_read & VARYING_BIT_FACE) emit_face_var(ctx, t); for (i = 0; i < numOutputs; i++) { @@ -6216,7 +6404,7 @@ st_translate_program( /* Declare misc input registers */ { - GLbitfield sysInputs = proginfo->SystemValuesRead; + GLbitfield sysInputs = proginfo->info.system_values_read; for (i = 0; sysInputs; i++) { if (sysInputs & (1 << i)) { @@ -6256,8 +6444,10 @@ st_translate_program( } t->array_sizes = program->array_sizes; - t->input_arrays = program->input_arrays; - t->output_arrays = program->output_arrays; + t->input_decls = program->inputs; + t->num_input_decls = program->num_inputs; + t->output_decls = program->outputs; + t->num_output_decls = program->num_outputs; /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. @@ -6299,18 +6489,14 @@ st_translate_program( } } - if (program->shader) { - unsigned num_ubos = program->shader->NumUniformBlocks; - - for (i = 0; i < num_ubos; i++) { - unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize; - unsigned num_const_vecs = (size + 15) / 16; - unsigned first, last; - assert(num_const_vecs > 0); - first = 0; - last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; - ureg_DECL_constant2D(t->ureg, first, last, i + 1); - } + for (i = 0; i < proginfo->info.num_ubos; i++) { + unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize; + unsigned num_const_vecs = (size + 15) / 16; + unsigned first, last; + assert(num_const_vecs > 0); + first = 0; + last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; + ureg_DECL_constant2D(t->ureg, first, last, i + 1); } /* Emit immediate values. @@ -6372,7 +6558,7 @@ st_translate_program( if (program->use_shared_memory) t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); - for (i = 0; i < program->shader->NumImages; i++) { + for (i = 0; i < program->shader->Program->info.num_images; i++) { if (program->images_used & (1 << i)) { t->images[i] = ureg_DECL_image(ureg, i, program->image_targets[i], @@ -6383,17 +6569,8 @@ st_translate_program( /* Emit each instruction in turn: */ - foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) { - set_insn_start(t, ureg_get_instruction_number(ureg)); + foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) compile_tgsi_instruction(t, inst); - } - - /* Fix up all emitted labels: - */ - for (i = 0; i < t->labels_count; i++) { - ureg_fixup_label(ureg, t->labels[i].token, - t->insn[t->labels[i].branch_target]); - } /* Set the next shader stage hint for VS and TES. */ switch (procType) { @@ -6435,17 +6612,10 @@ out: if (t) { free(t->arrays); free(t->temps); - free(t->insn); - free(t->labels); free(t->constants); t->num_constants = 0; free(t->immediates); t->num_immediates = 0; - - if (t->error) { - debug_printf("%s: translate error flag set\n", __func__); - } - FREE(t); } @@ -6465,8 +6635,6 @@ get_mesa_program_tgsi(struct gl_context *ctx, { glsl_to_tgsi_visitor* v; struct gl_program *prog; - GLenum target = _mesa_shader_stage_to_program(shader->Stage); - bool progress; struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[shader->Stage]; struct pipe_screen *pscreen = ctx->st->pipe->screen; @@ -6474,9 +6642,8 @@ get_mesa_program_tgsi(struct gl_context *ctx, validate_ir_tree(shader->ir); - prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); - if (!prog) - return NULL; + prog = shader->Program; + prog->Parameters = _mesa_new_parameter_list(); v = new glsl_to_tgsi_visitor(); v->ctx = ctx; @@ -6491,44 +6658,19 @@ get_mesa_program_tgsi(struct gl_context *ctx, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); v->have_fma = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); + v->has_tex_txf_lz = pscreen->get_param(pscreen, + PIPE_CAP_TGSI_TEX_TXF_LZ); - _mesa_copy_linked_program_data(shader->Stage, shader_program, prog); _mesa_generate_parameters_list_for_uniforms(shader_program, shader, prog->Parameters); /* Remove reads from output registers. */ - lower_output_reads(shader->Stage, shader->ir); + if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS)) + lower_output_reads(shader->Stage, shader->ir); /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); - /* Now emit bodies for any functions that were used. */ - do { - progress = GL_FALSE; - - foreach_in_list(function_entry, entry, &v->function_signatures) { - if (!entry->bgn_inst) { - v->current_function = entry; - - entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB); - entry->bgn_inst->function = entry; - - visit_exec_list(&entry->sig->body, v); - - glsl_to_tgsi_instruction *last; - last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); - if (last->op != TGSI_OPCODE_RET) - v->emit_asm(NULL, TGSI_OPCODE_RET); - - glsl_to_tgsi_instruction *end; - end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB); - end->function = entry; - - progress = GL_TRUE; - } - } - } while (progress); - #if 0 /* Print out some information (for debugging purposes) used by the * optimization passes. */ @@ -6585,14 +6727,15 @@ get_mesa_program_tgsi(struct gl_context *ctx, _mesa_log("\n\n"); } - prog->Instructions = NULL; - prog->NumInstructions = 0; - do_set_program_inouts(shader->ir, prog, shader->Stage); - shrink_array_declarations(v->input_arrays, v->num_input_arrays, - prog->InputsRead, prog->DoubleInputsRead, prog->PatchInputsRead); - shrink_array_declarations(v->output_arrays, v->num_output_arrays, - prog->OutputsWritten, 0ULL, prog->PatchOutputsWritten); + _mesa_copy_linked_program_data(shader_program, shader); + shrink_array_declarations(v->inputs, v->num_inputs, + &prog->info.inputs_read, + prog->info.double_inputs_read, + &prog->info.patch_inputs_read); + shrink_array_declarations(v->outputs, v->num_outputs, + &prog->info.outputs_written, 0ULL, + &prog->info.patch_outputs_written); count_resources(v, prog); /* The GLSL IR won't be needed anymore. */ @@ -6601,8 +6744,8 @@ get_mesa_program_tgsi(struct gl_context *ctx, /* This must be done before the uniform storage is associated. */ if (shader->Stage == MESA_SHADER_FRAGMENT && - (prog->InputsRead & VARYING_BIT_POS || - prog->SystemValuesRead & (1 << SYSTEM_VALUE_FRAG_COORD))) { + (prog->info.inputs_read & VARYING_BIT_POS || + prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) { static const gl_state_index wposTransformState[STATE_LENGTH] = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; @@ -6611,8 +6754,6 @@ get_mesa_program_tgsi(struct gl_context *ctx, wposTransformState); } - _mesa_reference_program(ctx, &shader->Program, prog); - /* Avoid reallocation of the program parameter list, because the uniform * storage is only associated with the original parameter list. * This should be enough for Bitmap and DrawPixels constants. @@ -6623,9 +6764,11 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); - if (!shader_program->LinkStatus) { + _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters, + true); + if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); + _mesa_reference_program(ctx, &shader->Program, NULL); return NULL; } @@ -6669,173 +6812,55 @@ get_mesa_program_tgsi(struct gl_context *ctx, return prog; } -static void -set_affected_state_flags(uint64_t *states, - struct gl_program *prog, - struct gl_linked_shader *shader, - uint64_t new_constants, - uint64_t new_sampler_views, - uint64_t new_samplers, - uint64_t new_images, - uint64_t new_ubos, - uint64_t new_ssbos, - uint64_t new_atomics) -{ - if (prog->Parameters->NumParameters) - *states |= new_constants; - - if (shader->num_samplers) - *states |= new_sampler_views | new_samplers; - - if (shader->NumImages) - *states |= new_images; - - if (shader->NumUniformBlocks) - *states |= new_ubos; +/* See if there are unsupported control flow statements. */ +class ir_control_flow_info_visitor : public ir_hierarchical_visitor { +private: + const struct gl_shader_compiler_options *options; +public: + ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options) + : options(options), + unsupported(false) + { + } - if (shader->NumShaderStorageBlocks) - *states |= new_ssbos; + virtual ir_visitor_status visit_enter(ir_function *ir) + { + /* Other functions are skipped (same as glsl_to_tgsi). */ + if (strcmp(ir->name, "main") == 0) + return visit_continue; - if (shader->NumAtomicBuffers) - *states |= new_atomics; -} + return visit_continue_with_parent; + } -static struct gl_program * -get_mesa_program(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_linked_shader *shader) -{ - struct pipe_screen *pscreen = ctx->st->pipe->screen; - enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage); - enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) - pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR); - struct gl_program *prog = NULL; - - if (preferred_ir == PIPE_SHADER_IR_NIR) { - /* TODO only for GLSL VS/FS for now: */ - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_FRAGMENT: - prog = st_nir_get_mesa_program(ctx, shader_program, shader); - default: - break; + virtual ir_visitor_status visit_enter(ir_call *ir) + { + if (!ir->callee->is_intrinsic()) { + unsupported = true; /* it's a function call */ + return visit_stop; } - } else { - prog = get_mesa_program_tgsi(ctx, shader_program, shader); + return visit_continue; } - if (prog) { - uint64_t *states; - - /* This determines which states will be updated when the shader is - * bound. - */ - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - states = &((struct st_vertex_program*)prog)->affected_states; - - *states = ST_NEW_VS_STATE | - ST_NEW_RASTERIZER | - ST_NEW_VERTEX_ARRAYS; - - set_affected_state_flags(states, prog, shader, - ST_NEW_VS_CONSTANTS, - ST_NEW_VS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_VS_IMAGES, - ST_NEW_VS_UBOS, - ST_NEW_VS_SSBOS, - ST_NEW_VS_ATOMICS); - break; - - case MESA_SHADER_TESS_CTRL: - states = &((struct st_tessctrl_program*)prog)->affected_states; - - *states = ST_NEW_TCS_STATE; - - set_affected_state_flags(states, prog, shader, - ST_NEW_TCS_CONSTANTS, - ST_NEW_TCS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_TCS_IMAGES, - ST_NEW_TCS_UBOS, - ST_NEW_TCS_SSBOS, - ST_NEW_TCS_ATOMICS); - break; - - case MESA_SHADER_TESS_EVAL: - states = &((struct st_tesseval_program*)prog)->affected_states; - - *states = ST_NEW_TES_STATE | - ST_NEW_RASTERIZER; - - set_affected_state_flags(states, prog, shader, - ST_NEW_TES_CONSTANTS, - ST_NEW_TES_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_TES_IMAGES, - ST_NEW_TES_UBOS, - ST_NEW_TES_SSBOS, - ST_NEW_TES_ATOMICS); - break; - - case MESA_SHADER_GEOMETRY: - states = &((struct st_geometry_program*)prog)->affected_states; - - *states = ST_NEW_GS_STATE | - ST_NEW_RASTERIZER; - - set_affected_state_flags(states, prog, shader, - ST_NEW_GS_CONSTANTS, - ST_NEW_GS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_GS_IMAGES, - ST_NEW_GS_UBOS, - ST_NEW_GS_SSBOS, - ST_NEW_GS_ATOMICS); - break; - - case MESA_SHADER_FRAGMENT: - states = &((struct st_fragment_program*)prog)->affected_states; - - /* gl_FragCoord and glDrawPixels always use constants. */ - *states = ST_NEW_FS_STATE | - ST_NEW_SAMPLE_SHADING | - ST_NEW_FS_CONSTANTS; - - set_affected_state_flags(states, prog, shader, - ST_NEW_FS_CONSTANTS, - ST_NEW_FS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_FS_IMAGES, - ST_NEW_FS_UBOS, - ST_NEW_FS_SSBOS, - ST_NEW_FS_ATOMICS); - break; - - case MESA_SHADER_COMPUTE: - states = &((struct st_compute_program*)prog)->affected_states; - - *states = ST_NEW_CS_STATE; - - set_affected_state_flags(states, prog, shader, - ST_NEW_CS_CONSTANTS, - ST_NEW_CS_SAMPLER_VIEWS, - ST_NEW_CS_SAMPLERS, - ST_NEW_CS_IMAGES, - ST_NEW_CS_UBOS, - ST_NEW_CS_SSBOS, - ST_NEW_CS_ATOMICS); - break; - - default: - unreachable("unhandled shader stage"); + virtual ir_visitor_status visit_enter(ir_return *ir) + { + if (options->EmitNoMainReturn) { + unsupported = true; + return visit_stop; } + return visit_continue; } - return prog; -} + bool unsupported; +}; +static bool +has_unsupported_control_flow(exec_list *ir, + const struct gl_shader_compiler_options *options) +{ + ir_control_flow_info_visitor visitor(options); + visit_list_elements(&visitor, ir); + return visitor.unsupported; +} extern "C" { @@ -6848,16 +6873,21 @@ extern "C" { GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + /* Return early if we are loading the shader from on-disk cache */ + if (st_load_tgsi_from_disk_cache(ctx, prog)) { + return GL_TRUE; + } + struct pipe_screen *pscreen = ctx->st->pipe->screen; - assert(prog->LinkStatus); + assert(prog->data->LinkStatus); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] == NULL) continue; - bool progress; - exec_list *ir = prog->_LinkedShaders[i]->ir; - gl_shader_stage stage = prog->_LinkedShaders[i]->Stage; + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + exec_list *ir = shader->ir; + gl_shader_stage stage = shader->Stage; const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage); @@ -6865,19 +6895,24 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED); bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED); + unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_LOWER_IF_THRESHOLD); /* If there are forms of indirect addressing that the driver * cannot handle, perform the lowering pass. */ if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { - lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, + lower_variable_index_to_cond_assign(stage, ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, options->EmitNoIndirectUniform); } + if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD)) + lower_64bit_integer_instructions(ir, DIV64 | MOD64); + if (ctx->Extensions.ARB_shading_language_packing) { unsigned lower_inst = LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16 | @@ -6901,9 +6936,13 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) lower_offset_arrays(ir); do_mat_op_to_vec(ir); + + if (stage == MESA_SHADER_FRAGMENT) + lower_blend_equation_advanced(shader); + lower_instructions(ir, MOD_TO_FLOOR | - DIV_TO_MUL_RCP | + FDIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | LDEXP_TO_ARITH | @@ -6914,6 +6953,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) (options->EmitNoPow ? POW_TO_EXP2 : 0) | (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | (options->EmitNoSat ? SAT_TO_CLAMP : 0) | + (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) | /* Assume that if ARB_gpu_shader5 is not supported * then all of the extended integer functions need * lowering. It may be necessary to add some caps @@ -6937,18 +6977,24 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) lower_discard(ir); } - do { - progress = false; - - progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; - - progress = do_common_optimization(ir, true, true, options, - ctx->Const.NativeIntegers) - || progress; - - progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; - - } while (progress); + if (ctx->Const.GLSLOptimizeConservatively) { + /* Do it once and repeat only if there's unsupported control flow. */ + do { + do_common_optimization(ir, true, true, options, + ctx->Const.NativeIntegers); + lower_if_to_cond_assign((gl_shader_stage)i, ir, + options->MaxIfDepth, if_threshold); + } while (has_unsupported_control_flow(ir, options)); + } else { + /* Repeat it until it stops making changes. */ + bool progress; + do { + progress = do_common_optimization(ir, true, true, options, + ctx->Const.NativeIntegers); + progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir, + options->MaxIfDepth, if_threshold); + } while (progress); + } validate_ir_tree(ir); } @@ -6956,27 +7002,39 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) build_program_resource_list(ctx, prog); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_program *linked_prog; - - if (prog->_LinkedShaders[i] == NULL) + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + if (shader == NULL) continue; - linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + enum pipe_shader_type ptarget = + st_shader_stage_to_ptarget(shader->Stage); + enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) + pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_PREFERRED_IR); + + struct gl_program *linked_prog = NULL; + if (preferred_ir == PIPE_SHADER_IR_NIR) { + /* TODO only for GLSL VS/FS for now: */ + switch (shader->Stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + linked_prog = st_nir_get_mesa_program(ctx, prog, shader); + default: + break; + } + } else { + linked_prog = get_mesa_program_tgsi(ctx, prog, shader); + } if (linked_prog) { - _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, - linked_prog); + st_set_prog_affected_state_flags(linked_prog); if (!ctx->Driver.ProgramStringNotify(ctx, _mesa_shader_stage_to_program(i), linked_prog)) { - _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, - NULL); - _mesa_reference_program(ctx, &linked_prog, NULL); + _mesa_reference_program(ctx, &shader->Program, NULL); return GL_FALSE; } } - - _mesa_reference_program(ctx, &linked_prog, NULL); } return GL_TRUE; @@ -6987,8 +7045,11 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, const GLuint outputMapping[], struct pipe_stream_output_info *so) { + if (!glsl_to_tgsi->shader_program->last_vert_prog) + return; + struct gl_transform_feedback_info *info = - &glsl_to_tgsi->shader_program->LinkedTransformFeedback; + glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback; st_translate_stream_output_info2(info, outputMapping, so); }