X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=baa835924bcd4644cbf3396e0f9fbcea61166925;hb=268ba028dcbdaea25a972a460c1636f485d5c5bc;hp=978dc46401b192d706759ac4e11cfd3065eb2a91;hpb=203c8794a1debc0e45019fe945d1cc55459e6c6f;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 978dc46401b..baa835924bc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -54,7 +54,9 @@ #include "st_format.h" #include "st_glsl_types.h" #include "st_nir.h" +#include "st_shader_cache.h" +#include "util/hash_table.h" #include #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ @@ -85,6 +87,13 @@ static int swizzle_for_type(const glsl_type *type, int component = 0) return swizzle; } +static unsigned is_precise(const ir_variable *ir) +{ + if (!ir) + return 0; + return ir->data.precise || ir->data.invariant; +} + /** * This struct is a corresponding struct to TGSI ureg_src. */ @@ -98,6 +107,7 @@ public: this->index = index; this->swizzle = swizzle_for_type(type, component); this->negate = 0; + this->abs = 0; this->index2D = 0; this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; @@ -117,6 +127,7 @@ public: this->index2D = 0; this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->abs = 0; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -134,6 +145,7 @@ public: this->index2D = index2D; this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->abs = 0; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -150,6 +162,7 @@ public: this->index2D = 0; this->swizzle = 0; this->negate = 0; + this->abs = 0; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -160,11 +173,12 @@ public: explicit st_src_reg(st_dst_reg reg); - int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int16_t index2D; uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ int negate:4; /**< NEGATE_XYZW mask from mesa */ - enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + unsigned abs:1; + enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ unsigned has_index2:1; gl_register_file file:5; /**< PROGRAM_* from Mesa */ /* @@ -178,6 +192,14 @@ public: /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; st_src_reg *reladdr2; + + st_src_reg get_abs() + { + st_src_reg reg = *this; + reg.negate = 0; + reg.abs = 1; + return reg; + } }; class st_dst_reg { @@ -225,11 +247,11 @@ public: explicit st_dst_reg(st_src_reg reg); - int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int16_t index2D; gl_register_file file:5; /**< PROGRAM_* from Mesa */ unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */ - enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ unsigned has_index2:1; unsigned array_id:10; @@ -245,6 +267,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->abs = 0; this->reladdr = reg.reladdr; this->index2D = reg.index2D; this->reladdr2 = reg.reladdr2; @@ -273,19 +296,20 @@ public: st_dst_reg dst[2]; st_src_reg src[4]; - st_src_reg resource; /**< sampler or buffer register */ + st_src_reg resource; /**< sampler, image or buffer register */ st_src_reg *tex_offsets; /** Pointer to the ir source this tree came from for debugging */ ir_instruction *ir; unsigned op:8; /**< TGSI opcode */ + unsigned precise:1; unsigned saturate:1; unsigned is_64bit_expanded:1; unsigned sampler_base:5; unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */ unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */ - glsl_base_type tex_type:4; + glsl_base_type tex_type:5; unsigned tex_shadow:1; unsigned image_format:9; unsigned tex_offset_num_offset:3; @@ -295,7 +319,9 @@ public: const struct tgsi_opcode_info *info; }; -class variable_storage : public exec_node { +class variable_storage { + DECLARE_RZALLOC_CXX_OPERATORS(variable_storage) + public: variable_storage(ir_variable *var, gl_register_file file, int index, unsigned array_id = 0) @@ -331,14 +357,15 @@ public: int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); +static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); +static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); struct inout_decl { unsigned mesa_index; unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */ unsigned size; unsigned interp_loc; + unsigned gs_out_streams; enum glsl_interp_mode interp; enum glsl_base_type base_type; ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ @@ -372,7 +399,7 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) } struct rename_reg_pair { - int old_reg; + bool valid; int new_reg; }; @@ -404,7 +431,6 @@ public: uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ - int buffers_used; int images_used; int image_targets[PIPE_MAX_SHADER_IMAGES]; unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; @@ -416,6 +442,8 @@ public: bool have_sqrt; bool have_fma; bool use_shared_memory; + bool has_tex_txf_lz; + bool precise; variable_storage *find_variable_storage(ir_variable *var); @@ -467,11 +495,12 @@ public: void visit_membar_intrinsic(ir_call *); void visit_shared_intrinsic(ir_call *); void visit_image_intrinsic(ir_call *); + void visit_generic_intrinsic(ir_call *, unsigned op); st_src_reg result; /** List of variable_storage */ - exec_list variables; + struct hash_table *variables; /** List of immediate_storage */ exec_list immediates; @@ -539,8 +568,9 @@ public: void simplify_cmp(void); - void rename_temp_registers(int num_renames, struct rename_reg_pair *renames); + void rename_temp_registers(struct rename_reg_pair *renames); void get_first_temp_read(int *first_reads); + void get_first_temp_write(int *first_writes); void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); void get_last_temp_write(int *last_writes); @@ -573,7 +603,7 @@ fail_link(struct gl_shader_program *prog, const char *fmt, ...) ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args); va_end(args); - prog->data->LinkStatus = GL_FALSE; + prog->data->LinkStatus = linking_failure; } static int @@ -670,6 +700,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, STATIC_ASSERT(TGSI_OPCODE_LAST <= 255); inst->op = op; + inst->precise = this->precise; inst->info = tgsi_get_opcode_info(op); inst->dst[0] = dst; inst->dst[1] = dst1; @@ -886,6 +917,10 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op, if (is_resource_instruction(op)) type = src1.type; + else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64) + type = GLSL_TYPE_INT64; + else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64) + type = GLSL_TYPE_UINT64; else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) type = GLSL_TYPE_DOUBLE; else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) @@ -893,6 +928,21 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op, else if (native_integers) type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; +#define case7(c, f, i, u, d, i64, ui64) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_UINT64) \ + op = TGSI_OPCODE_##ui64; \ + else if (type == GLSL_TYPE_INT64) \ + op = TGSI_OPCODE_##i64; \ + else if (type == GLSL_TYPE_DOUBLE) \ + op = TGSI_OPCODE_##d; \ + else if (type == GLSL_TYPE_INT) \ + op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) \ + op = TGSI_OPCODE_##u; \ + else \ + op = TGSI_OPCODE_##f; \ + break; #define case5(c, f, i, u, d) \ case TGSI_OPCODE_##c: \ if (type == GLSL_TYPE_DOUBLE) \ @@ -916,14 +966,22 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op, break; #define case3(f, i, u) case4(f, f, i, u) -#define case4d(f, i, u, d) case5(f, f, i, u, d) +#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64) #define case3fid(f, i, d) case5(f, f, i, i, d) +#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64) #define case2fi(f, i) case4(f, f, i, i) #define case2iu(i, u) case4(i, LAST, i, u) -#define casecomp(c, f, i, u, d) \ +#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64) +#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64) + +#define casecomp(c, f, i, u, d, i64, ui64) \ case TGSI_OPCODE_##c: \ - if (type == GLSL_TYPE_DOUBLE) \ + if (type == GLSL_TYPE_INT64) \ + op = TGSI_OPCODE_##i64; \ + else if (type == GLSL_TYPE_UINT64) \ + op = TGSI_OPCODE_##ui64; \ + else if (type == GLSL_TYPE_DOUBLE) \ op = TGSI_OPCODE_##d; \ else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \ op = TGSI_OPCODE_##i; \ @@ -936,24 +994,24 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op, break; switch(op) { - case3fid(ADD, UADD, DADD); - case3fid(MUL, UMUL, DMUL); + case3fid64(ADD, UADD, DADD, U64ADD); + case3fid64(MUL, UMUL, DMUL, U64MUL); case3fid(MAD, UMAD, DMAD); case3fid(FMA, UMAD, DFMA); - case3(DIV, IDIV, UDIV); - case4d(MAX, IMAX, UMAX, DMAX); - case4d(MIN, IMIN, UMIN, DMIN); - case2iu(MOD, UMOD); + case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV); + case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX); + case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN); + case4iu64(MOD, UMOD, I64MOD, U64MOD); - casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ); - casecomp(SNE, FSNE, USNE, USNE, DSNE); - casecomp(SGE, FSGE, ISGE, USGE, DSGE); - casecomp(SLT, FSLT, ISLT, USLT, DSLT); + casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); + casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); + casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE); + casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT); - case2iu(ISHR, USHR); + case2iu64(SHL, U64SHL); + case4iu64(ISHR, USHR, I64SHR, U64SHR); - case3fid(SSG, ISSG, DSSG); - case3fid(ABS, IABS, DABS); + case3fid64(SSG, ISSG, DSSG, I64SSG); case2iu(IBFE, UBFE); case2iu(IMSB, UMSB); @@ -1089,7 +1147,9 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, int index = 0; immediate_storage *entry; - int size32 = size * (datatype == GL_DOUBLE ? 2 : 1); + int size32 = size * ((datatype == GL_DOUBLE || + datatype == GL_INT64_ARB || + datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1); int i; /* Search immediate storage to see if we already have an identical @@ -1227,6 +1287,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; src.reladdr = NULL; src.negate = 0; + src.abs = 0; if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) { if (next_array >= max_num_arrays) { @@ -1259,13 +1320,13 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) variable_storage * glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) { + struct hash_entry *entry; - foreach_in_list(variable_storage, entry, &this->variables) { - if (entry->var == var) - return entry; - } + entry = _mesa_hash_table_search(this->variables, var); + if (!entry) + return NULL; - return NULL; + return (variable_storage *)entry->data; } void @@ -1298,7 +1359,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) if (i == ir->get_num_state_slots()) { /* We'll set the index later. */ storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); - this->variables.push_tail(storage); + + _mesa_hash_table_insert(this->variables, ir, storage); dst = undef_dst; } else { @@ -1313,7 +1375,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index, dst.array_id); - this->variables.push_tail(storage); + _mesa_hash_table_insert(this->variables, ir, storage); } @@ -1495,7 +1557,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ - if (ir->operation == ir_binop_add) { + if (!this->precise && ir->operation == ir_binop_add) { if (try_emit_mad(ir, 1)) return; if (try_emit_mad(ir, 0)) @@ -1579,7 +1641,9 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) } break; case ir_unop_neg: - if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) + if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) + emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]); + else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else if (result_dst.type == GLSL_TYPE_DOUBLE) emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); @@ -1592,7 +1656,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; case ir_unop_abs: - emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + if (result_dst.type == GLSL_TYPE_FLOAT) + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs()); + else if (result_dst.type == GLSL_TYPE_DOUBLE) + emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]); + else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) + emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]); + else + emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]); break; case ir_unop_sign: emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); @@ -1605,8 +1676,10 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); break; case ir_unop_exp: + assert(!"not reached: should be handled by exp_to_exp2"); + break; case ir_unop_log: - assert(!"not reached: should be handled by ir_explog_to_explog2"); + assert(!"not reached: should be handled by log_to_log2"); break; case ir_unop_log2: emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); @@ -1682,17 +1755,15 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + op[1].negate = ~op[1].negate; + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_mul: emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: - if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE) - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); - else - emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); break; case ir_binop_mod: if (result_dst.type == GLSL_TYPE_FLOAT) @@ -1918,8 +1989,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ * behavior. */ - emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]); - emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs()); emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); } break; @@ -1940,6 +2010,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) /* fallthrough to next case otherwise */ case ir_unop_i2u: case ir_unop_u2i: + case ir_unop_i642u64: + case ir_unop_u642i64: /* Converting between signed and unsigned integers is a no-op. */ result_src = op[0]; result_src.type = result_dst.type; @@ -1973,7 +2045,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_unop_bitcast_f2i: case ir_unop_bitcast_f2u: /* Make sure we don't propagate the negate modifier to integer opcodes. */ - if (op[0].negate) + if (op[0].negate || op[0].abs) emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); else result_src = op[0]; @@ -1997,6 +2069,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) else emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; + case ir_unop_bitcast_u642d: + case ir_unop_bitcast_i642d: + result_src = op[0]; + result_src.type = GLSL_TYPE_DOUBLE; + break; + case ir_unop_bitcast_d2i64: + result_src = op[0]; + result_src.type = GLSL_TYPE_INT64; + break; + case ir_unop_bitcast_d2u64: + result_src = op[0]; + result_src.type = GLSL_TYPE_UINT64; + break; case ir_unop_trunc: emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; @@ -2034,13 +2119,23 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; } case ir_binop_lshift: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); - break; - } case ir_binop_rshift: if (native_integers) { - emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); + unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL + : TGSI_OPCODE_ISHR; + st_src_reg count; + + if (glsl_base_type_is_64bit(op[0].type)) { + /* GLSL shift operations have 32-bit shift counts, but TGSI uses + * 64 bits. + */ + count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components())); + emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]); + } else { + count = op[1]; + } + + emit_asm(ir, opcode, result_dst, op[0], count); break; } case ir_binop_bit_and: @@ -2075,6 +2170,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) cbuf.index = 0; cbuf.reladdr = NULL; cbuf.negate = 0; + cbuf.abs = 0; assert(ir->type->is_vector() || ir->type->is_scalar()); @@ -2144,7 +2240,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) const_offset % 16 / 4, const_offset % 16 / 4); - if (ir->type->base_type == GLSL_TYPE_BOOL) { + if (ir->type->is_boolean()) { emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); } else { emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); @@ -2241,11 +2337,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; case ir_unop_unpack_double_2x32: case ir_unop_pack_double_2x32: + case ir_unop_unpack_int_2x32: + case ir_unop_pack_int_2x32: + case ir_unop_unpack_uint_2x32: + case ir_unop_pack_uint_2x32: + case ir_unop_unpack_sampler_2x32: + case ir_unop_pack_sampler_2x32: + case ir_unop_unpack_image_2x32: + case ir_unop_pack_image_2x32: emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; case ir_binop_ldexp: - if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) { + if (ir->operands[0]->type->is_double()) { emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); } else { assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); @@ -2275,16 +2379,120 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; } - case ir_unop_vote_any: - emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]); + case ir_unop_u2i64: + case ir_unop_u2u64: + case ir_unop_b2i64: { + st_src_reg temp = get_temp(glsl_type::uvec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + unsigned orig_swz = op[0].swizzle; + /* + * To convert unsigned to 64-bit: + * zero Y channel, copy X channel. + */ + temp_dst.writemask = WRITEMASK_Y; + if (vector_elements > 1) + temp_dst.writemask |= WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); + temp_dst.writemask = WRITEMASK_X; + if (vector_elements > 1) + temp_dst.writemask |= WRITEMASK_Z; + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0), + GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1)); + if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); + else + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); + result_src = temp; + result_src.type = GLSL_TYPE_UINT64; + if (vector_elements > 2) { + /* Subtle: We rely on the fact that get_temp here returns the next + * TGSI temporary register directly after the temp register used for + * the first two components, so that the result gets picked up + * automatically. + */ + st_src_reg temp = get_temp(glsl_type::uvec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + temp_dst.writemask = WRITEMASK_Y; + if (vector_elements > 3) + temp_dst.writemask |= WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); + + temp_dst.writemask = WRITEMASK_X; + if (vector_elements > 3) + temp_dst.writemask |= WRITEMASK_Z; + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2), + GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3)); + if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); + else + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); + } break; - case ir_unop_vote_all: - emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]); + } + case ir_unop_i642i: + case ir_unop_u642i: + case ir_unop_u642u: + case ir_unop_i642u: { + st_src_reg temp = get_temp(glsl_type::uvec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + unsigned orig_swz = op[0].swizzle; + unsigned orig_idx = op[0].index; + int el; + temp_dst.writemask = WRITEMASK_X; + + for (el = 0; el < vector_elements; el++) { + unsigned swz = GET_SWZ(orig_swz, el); + if (swz & 1) + op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z); + else + op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + if (swz > 2) + op[0].index = orig_idx + 1; + op[0].type = GLSL_TYPE_UINT; + temp_dst.writemask = WRITEMASK_X << el; + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); + } + result_src = temp; + if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u) + result_src.type = GLSL_TYPE_UINT; + else + result_src.type = GLSL_TYPE_INT; break; - case ir_unop_vote_eq: - emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]); + } + case ir_unop_i642b: + emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0)); break; - + case ir_unop_i642f: + emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]); + break; + case ir_unop_u642f: + emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]); + break; + case ir_unop_i642d: + emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]); + break; + case ir_unop_u642d: + emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]); + break; + case ir_unop_i2i64: + emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); + break; + case ir_unop_f2i64: + emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]); + break; + case ir_unop_d2i64: + emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]); + break; + case ir_unop_i2u64: + emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); + break; + case ir_unop_f2u64: + emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]); + break; + case ir_unop_d2u64: + emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]); + break; + /* these might be needed */ case ir_unop_pack_snorm_2x16: case ir_unop_pack_unorm_2x16: case ir_unop_pack_snorm_4x8: @@ -2409,7 +2617,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) case ir_var_uniform: entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, var->data.param_index); - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); break; case ir_var_shader_in: { /* The linker assigns locations for varyings and attributes, @@ -2456,7 +2664,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) decl->array_id); entry->component = component; - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); + break; } case ir_var_shader_out: { @@ -2478,6 +2687,14 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; decl->base_type = type_without_array->base_type; decl->usage_mask = u_bit_consecutive(component, num_components); + if (var->data.stream & (1u << 31)) { + decl->gs_out_streams = var->data.stream & ~(1u << 31); + } else { + assert(var->data.stream < 4); + decl->gs_out_streams = 0; + for (unsigned i = 0; i < num_components; ++i) + decl->gs_out_streams |= var->data.stream << (2 * (component + i)); + } if (is_inout_array(shader->Stage, var, &remove_array)) { decl->array_id = num_output_arrays + 1; @@ -2491,13 +2708,23 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) else decl->size = type_size(var->type); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - decl->mesa_index, - decl->array_id); + if (var->data.fb_fetch_output) { + st_dst_reg dst = st_dst_reg(get_temp(var->type)); + st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index, + var->type, component, decl->array_id); + emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src); + entry = new(mem_ctx) variable_storage(var, dst.file, dst.index, + dst.array_id); + } else { + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + decl->mesa_index, + decl->array_id); + } entry->component = component; - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); + break; } case ir_var_system_value: @@ -2511,7 +2738,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) entry = new(mem_ctx) variable_storage(var, src.file, src.index, src.array_id); - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); break; } @@ -2867,7 +3094,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * st_dst_reg *l, st_src_reg *r, st_src_reg *cond, bool cond_swap) { - if (type->base_type == GLSL_TYPE_STRUCT) { + if (type->is_record()) { for (unsigned int i = 0; i < type->length; i++) { emit_block_mov(ir, type->fields.structure[i].type, l, r, cond, cond_swap); @@ -2930,6 +3157,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) st_dst_reg l; st_src_reg r; + /* all generated instructions need to be flaged as precise */ + this->precise = is_precise(ir->lhs->variable_referenced()); ir->rhs->accept(this); r = this->result; @@ -3021,6 +3250,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else { emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false); } + this->precise = 0; } @@ -3040,7 +3270,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. */ - if (ir->type->base_type == GLSL_TYPE_STRUCT) { + if (ir->type->is_record()) { st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); @@ -3159,6 +3389,18 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) memcpy(&values[i * 2], &ir->value.d[i], sizeof(double)); } break; + case GLSL_TYPE_INT64: + gl_type = GL_INT64_ARB; + for (i = 0; i < ir->type->vector_elements; i++) { + memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t)); + } + break; + case GLSL_TYPE_UINT64: + gl_type = GL_UNSIGNED_INT64_ARB; + for (i = 0; i < ir->type->vector_elements; i++) { + memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t)); + } + break; case GLSL_TYPE_UINT: gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { @@ -3397,10 +3639,17 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) inst->resource = buffer; if (access) inst->buffer_access = access->value.u[0]; + + if (inst == this->instructions.get_head_raw()) + break; inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - if (inst->op == TGSI_OPCODE_UADD) + + if (inst->op == TGSI_OPCODE_UADD) { + if (inst == this->instructions.get_head_raw()) + break; inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - } while (inst && inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); + } + } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); } void @@ -3527,6 +3776,59 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) } } +static void +get_image_qualifiers(ir_dereference *ir, const glsl_type **type, + bool *memory_coherent, bool *memory_volatile, + bool *memory_restrict, unsigned *image_format) +{ + + switch (ir->ir_type) { + case ir_type_dereference_record: { + ir_dereference_record *deref_record = ir->as_dereference_record(); + const glsl_type *struct_type = deref_record->record->type; + + for (unsigned i = 0; i < struct_type->length; i++) { + if (!strcmp(struct_type->fields.structure[i].name, + deref_record->field)) { + *type = struct_type->fields.structure[i].type; + *memory_coherent = + struct_type->fields.structure[i].memory_coherent; + *memory_volatile = + struct_type->fields.structure[i].memory_volatile; + *memory_restrict = + struct_type->fields.structure[i].memory_restrict; + *image_format = + struct_type->fields.structure[i].image_format; + break; + } + } + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_arr = ir->as_dereference_array(); + get_image_qualifiers((ir_dereference *)deref_arr->array, type, + memory_coherent, memory_volatile, memory_restrict, + image_format); + break; + } + + case ir_type_dereference_variable: { + ir_variable *var = ir->variable_referenced(); + + *type = var->type->without_array(); + *memory_coherent = var->data.memory_coherent; + *memory_volatile = var->data.memory_volatile; + *memory_restrict = var->data.memory_restrict; + *image_format = var->data.image_format; + break; + } + + default: + break; + } +} + void glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) { @@ -3534,15 +3836,21 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) ir_dereference *img = (ir_dereference *)param; const ir_variable *imgvar = img->variable_referenced(); - const glsl_type *type = imgvar->type->without_array(); unsigned sampler_array_size = 1, sampler_base = 0; + bool memory_coherent = false, memory_volatile = false, memory_restrict = false; + unsigned image_format = 0; + const glsl_type *type = NULL; + + get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, + &memory_restrict, &image_format); st_src_reg reladdr; st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); - + uint16_t index = 0; get_deref_offsets(img, &sampler_array_size, &sampler_base, - (uint16_t*)&image.index, &reladdr, true); + &index, &reladdr, !imgvar->contains_bindless()); + image.index = index; if (reladdr.file != PROGRAM_UNDEFINED) { image.reladdr = ralloc(mem_ctx, st_src_reg); *image.reladdr = reladdr; @@ -3654,54 +3962,53 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) inst->dst[0].writemask = WRITEMASK_XYZW; } - inst->resource = image; - inst->sampler_array_size = sampler_array_size; - inst->sampler_base = sampler_base; - - switch (type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - inst->tex_target = (type->sampler_array) - ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - break; - case GLSL_SAMPLER_DIM_2D: - inst->tex_target = (type->sampler_array) - ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - break; - case GLSL_SAMPLER_DIM_3D: - inst->tex_target = TEXTURE_3D_INDEX; - break; - case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = (type->sampler_array) - ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; - break; - case GLSL_SAMPLER_DIM_RECT: - inst->tex_target = TEXTURE_RECT_INDEX; - break; - case GLSL_SAMPLER_DIM_BUF: - inst->tex_target = TEXTURE_BUFFER_INDEX; - break; - case GLSL_SAMPLER_DIM_EXTERNAL: - inst->tex_target = TEXTURE_EXTERNAL_INDEX; - break; - case GLSL_SAMPLER_DIM_MS: - inst->tex_target = (type->sampler_array) - ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; - break; - default: - assert(!"Should not get here."); + if (imgvar->contains_bindless()) { + img->accept(this); + inst->resource = this->result; + inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); + } else { + inst->resource = image; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; } + inst->tex_target = type->sampler_index(); inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), - _mesa_get_shader_image_format(imgvar->data.image_format)); + _mesa_get_shader_image_format(image_format)); - if (imgvar->data.image_coherent) + if (memory_coherent) inst->buffer_access |= TGSI_MEMORY_COHERENT; - if (imgvar->data.image_restrict) + if (memory_restrict) inst->buffer_access |= TGSI_MEMORY_RESTRICT; - if (imgvar->data.image_volatile) + if (memory_volatile) inst->buffer_access |= TGSI_MEMORY_VOLATILE; } +void +glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op) +{ + ir->return_deref->accept(this); + st_dst_reg dst = st_dst_reg(this->result); + + dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements); + + st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src }; + unsigned num_src = 0; + foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { + assert(num_src < ARRAY_SIZE(src)); + + this->result.file = PROGRAM_UNDEFINED; + param->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + src[num_src] = this->result; + num_src++; + } + + emit_asm(ir, op, dst, src[0], src[1], src[2], src[3]); +} + void glsl_to_tgsi_visitor::visit(ir_call *ir) { @@ -3773,6 +4080,29 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) visit_image_intrinsic(ir); return; + case ir_intrinsic_shader_clock: + visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK); + return; + + case ir_intrinsic_vote_all: + visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL); + return; + case ir_intrinsic_vote_any: + visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ANY); + return; + case ir_intrinsic_vote_eq: + visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_EQ); + return; + case ir_intrinsic_ballot: + visit_generic_intrinsic(ir, TGSI_OPCODE_BALLOT); + return; + case ir_intrinsic_read_first_invocation: + visit_generic_intrinsic(ir, TGSI_OPCODE_READ_FIRST); + return; + case ir_intrinsic_read_invocation: + visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC); + return; + case ir_intrinsic_invalid: case ir_intrinsic_generic_load: case ir_intrinsic_generic_store: @@ -3784,7 +4114,6 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) case ir_intrinsic_generic_atomic_max: case ir_intrinsic_generic_atomic_exchange: case ir_intrinsic_generic_atomic_comp_swap: - case ir_intrinsic_shader_clock: unreachable("Invalid intrinsic"); } } @@ -3910,14 +4239,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) unsigned opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; unsigned sampler_array_size = 1, sampler_base = 0; - uint16_t sampler_index = 0; - bool is_cube_array = false; + bool is_cube_array = false, is_cube_shadow = false; + ir_variable *var = ir->sampler->variable_referenced(); unsigned i; - /* if we are a cube array sampler */ - if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - sampler_type->sampler_array)) { - is_cube_array = true; + /* if we are a cube array sampler or a cube shadow */ + if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { + is_cube_array = sampler_type->sampler_array; + is_cube_shadow = sampler_type->sampler_shadow; } if (ir->coordinate) { @@ -3944,18 +4273,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ result_src = get_temp(ir->type); result_dst = st_dst_reg(result_src); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; switch (ir->op) { case ir_tex: - opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; + opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; if (ir->offset) { ir->offset->accept(this); offset[0] = this->result; } break; case ir_txb: - if (is_cube_array || - sampler_type == glsl_type::samplerCubeShadow_type) { + if (is_cube_array || is_cube_shadow) { opcode = TGSI_OPCODE_TXB2; } else { @@ -3969,9 +4298,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } break; case ir_txl: - opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; - ir->lod_info.lod->accept(this); - lod_info = this->result; + if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { + opcode = TGSI_OPCODE_TEX_LZ; + } else { + opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + } if (ir->offset) { ir->offset->accept(this); offset[0] = this->result; @@ -3999,9 +4332,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) levels_src = get_temp(ir->type); break; case ir_txf: - opcode = TGSI_OPCODE_TXF; - ir->lod_info.lod->accept(this); - lod_info = this->result; + if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { + opcode = TGSI_OPCODE_TXF_LZ; + } else { + opcode = TGSI_OPCODE_TXF; + ir->lod_info.lod->accept(this); + lod_info = this->result; + } if (ir->offset) { ir->offset->accept(this); offset[0] = this->result; @@ -4018,7 +4355,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) component = this->result; if (ir->offset) { ir->offset->accept(this); - if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) { + if (ir->offset->type->is_array()) { const glsl_type *elt_type = ir->offset->type->fields.array; for (i = 0; i < ir->offset->type->length; i++) { offset[i] = this->result; @@ -4064,11 +4401,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * the shadow comparator value must also be projected. */ st_src_reg tmp_src = coord; - if (ir->shadow_comparitor) { + if (ir->shadow_comparator) { /* Slot the shadow value in as the second to last component of the * coord. */ - ir->shadow_comparitor->accept(this); + ir->shadow_comparator->accept(this); tmp_src = get_temp(glsl_type::vec4_type); st_dst_reg tmp_dst = st_dst_reg(tmp_src); @@ -4095,11 +4432,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * comparator was put in the correct place (and projected) by the code, * above, that handles by-hand projection. */ - if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { + if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { /* Slot the shadow value in as the second to last component of the * coord. */ - ir->shadow_comparitor->accept(this); + ir->shadow_comparator->accept(this); if (is_cube_array) { cube_sc = get_temp(glsl_type::float_type); @@ -4133,10 +4470,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) coord_dst.writemask = WRITEMASK_XYZW; } + st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); + + uint16_t index = 0; get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, - &sampler_index, &reladdr, true); - if (reladdr.file != PROGRAM_UNDEFINED) + &index, &reladdr, !var->contains_bindless()); + + sampler.index = index; + if (reladdr.file != PROGRAM_UNDEFINED) { + sampler.reladdr = ralloc(mem_ctx, st_src_reg); + *sampler.reladdr = reladdr; emit_arl(ir, sampler_reladdr, reladdr); + } if (opcode == TGSI_OPCODE_TXD) inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); @@ -4151,14 +4496,12 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) inst = emit_asm(ir, opcode, result_dst, lod_info); } else if (opcode == TGSI_OPCODE_TXQS) { inst = emit_asm(ir, opcode, result_dst); - } else if (opcode == TGSI_OPCODE_TXF) { - inst = emit_asm(ir, opcode, result_dst, coord); } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { inst = emit_asm(ir, opcode, result_dst, coord, lod_info); } else if (opcode == TGSI_OPCODE_TEX2) { inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else if (opcode == TGSI_OPCODE_TG4) { - if (is_cube_array && ir->shadow_comparitor) { + if (is_cube_array && ir->shadow_comparator) { inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else { inst = emit_asm(ir, opcode, result_dst, coord, component); @@ -4166,16 +4509,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } else inst = emit_asm(ir, opcode, result_dst, coord); - if (ir->shadow_comparitor) + if (ir->shadow_comparator) inst->tex_shadow = GL_TRUE; - inst->resource.index = sampler_index; - inst->sampler_array_size = sampler_array_size; - inst->sampler_base = sampler_base; - - if (reladdr.file != PROGRAM_UNDEFINED) { - inst->resource.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr)); + if (var->contains_bindless()) { + ir->sampler->accept(this); + inst->resource = this->result; + inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); + } else { + inst->resource = sampler; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; } if (ir->offset) { @@ -4187,39 +4532,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) inst->tex_offset_num_offset = i; } - switch (sampler_type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - break; - case GLSL_SAMPLER_DIM_2D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - break; - case GLSL_SAMPLER_DIM_3D: - inst->tex_target = TEXTURE_3D_INDEX; - break; - case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; - break; - case GLSL_SAMPLER_DIM_RECT: - inst->tex_target = TEXTURE_RECT_INDEX; - break; - case GLSL_SAMPLER_DIM_BUF: - inst->tex_target = TEXTURE_BUFFER_INDEX; - break; - case GLSL_SAMPLER_DIM_EXTERNAL: - inst->tex_target = TEXTURE_EXTERNAL_INDEX; - break; - case GLSL_SAMPLER_DIM_MS: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; - break; - default: - assert(!"Should not get here."); - } - + inst->tex_target = sampler_type->sampler_index(); inst->tex_type = ir->type->base_type; this->result = result_src; @@ -4325,7 +4638,6 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_immediates = 0; num_address_regs = 0; samplers_used = 0; - buffers_used = 0; images_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; @@ -4340,10 +4652,20 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() have_sqrt = false; have_fma = false; use_shared_memory = false; + has_tex_txf_lz = false; + variables = NULL; +} + +static void var_destroy(struct hash_entry *entry) +{ + variable_storage *storage = (variable_storage *)entry->data; + + delete storage; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + _mesa_hash_table_destroy(variables, var_destroy); free(array_sizes); ralloc_free(mem_ctx); } @@ -4362,7 +4684,6 @@ static void count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; - v->buffers_used = 0; v->images_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { @@ -4388,12 +4709,9 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (inst->resource.file != PROGRAM_UNDEFINED && ( is_resource_instruction(inst->op) || inst->op == TGSI_OPCODE_STORE)) { - if (inst->resource.file == PROGRAM_BUFFER) { - v->buffers_used |= 1 << inst->resource.index; - } else if (inst->resource.file == PROGRAM_MEMORY) { + if (inst->resource.file == PROGRAM_MEMORY) { v->use_shared_memory = true; - } else { - assert(inst->resource.file == PROGRAM_IMAGE); + } else if (inst->resource.file == PROGRAM_IMAGE) { for (int i = 0; i < inst->sampler_array_size; i++) { unsigned idx = inst->sampler_base + i; v->images_used |= 1 << idx; @@ -4468,7 +4786,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) /* Give up if we encounter relative addressing or flow control. */ if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || inst->dst[1].reladdr || inst->dst[1].reladdr2 || - tgsi_get_opcode_info(inst->op)->is_branch || + inst->info->is_branch || inst->op == TGSI_OPCODE_CONT || inst->op == TGSI_OPCODE_END || inst->op == TGSI_OPCODE_RET) { @@ -4517,31 +4835,65 @@ glsl_to_tgsi_visitor::simplify_cmp(void) /* Replaces all references to a temporary register index with another index. */ void -glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames) +glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames) { foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { unsigned j; - int k; for (j = 0; j < num_inst_src_regs(inst); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY) - for (k = 0; k < num_renames; k++) - if (inst->src[j].index == renames[k].old_reg) - inst->src[j].index = renames[k].new_reg; + if (inst->src[j].file == PROGRAM_TEMPORARY) { + int old_idx = inst->src[j].index; + if (renames[old_idx].valid) + inst->src[j].index = renames[old_idx].new_reg; + } } for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) - for (k = 0; k < num_renames; k++) - if (inst->tex_offsets[j].index == renames[k].old_reg) - inst->tex_offsets[j].index = renames[k].new_reg; + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { + int old_idx = inst->tex_offsets[j].index; + if (renames[old_idx].valid) + inst->tex_offsets[j].index = renames[old_idx].new_reg; + } + } + + if (inst->resource.file == PROGRAM_TEMPORARY) { + int old_idx = inst->resource.index; + if (renames[old_idx].valid) + inst->resource.index = renames[old_idx].new_reg; } for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) - for (k = 0; k < num_renames; k++) - if (inst->dst[j].index == renames[k].old_reg) - inst->dst[j].index = renames[k].new_reg; + if (inst->dst[j].file == PROGRAM_TEMPORARY) { + int old_idx = inst->dst[j].index; + if (renames[old_idx].valid) + inst->dst[j].index = renames[old_idx].new_reg;} + } + } +} + +void +glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) { + if (first_writes[inst->dst[j].index] == -1) + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; } + assert(depth >= 0); + i++; } } @@ -4834,7 +5186,8 @@ glsl_to_tgsi_visitor::copy_propagate(void) inst->src[0].file != PROGRAM_ARRAY && !inst->src[0].reladdr && !inst->src[0].reladdr2 && - !inst->src[0].negate) { + !inst->src[0].negate && + !inst->src[0].abs) { for (int i = 0; i < 4; i++) { if (inst->dst[0].writemask & (1 << i)) { acp[4 * inst->dst[0].index + i] = inst; @@ -4958,6 +5311,21 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) } } } + + if (inst->resource.file == PROGRAM_TEMPORARY) { + int src_chans; + + src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) + writes[4 * inst->resource.index + c] = NULL; + } + } + break; } @@ -5074,11 +5442,10 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) void glsl_to_tgsi_visitor::merge_registers(void) { - int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); - int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int *last_reads = ralloc_array(mem_ctx, int, this->next_temp); + int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); int i, j; - int num_renames = 0; /* Read the indices of the last read and first write to each temp register * into an array so that we don't have to traverse the instruction list as @@ -5105,9 +5472,8 @@ glsl_to_tgsi_visitor::merge_registers(void) * as the register at index j. */ if (first_writes[i] <= first_writes[j] && last_reads[i] <= first_writes[j]) { - renames[num_renames].old_reg = j; - renames[num_renames].new_reg = i; - num_renames++; + renames[j].new_reg = i; + renames[j].valid = true; /* Update the first_writes and last_reads arrays with the new * values for the merged register index, and mark the newly unused @@ -5120,7 +5486,7 @@ glsl_to_tgsi_visitor::merge_registers(void) } } - rename_temp_registers(num_renames, renames); + rename_temp_registers(renames); ralloc_free(renames); ralloc_free(last_reads); ralloc_free(first_writes); @@ -5133,28 +5499,27 @@ glsl_to_tgsi_visitor::renumber_registers(void) { int i = 0; int new_index = 0; - int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); - int num_renames = 0; + for (i = 0; i < this->next_temp; i++) { - first_reads[i] = -1; + first_writes[i] = -1; } - get_first_temp_read(first_reads); + get_first_temp_write(first_writes); for (i = 0; i < this->next_temp; i++) { - if (first_reads[i] < 0) continue; + if (first_writes[i] < 0) continue; if (i != new_index) { - renames[num_renames].old_reg = i; - renames[num_renames].new_reg = new_index; - num_renames++; + renames[i].new_reg = new_index; + renames[i].valid = true; } new_index++; } - rename_temp_registers(num_renames, renames); + rename_temp_registers(renames); this->next_temp = new_index; ralloc_free(renames); - ralloc_free(first_reads); + ralloc_free(first_writes); } /* ------------------------- TGSI conversion stuff -------------------------- */ @@ -5188,8 +5553,8 @@ struct st_translate { struct inout_decl *output_decls; unsigned num_output_decls; - const GLuint *inputMapping; - const GLuint *outputMapping; + const ubyte *inputMapping; + const ubyte *outputMapping; unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ }; @@ -5253,6 +5618,22 @@ _mesa_sysval_to_semantic(unsigned sysval) case SYSTEM_VALUE_LOCAL_GROUP_SIZE: return TGSI_SEMANTIC_BLOCK_SIZE; + /* ARB_shader_ballot */ + case SYSTEM_VALUE_SUBGROUP_SIZE: + return TGSI_SEMANTIC_SUBGROUP_SIZE; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return TGSI_SEMANTIC_SUBGROUP_INVOCATION; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + return TGSI_SEMANTIC_SUBGROUP_EQ_MASK; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + return TGSI_SEMANTIC_SUBGROUP_GE_MASK; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + return TGSI_SEMANTIC_SUBGROUP_GT_MASK; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + return TGSI_SEMANTIC_SUBGROUP_LE_MASK; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: + return TGSI_SEMANTIC_SUBGROUP_LT_MASK; + /* Unhandled */ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: @@ -5279,6 +5660,10 @@ emit_immediate(struct st_translate *t, return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_DOUBLE: return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size); + case GL_INT64_ARB: + return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size); + case GL_UNSIGNED_INT64_ARB: + return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size); case GL_INT: return ureg_DECL_immediate_int(ureg, &values[0].i, size); case GL_UNSIGNED_INT: @@ -5383,9 +5768,19 @@ src_register(struct st_translate *t, const st_src_reg *reg) case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: - case PROGRAM_OUTPUT: return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id)); + case PROGRAM_OUTPUT: { + struct ureg_dst dst = dst_register(t, reg->file, reg->index, reg->array_id); + assert(dst.WriteMask != 0); + unsigned shift = ffs(dst.WriteMask) - 1; + return ureg_swizzle(ureg_src(dst), + shift, + MIN2(shift + 1, 3), + MIN2(shift + 2, 3), + MIN2(shift + 3, 3)); + } + case PROGRAM_UNIFORM: assert(reg->index >= 0); return reg->index < t->num_constants ? @@ -5497,6 +5892,9 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) GET_SWZ(src_reg->swizzle, 2) & 0x3, GET_SWZ(src_reg->swizzle, 3) & 0x3); + if (src_reg->abs) + src = ureg_abs(src); + if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); @@ -5563,10 +5961,11 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: assert(num_dst == 0); - ureg_insn(ureg, inst->op, NULL, 0, src, num_src); + ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise); return; case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TEX_LZ: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: @@ -5574,12 +5973,18 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXQS: case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TXF_LZ: case TGSI_OPCODE_TEX2: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TG4: case TGSI_OPCODE_LODQ: - src[num_src] = t->samplers[inst->resource.index]; + if (inst->resource.file == PROGRAM_SAMPLER) { + src[num_src] = t->samplers[inst->resource.index]; + } else { + /* Bindless samplers. */ + src[num_src] = translate_src(t, &inst->resource); + } assert(src[num_src].File != TGSI_FILE_NULL); if (inst->resource.reladdr) src[num_src] = @@ -5594,6 +5999,7 @@ compile_tgsi_instruction(struct st_translate *t, inst->op, dst, num_dst, tex_target, + st_translate_texture_type(inst->tex_type), texoffsets, inst->tex_offset_num_offset, src, num_src); return; @@ -5618,7 +6024,12 @@ compile_tgsi_instruction(struct st_translate *t, } else if (inst->resource.file == PROGRAM_BUFFER) { src[0] = t->buffers[inst->resource.index]; } else { - src[0] = t->images[inst->resource.index]; + if (inst->resource.file == PROGRAM_IMAGE) { + src[0] = t->images[inst->resource.index]; + } else { + /* Bindless images. */ + src[0] = translate_src(t, &inst->resource); + } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); } if (inst->resource.reladdr) @@ -5635,7 +6046,12 @@ compile_tgsi_instruction(struct st_translate *t, } else if (inst->resource.file == PROGRAM_BUFFER) { dst[0] = ureg_dst(t->buffers[inst->resource.index]); } else { - dst[0] = ureg_dst(t->images[inst->resource.index]); + if (inst->resource.file == PROGRAM_IMAGE) { + dst[0] = ureg_dst(t->images[inst->resource.index]); + } else { + /* Bindless images. */ + dst[0] = ureg_dst(translate_src(t, &inst->resource)); + } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); } dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); @@ -5649,14 +6065,14 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); - ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src, inst->precise); break; default: ureg_insn(ureg, inst->op, dst, num_dst, - src, num_src); + src, num_src, inst->precise); break; } } @@ -5889,7 +6305,7 @@ struct sort_inout_decls { return mapping[a.mesa_index] < mapping[b.mesa_index]; } - const GLuint *mapping; + const ubyte *mapping; }; /* Sort the given array of decls by the corresponding slot (TGSI file index). @@ -5900,7 +6316,7 @@ struct sort_inout_decls { static void sort_inout_decls_by_slot(struct inout_decl *decls, unsigned count, - const GLuint mapping[]) + const ubyte mapping[]) { sort_inout_decls sorter; sorter.mapping = mapping; @@ -5954,14 +6370,13 @@ st_translate_program( glsl_to_tgsi_visitor *program, const struct gl_program *proginfo, GLuint numInputs, - const GLuint inputMapping[], - const GLuint inputSlotToAttr[], + const ubyte inputMapping[], + const ubyte inputSlotToAttr[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], - const GLuint interpMode[], + const ubyte interpMode[], GLuint numOutputs, - const GLuint outputMapping[], - const GLuint outputSlotToAttr[], + const ubyte outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[]) { @@ -6081,6 +6496,7 @@ st_translate_program( dst = ureg_DECL_output_layout(ureg, outputSemanticName[slot], outputSemanticIndex[slot], + decl->gs_out_streams, slot, tgsi_usage_mask, decl->array_id, decl->size); for (unsigned j = 0; j < decl->size; ++j) { @@ -6098,9 +6514,14 @@ st_translate_program( } if (procType == PIPE_SHADER_FRAGMENT) { - if (program->shader->info.EarlyFragmentTests) + if (program->shader->Program->info.fs.early_fragment_tests || + program->shader->Program->info.fs.post_depth_coverage) { ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); + if (program->shader->Program->info.fs.post_depth_coverage) + ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1); + } + if (proginfo->info.inputs_read & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. */ emit_wpos(st_context(ctx), t, proginfo, ureg, @@ -6257,18 +6678,14 @@ st_translate_program( } } - if (program->shader) { - unsigned num_ubos = program->shader->NumUniformBlocks; - - for (i = 0; i < num_ubos; i++) { - unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize; - unsigned num_const_vecs = (size + 15) / 16; - unsigned first, last; - assert(num_const_vecs > 0); - first = 0; - last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; - ureg_DECL_constant2D(t->ureg, first, last, i + 1); - } + for (i = 0; i < proginfo->info.num_ubos; i++) { + unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize; + unsigned num_const_vecs = (size + 15) / 16; + unsigned first, last; + assert(num_const_vecs > 0); + first = 0; + last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; + ureg_DECL_constant2D(t->ureg, first, last, i + 1); } /* Emit immediate values. @@ -6291,46 +6708,36 @@ st_translate_program( /* texture samplers */ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { if (program->samplers_used & (1u << i)) { - unsigned type; + unsigned type = st_translate_texture_type(program->sampler_types[i]); t->samplers[i] = ureg_DECL_sampler(ureg, i); - switch (program->sampler_types[i]) { - case GLSL_TYPE_INT: - type = TGSI_RETURN_TYPE_SINT; - break; - case GLSL_TYPE_UINT: - type = TGSI_RETURN_TYPE_UINT; - break; - case GLSL_TYPE_FLOAT: - type = TGSI_RETURN_TYPE_FLOAT; - break; - default: - unreachable("not reached"); - } - ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i], type, type, type, type ); } } - for (i = 0; i < frag_const->MaxAtomicBuffers; i++) { - if (program->buffers_used & (1 << i)) { - t->buffers[i] = ureg_DECL_buffer(ureg, i, true); + /* Declare atomic and shader storage buffers. */ + { + struct gl_program *prog = program->prog; + + for (i = 0; i < prog->info.num_abos; i++) { + unsigned index = prog->sh.AtomicBuffers[i]->Binding; + assert(index < frag_const->MaxAtomicBuffers); + t->buffers[index] = ureg_DECL_buffer(ureg, index, true); } - } - for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks; - i++) { - if (program->buffers_used & (1 << i)) { - t->buffers[i] = ureg_DECL_buffer(ureg, i, false); + assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks); + for (i = 0; i < prog->info.num_ssbos; i++) { + unsigned index = frag_const->MaxAtomicBuffers + i; + t->buffers[index] = ureg_DECL_buffer(ureg, index, false); } } if (program->use_shared_memory) t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); - for (i = 0; i < program->shader->NumImages; i++) { + for (i = 0; i < program->shader->Program->info.num_images; i++) { if (program->images_used & (1 << i)) { t->images[i] = ureg_DECL_image(ureg, i, program->image_targets[i], @@ -6353,27 +6760,8 @@ st_translate_program( for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) { if (program->shader_program->_LinkedShaders[i]) { - unsigned next; - - switch (i) { - case MESA_SHADER_TESS_CTRL: - next = PIPE_SHADER_TESS_CTRL; - break; - case MESA_SHADER_TESS_EVAL: - next = PIPE_SHADER_TESS_EVAL; - break; - case MESA_SHADER_GEOMETRY: - next = PIPE_SHADER_GEOMETRY; - break; - case MESA_SHADER_FRAGMENT: - next = PIPE_SHADER_FRAGMENT; - break; - default: - assert(0); - continue; - } - - ureg_set_next_shader_processor(ureg, next); + ureg_set_next_shader_processor( + ureg, pipe_shader_type_from_mesa((gl_shader_stage)i)); break; } } @@ -6411,6 +6799,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, &ctx->Const.ShaderCompilerOptions[shader->Stage]; struct pipe_screen *pscreen = ctx->st->pipe->screen; enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage); + unsigned skip_merge_registers; validate_ir_tree(shader->ir); @@ -6430,12 +6819,21 @@ get_mesa_program_tgsi(struct gl_context *ctx, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); v->have_fma = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); + v->has_tex_txf_lz = pscreen->get_param(pscreen, + PIPE_CAP_TGSI_TEX_TXF_LZ); + + v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + skip_merge_registers = + pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS); _mesa_generate_parameters_list_for_uniforms(shader_program, shader, prog->Parameters); /* Remove reads from output registers. */ - lower_output_reads(shader->Stage, shader->ir); + if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS)) + lower_output_reads(shader->Stage, shader->ir); /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); @@ -6445,10 +6843,10 @@ get_mesa_program_tgsi(struct gl_context *ctx, * optimization passes. */ { int i; - int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); - int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); - int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); - int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp); + int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp); + int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp); + int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp); for (i = 0; i < v->next_temp; i++) { first_writes[i] = -1; @@ -6481,7 +6879,8 @@ get_mesa_program_tgsi(struct gl_context *ctx, while (v->eliminate_dead_code()); v->merge_two_dsts(); - v->merge_registers(); + if (!skip_merge_registers) + v->merge_registers(); v->renumber_registers(); /* Write the END instruction. */ @@ -6533,7 +6932,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); + _mesa_associate_uniform_storage(ctx, shader_program, prog, true); if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); _mesa_reference_program(ctx, &shader->Program, NULL); @@ -6542,9 +6941,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, struct st_vertex_program *stvp; struct st_fragment_program *stfp; - struct st_geometry_program *stgp; - struct st_tessctrl_program *sttcp; - struct st_tesseval_program *sttep; + struct st_common_program *stp; struct st_compute_program *stcp; switch (shader->Stage) { @@ -6556,17 +6953,11 @@ get_mesa_program_tgsi(struct gl_context *ctx, stfp = (struct st_fragment_program *)prog; stfp->glsl_to_tgsi = v; break; - case MESA_SHADER_GEOMETRY: - stgp = (struct st_geometry_program *)prog; - stgp->glsl_to_tgsi = v; - break; case MESA_SHADER_TESS_CTRL: - sttcp = (struct st_tessctrl_program *)prog; - sttcp->glsl_to_tgsi = v; - break; case MESA_SHADER_TESS_EVAL: - sttep = (struct st_tesseval_program *)prog; - sttep->glsl_to_tgsi = v; + case MESA_SHADER_GEOMETRY: + stp = st_common_program(prog); + stp->glsl_to_tgsi = v; break; case MESA_SHADER_COMPUTE: stcp = (struct st_compute_program *)prog; @@ -6580,173 +6971,55 @@ get_mesa_program_tgsi(struct gl_context *ctx, return prog; } -static void -set_affected_state_flags(uint64_t *states, - struct gl_program *prog, - struct gl_linked_shader *shader, - uint64_t new_constants, - uint64_t new_sampler_views, - uint64_t new_samplers, - uint64_t new_images, - uint64_t new_ubos, - uint64_t new_ssbos, - uint64_t new_atomics) -{ - if (prog->Parameters->NumParameters) - *states |= new_constants; - - if (shader->num_samplers) - *states |= new_sampler_views | new_samplers; - - if (shader->NumImages) - *states |= new_images; - - if (shader->NumUniformBlocks) - *states |= new_ubos; +/* See if there are unsupported control flow statements. */ +class ir_control_flow_info_visitor : public ir_hierarchical_visitor { +private: + const struct gl_shader_compiler_options *options; +public: + ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options) + : options(options), + unsupported(false) + { + } - if (shader->NumShaderStorageBlocks) - *states |= new_ssbos; + virtual ir_visitor_status visit_enter(ir_function *ir) + { + /* Other functions are skipped (same as glsl_to_tgsi). */ + if (strcmp(ir->name, "main") == 0) + return visit_continue; - if (prog->info.num_abos) - *states |= new_atomics; -} + return visit_continue_with_parent; + } -static struct gl_program * -get_mesa_program(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_linked_shader *shader) -{ - struct pipe_screen *pscreen = ctx->st->pipe->screen; - enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage); - enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) - pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR); - struct gl_program *prog = NULL; - - if (preferred_ir == PIPE_SHADER_IR_NIR) { - /* TODO only for GLSL VS/FS for now: */ - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_FRAGMENT: - prog = st_nir_get_mesa_program(ctx, shader_program, shader); - default: - break; + virtual ir_visitor_status visit_enter(ir_call *ir) + { + if (!ir->callee->is_intrinsic()) { + unsupported = true; /* it's a function call */ + return visit_stop; } - } else { - prog = get_mesa_program_tgsi(ctx, shader_program, shader); + return visit_continue; } - if (prog) { - uint64_t *states; - - /* This determines which states will be updated when the shader is - * bound. - */ - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - states = &((struct st_vertex_program*)prog)->affected_states; - - *states = ST_NEW_VS_STATE | - ST_NEW_RASTERIZER | - ST_NEW_VERTEX_ARRAYS; - - set_affected_state_flags(states, prog, shader, - ST_NEW_VS_CONSTANTS, - ST_NEW_VS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_VS_IMAGES, - ST_NEW_VS_UBOS, - ST_NEW_VS_SSBOS, - ST_NEW_VS_ATOMICS); - break; - - case MESA_SHADER_TESS_CTRL: - states = &((struct st_tessctrl_program*)prog)->affected_states; - - *states = ST_NEW_TCS_STATE; - - set_affected_state_flags(states, prog, shader, - ST_NEW_TCS_CONSTANTS, - ST_NEW_TCS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_TCS_IMAGES, - ST_NEW_TCS_UBOS, - ST_NEW_TCS_SSBOS, - ST_NEW_TCS_ATOMICS); - break; - - case MESA_SHADER_TESS_EVAL: - states = &((struct st_tesseval_program*)prog)->affected_states; - - *states = ST_NEW_TES_STATE | - ST_NEW_RASTERIZER; - - set_affected_state_flags(states, prog, shader, - ST_NEW_TES_CONSTANTS, - ST_NEW_TES_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_TES_IMAGES, - ST_NEW_TES_UBOS, - ST_NEW_TES_SSBOS, - ST_NEW_TES_ATOMICS); - break; - - case MESA_SHADER_GEOMETRY: - states = &((struct st_geometry_program*)prog)->affected_states; - - *states = ST_NEW_GS_STATE | - ST_NEW_RASTERIZER; - - set_affected_state_flags(states, prog, shader, - ST_NEW_GS_CONSTANTS, - ST_NEW_GS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_GS_IMAGES, - ST_NEW_GS_UBOS, - ST_NEW_GS_SSBOS, - ST_NEW_GS_ATOMICS); - break; - - case MESA_SHADER_FRAGMENT: - states = &((struct st_fragment_program*)prog)->affected_states; - - /* gl_FragCoord and glDrawPixels always use constants. */ - *states = ST_NEW_FS_STATE | - ST_NEW_SAMPLE_SHADING | - ST_NEW_FS_CONSTANTS; - - set_affected_state_flags(states, prog, shader, - ST_NEW_FS_CONSTANTS, - ST_NEW_FS_SAMPLER_VIEWS, - ST_NEW_RENDER_SAMPLERS, - ST_NEW_FS_IMAGES, - ST_NEW_FS_UBOS, - ST_NEW_FS_SSBOS, - ST_NEW_FS_ATOMICS); - break; - - case MESA_SHADER_COMPUTE: - states = &((struct st_compute_program*)prog)->affected_states; - - *states = ST_NEW_CS_STATE; - - set_affected_state_flags(states, prog, shader, - ST_NEW_CS_CONSTANTS, - ST_NEW_CS_SAMPLER_VIEWS, - ST_NEW_CS_SAMPLERS, - ST_NEW_CS_IMAGES, - ST_NEW_CS_UBOS, - ST_NEW_CS_SSBOS, - ST_NEW_CS_ATOMICS); - break; - - default: - unreachable("unhandled shader stage"); + virtual ir_visitor_status visit_enter(ir_return *ir) + { + if (options->EmitNoMainReturn) { + unsupported = true; + return visit_stop; } + return visit_continue; } - return prog; -} + bool unsupported; +}; +static bool +has_unsupported_control_flow(exec_list *ir, + const struct gl_shader_compiler_options *options) +{ + ir_control_flow_info_visitor visitor(options); + visit_list_elements(&visitor, ir); + return visitor.unsupported; +} extern "C" { @@ -6759,6 +7032,11 @@ extern "C" { GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + /* Return early if we are loading the shader from on-disk cache */ + if (st_load_tgsi_from_disk_cache(ctx, prog)) { + return GL_TRUE; + } + struct pipe_screen *pscreen = ctx->st->pipe->screen; assert(prog->data->LinkStatus); @@ -6766,9 +7044,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; - bool progress; - exec_list *ir = prog->_LinkedShaders[i]->ir; - gl_shader_stage stage = prog->_LinkedShaders[i]->Stage; + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + exec_list *ir = shader->ir; + gl_shader_stage stage = shader->Stage; const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage); @@ -6784,13 +7062,16 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) */ if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { - lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, + lower_variable_index_to_cond_assign(stage, ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, options->EmitNoIndirectUniform); } + if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD)) + lower_64bit_integer_instructions(ir, DIV64 | MOD64); + if (ctx->Extensions.ARB_shading_language_packing) { unsigned lower_inst = LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16 | @@ -6814,9 +7095,13 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) lower_offset_arrays(ir); do_mat_op_to_vec(ir); + + if (stage == MESA_SHADER_FRAGMENT) + lower_blend_equation_advanced(shader); + lower_instructions(ir, MOD_TO_FLOOR | - DIV_TO_MUL_RCP | + FDIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | LDEXP_TO_ARITH | @@ -6827,6 +7112,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) (options->EmitNoPow ? POW_TO_EXP2 : 0) | (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | (options->EmitNoSat ? SAT_TO_CLAMP : 0) | + (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) | /* Assume that if ARB_gpu_shader5 is not supported * then all of the extended integer functions need * lowering. It may be necessary to add some caps @@ -6850,20 +7136,24 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) lower_discard(ir); } - do { - progress = false; - - progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; - - progress = do_common_optimization(ir, true, true, options, - ctx->Const.NativeIntegers) - || progress; - - progress = lower_if_to_cond_assign((gl_shader_stage)i, ir, - options->MaxIfDepth, if_threshold) || - progress; - - } while (progress); + if (ctx->Const.GLSLOptimizeConservatively) { + /* Do it once and repeat only if there's unsupported control flow. */ + do { + do_common_optimization(ir, true, true, options, + ctx->Const.NativeIntegers); + lower_if_to_cond_assign((gl_shader_stage)i, ir, + options->MaxIfDepth, if_threshold); + } while (has_unsupported_control_flow(ir, options)); + } else { + /* Repeat it until it stops making changes. */ + bool progress; + do { + progress = do_common_optimization(ir, true, true, options, + ctx->Const.NativeIntegers); + progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir, + options->MaxIfDepth, if_threshold); + } while (progress); + } validate_ir_tree(ir); } @@ -6871,19 +7161,37 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) build_program_resource_list(ctx, prog); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_program *linked_prog; - - if (prog->_LinkedShaders[i] == NULL) + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + if (shader == NULL) continue; - linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + enum pipe_shader_type ptarget = + st_shader_stage_to_ptarget(shader->Stage); + enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) + pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_PREFERRED_IR); + + struct gl_program *linked_prog = NULL; + if (preferred_ir == PIPE_SHADER_IR_NIR) { + /* TODO only for GLSL VS/FS/CS for now: */ + switch (shader->Stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + linked_prog = st_nir_get_mesa_program(ctx, prog, shader); + default: + break; + } + } else { + linked_prog = get_mesa_program_tgsi(ctx, prog, shader); + } if (linked_prog) { + st_set_prog_affected_state_flags(linked_prog); if (!ctx->Driver.ProgramStringNotify(ctx, _mesa_shader_stage_to_program(i), linked_prog)) { - _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, - NULL); + _mesa_reference_program(ctx, &shader->Program, NULL); return GL_FALSE; } } @@ -6894,17 +7202,20 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) void st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, - const GLuint outputMapping[], + const ubyte outputMapping[], struct pipe_stream_output_info *so) { + if (!glsl_to_tgsi->shader_program->last_vert_prog) + return; + struct gl_transform_feedback_info *info = - &glsl_to_tgsi->shader_program->LinkedTransformFeedback; + glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback; st_translate_stream_output_info2(info, outputMapping, so); } void st_translate_stream_output_info2(struct gl_transform_feedback_info *info, - const GLuint outputMapping[], + const ubyte outputMapping[], struct pipe_stream_output_info *so) { unsigned i;