X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=bc032f4c751f0c6092a4dd5cb8e708a425509841;hb=4f8fc0f0665d66bf9320023240ff4b47300dd91d;hp=a45f0047a8ab6b91498f82556e06d13f195e8511;hpb=fbf4c2916c14d95209abc6b64737ad8528e62fae;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index a45f0047a8a..bc032f4c751 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -49,12 +49,9 @@ #include "tgsi/tgsi_info.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "st_glsl_types.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_format.h" -#include "st_nir.h" -#include "st_shader_cache.h" #include "st_glsl_to_tgsi_temprename.h" #include "util/hash_table.h" @@ -66,6 +63,49 @@ #define MAX_GLSL_TEXTURE_OFFSET 4 +#ifndef NDEBUG +#include "util/u_atomic.h" +#include "util/simple_mtx.h" +#include +#include + +/* Prepare to make it possible to specify log file */ +static std::ofstream stats_log; + +/* Helper function to check whether we want to write some statistics + * of the shader conversion. + */ + +static simple_mtx_t print_stats_mutex = _SIMPLE_MTX_INITIALIZER_NP; + +static inline bool print_stats_enabled () +{ + static int stats_enabled = 0; + + if (!stats_enabled) { + simple_mtx_lock(&print_stats_mutex); + if (!stats_enabled) { + const char *stats_filename = getenv("GLSL_TO_TGSI_PRINT_STATS"); + if (stats_filename) { + bool write_header = std::ifstream(stats_filename).fail(); + stats_log.open(stats_filename, std::ios_base::out | std::ios_base::app); + stats_enabled = stats_log.good() ? 1 : -1; + if (write_header) + stats_log << "arrays,temps,temps in arrays,total,instructions\n"; + } else { + stats_enabled = -1; + } + } + simple_mtx_unlock(&print_stats_mutex); + } + return stats_enabled > 0; +} +#define PRINT_STATS(X) if (print_stats_enabled()) do { X; } while (false); +#else +#define PRINT_STATS(X) +#endif + + static unsigned is_precise(const ir_variable *ir) { if (!ir) @@ -98,7 +138,7 @@ public: class immediate_storage : public exec_node { public: - immediate_storage(gl_constant_value *values, int size32, int type) + immediate_storage(gl_constant_value *values, int size32, GLenum type) { memcpy(this->values, values, size32 * sizeof(gl_constant_value)); this->size32 = size32; @@ -108,7 +148,7 @@ public: /* doubles are stored across 2 gl_constant_values */ gl_constant_value values[4]; int size32; /**< Number of 32-bit components (1-4) */ - int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ + GLenum type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ }; static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); @@ -123,6 +163,7 @@ struct inout_decl { enum glsl_interp_mode interp; enum glsl_base_type base_type; ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ + bool invariant; }; static struct inout_decl * @@ -152,6 +193,13 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) return GLSL_TYPE_ERROR; } +struct hwatomic_decl { + unsigned location; + unsigned binding; + unsigned size; + unsigned array_id; +}; + struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -176,17 +224,20 @@ public: unsigned num_outputs; unsigned num_output_arrays; + struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS]; + unsigned num_atomics; + unsigned num_atomic_arrays; int num_address_regs; uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; - int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ + enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS]; int images_used; - int image_targets[PIPE_MAX_SHADER_IMAGES]; - unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; + enum tgsi_texture_type image_targets[PIPE_MAX_SHADER_IMAGES]; + enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES]; + bool image_wr[PIPE_MAX_SHADER_IMAGES]; bool indirect_addr_consts; int wpos_transform_const; - int glsl_version; bool native_integers; bool have_sqrt; bool have_fma; @@ -194,11 +245,12 @@ public: bool has_tex_txf_lz; bool precise; bool need_uarl; + bool tg4_component_in_swizzle; variable_storage *find_variable_storage(ir_variable *var); int add_constant(gl_register_file file, gl_constant_value values[8], - int size, int datatype, uint16_t *swizzle_out); + int size, GLenum datatype, uint16_t *swizzle_out); st_src_reg get_temp(const glsl_type *type); void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); @@ -232,6 +284,7 @@ public: virtual void visit(ir_call *); virtual void visit(ir_return *); virtual void visit(ir_discard *); + virtual void visit(ir_demote *); virtual void visit(ir_texture *); virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); @@ -246,7 +299,7 @@ public: void visit_membar_intrinsic(ir_call *); void visit_shared_intrinsic(ir_call *); void visit_image_intrinsic(ir_call *); - void visit_generic_intrinsic(ir_call *, unsigned op); + void visit_generic_intrinsic(ir_call *, enum tgsi_opcode op); st_src_reg result; @@ -260,23 +313,23 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst = undef_dst, st_src_reg src0 = undef_src, st_src_reg src1 = undef_src, st_src_reg src2 = undef_src, st_src_reg src3 = undef_src); - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_dst_reg dst1, st_src_reg src0 = undef_src, st_src_reg src1 = undef_src, st_src_reg src2 = undef_src, st_src_reg src3 = undef_src); - unsigned get_opcode(unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1); + enum tgsi_opcode get_opcode(enum tgsi_opcode op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1); /** * Emit the correct dot-product instruction for the type of arguments @@ -287,10 +340,10 @@ public: st_src_reg src1, unsigned elements); - void emit_scalar(ir_instruction *ir, unsigned op, + void emit_scalar(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_src_reg src0); - void emit_scalar(ir_instruction *ir, unsigned op, + void emit_scalar(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); @@ -307,6 +360,7 @@ public: st_src_reg *indirect, unsigned *location); st_src_reg canonicalize_gather_offset(st_src_reg offset); + bool handle_bound_deref(ir_dereference *ir); bool try_emit_mad(ir_expression *ir, int mul_operand); @@ -328,6 +382,7 @@ public: void copy_propagate(void); int eliminate_dead_code(void); + void split_arrays(void); void merge_two_dsts(void); void merge_registers(void); void renumber_registers(void); @@ -336,15 +391,21 @@ public: st_dst_reg *l, st_src_reg *r, st_src_reg *cond, bool cond_swap); + void print_stats(); + void *mem_ctx; }; -static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0); -static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1); -static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2); +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, + GLSL_TYPE_FLOAT, 0); +static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, + GLSL_TYPE_FLOAT, 1); +static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, + GLSL_TYPE_FLOAT, 2); static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); +fail_link(struct gl_shader_program *prog, const char *fmt, ...) + PRINTFLIKE(2, 3); static void fail_link(struct gl_shader_program *prog, const char *fmt, ...) @@ -354,7 +415,7 @@ fail_link(struct gl_shader_program *prog, const char *fmt, ...) ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args); va_end(args); - prog->data->LinkStatus = linking_failure; + prog->data->LinkStatus = LINKING_FAILURE; } int @@ -373,7 +434,7 @@ swizzle_for_size(int size) glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_dst_reg dst1, st_src_reg src0, st_src_reg src1, st_src_reg src2, st_src_reg src3) @@ -436,7 +497,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, /* Update indirect addressing status used by TGSI */ if (dst.reladdr || dst.reladdr2) { - switch(dst.file) { + switch (dst.file) { case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: @@ -451,8 +512,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } else { for (i = 0; i < 4; i++) { - if(inst->src[i].reladdr) { - switch(inst->src[i].file) { + if (inst->src[i].reladdr) { + switch (inst->src[i].file) { case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: @@ -488,8 +549,11 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, */ for (j = 0; j < 2; j++) { dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type); - if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) { - enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id); + if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && + inst->dst[j].type == GLSL_TYPE_ARRAY) { + enum glsl_base_type type = find_array_type(this->outputs, + this->num_outputs, + inst->dst[j].array_id); if (glsl_base_type_is_64bit(type)) dst_is_64bit[j] = true; } @@ -501,7 +565,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, int initial_src_swz[4], initial_src_idx[4]; int initial_dst_idx[2], initial_dst_writemask[2]; /* select the writemask for dst0 or dst1 */ - unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask; + unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED + ? inst->dst[0].writemask : inst->dst[1].writemask; /* copy out the writemask, index and swizzles for all src/dsts. */ for (j = 0; j < 2; j++) { @@ -523,9 +588,10 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, int i = u_bit_scan(&writemask); - /* before emitting the instruction, see if we have to adjust load / store - * address */ - if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) && + /* before emitting the instruction, see if we have to adjust + * load / store address */ + if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || + inst->op == TGSI_OPCODE_STORE) && addr.file == PROGRAM_UNDEFINED) { /* We have to advance the buffer address by 16 */ addr = get_temp(glsl_type::uint_type); @@ -552,14 +618,16 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; dinst->dst[j].index = initial_dst_idx[j]; if (i > 1) { - if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE) + if (dinst->op == TGSI_OPCODE_LOAD || + dinst->op == TGSI_OPCODE_STORE) dinst->src[0] = addr; if (dinst->op != TGSI_OPCODE_STORE) dinst->dst[j].index++; } } else { - /* if we aren't writing to a double, just get the bit of the initial writemask - for this channel */ + /* if we aren't writing to a double, just get the bit of the + * initial writemask for this channel + */ dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i); } } @@ -576,14 +644,19 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } if (swz & 1) - dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, + SWIZZLE_Z, SWIZZLE_W); else - dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); + dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); } else { /* some opcodes are special case in what they use as sources - - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is integer src1 */ - if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D || + * - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is + * integer src1 + */ + if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || + op == TGSI_OPCODE_I2D || op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 || op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP || (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) { @@ -602,7 +675,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2, st_src_reg src3) @@ -614,8 +687,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * Determines whether to use an integer, unsigned integer, or float opcode * based on the operands and input opcode, then emits the result. */ -unsigned -glsl_to_tgsi_visitor::get_opcode(unsigned op, +enum tgsi_opcode +glsl_to_tgsi_visitor::get_opcode(enum tgsi_opcode op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { @@ -676,7 +749,7 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op, op = TGSI_OPCODE_##c; \ break; - switch(op) { + switch (op) { /* Some instructions are initially selected without considering the type. * This fixes the type: * @@ -707,13 +780,15 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op, case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR); case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST); case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST); + case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST, LAST); casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE); casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT); - default: break; + default: + break; } assert(op != TGSI_OPCODE_LAST); @@ -725,7 +800,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) { - static const unsigned dot_opcodes[] = { + static const enum tgsi_opcode dot_opcodes[] = { TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; @@ -741,7 +816,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, * to produce dest channels. */ void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_src_reg orig_src0, st_src_reg orig_src1) { @@ -785,7 +860,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, } void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op, st_dst_reg dst, st_src_reg src0) { st_src_reg undef = undef_src; @@ -799,7 +874,7 @@ void glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0) { - int op = TGSI_OPCODE_ARL; + enum tgsi_opcode op = TGSI_OPCODE_ARL; if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) { if (!this->need_uarl && src0.is_legal_tgsi_address_operand()) @@ -817,13 +892,15 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, int glsl_to_tgsi_visitor::add_constant(gl_register_file file, - gl_constant_value values[8], int size, int datatype, + gl_constant_value values[8], int size, + GLenum datatype, uint16_t *swizzle_out) { if (file == PROGRAM_CONSTANT) { GLuint swizzle = swizzle_out ? *swizzle_out : 0; - int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, - size, datatype, &swizzle); + int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, size, datatype, + &swizzle); if (swizzle_out) *swizzle_out = swizzle; return result; @@ -835,7 +912,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, immediate_storage *entry; int size32 = size * ((datatype == GL_DOUBLE || datatype == GL_INT64_ARB || - datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1); + datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1); int i; /* Search immediate storage to see if we already have an identical @@ -866,7 +943,8 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, for (i = 0; i * 4 < size32; i++) { int slot_size = MIN2(size32 - (i * 4), 4); /* Add this immediate to the list. */ - entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype); + entry = new(mem_ctx) immediate_storage(&values[i * 4], + slot_size, datatype); this->immediates.push_tail(entry); this->num_immediates++; } @@ -954,7 +1032,7 @@ add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf, * emit_asm() might have actually split the op into pieces, e.g. for * double stores. We have to go back and fix up all the generated ops. */ - unsigned op = inst->op; + enum tgsi_opcode op = inst->op; do { inst->resource = *buf; if (access) @@ -987,7 +1065,7 @@ type_has_array_or_matrix(const glsl_type *type) if (type->is_array() || type->is_matrix()) return true; - if (type->is_record()) { + if (type->is_struct()) { for (unsigned i = 0; i < type->length; i++) { if (type_has_array_or_matrix(type->fields.structure[i].type)) { return true; @@ -1033,7 +1111,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) next_temp += type_size(type); } - if (type->is_array() || type->is_record()) { + if (type->is_array() || type->is_struct()) { src.swizzle = SWIZZLE_NOOP; } else { src.swizzle = swizzle_for_size(type->vector_elements); @@ -1057,11 +1135,6 @@ glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) void glsl_to_tgsi_visitor::visit(ir_variable *ir) { - if (strcmp(ir->name, "gl_FragCoord") == 0) { - this->prog->OriginUpperLeft = ir->data.origin_upper_left; - this->prog->PixelCenterInteger = ir->data.pixel_center_integer; - } - if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { unsigned int i; const ir_state_slot *const slots = ir->get_state_slots(); @@ -1106,7 +1179,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { int index = _mesa_add_state_reference(this->prog->Parameters, - (gl_state_index *)slots[i].tokens); + slots[i].tokens); if (storage->file == PROGRAM_STATE_VAR) { if (storage->index == -1) { @@ -1196,6 +1269,10 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) st_src_reg a, b, c; st_dst_reg result_dst; + // there is no TGSI opcode for this + if (ir->type->is_integer_64()) + return false; + ir_expression *expr = ir->operands[mul_operand]->as_expression(); if (!expr || expr->operation != ir_binop_mul) return false; @@ -1233,7 +1310,8 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) * instruction. */ bool -glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, + int try_operand) { const int other_operand = 1 - try_operand; st_src_reg a, b; @@ -1262,8 +1340,10 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr && !reg->reladdr2) return; - if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr); - if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2); + if (reg->reladdr) + emit_arl(ir, address_reg, *reg->reladdr); + if (reg->reladdr2) + emit_arl(ir, address_reg2, *reg->reladdr2); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1)); @@ -1332,10 +1412,33 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) st_dst_reg result_dst; int vector_elements = ir->operands[0]->type->vector_elements; - if (ir->operands[1]) { + if (ir->operands[1] && + ir->operation != ir_binop_interpolate_at_offset && + ir->operation != ir_binop_interpolate_at_sample) { + st_src_reg *swz_op = NULL; + if (vector_elements > ir->operands[1]->type->vector_elements) { + assert(ir->operands[1]->type->vector_elements == 1); + swz_op = &op[1]; + } else if (vector_elements < ir->operands[1]->type->vector_elements) { + assert(ir->operands[0]->type->vector_elements == 1); + swz_op = &op[0]; + } + if (swz_op) { + uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0); + swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x, + swizzle_x, swizzle_x); + } vector_elements = MAX2(vector_elements, ir->operands[1]->type->vector_elements); } + if (ir->operands[2] && + ir->operands[2]->type->vector_elements != vector_elements) { + /* This can happen with ir_triop_lrp, i.e. glsl mix */ + assert(ir->operands[2]->type->vector_elements == 1); + uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0); + op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x, + swizzle_x, swizzle_x); + } this->result.file = PROGRAM_UNDEFINED; @@ -1362,13 +1465,16 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) * 0.0 and 1.0, 1-x also implements !x. */ op[0].negate = ~op[0].negate; - emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], + st_src_reg_for_float(1.0)); } break; case ir_unop_neg: - if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) + if (result_dst.type == GLSL_TYPE_INT64 || + result_dst.type == GLSL_TYPE_UINT64) emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]); - else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) + else if (result_dst.type == GLSL_TYPE_INT || + result_dst.type == GLSL_TYPE_UINT) emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else if (result_dst.type == GLSL_TYPE_DOUBLE) emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); @@ -1385,7 +1491,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs()); else if (result_dst.type == GLSL_TYPE_DOUBLE) emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]); - else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) + else if (result_dst.type == GLSL_TYPE_INT64 || + result_dst.type == GLSL_TYPE_UINT64) emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]); else emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]); @@ -1437,7 +1544,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) * is a FBO or the window system buffer, respectively. * It is then multiplied with the source operand of DDY. */ - static const gl_state_index transform_y_state[STATE_LENGTH] + static const gl_state_index16 transform_y_state[STATE_LENGTH] = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; unsigned transform_y_index = @@ -1465,17 +1572,6 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); break; - case ir_unop_noise: { - /* At some point, a motivated person could add a better - * implementation of noise. Currently not even the nvidia - * binary drivers do anything more than this. In any case, the - * place to do this is in the GL state tracker, not the poor - * driver. - */ - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); - break; - } - case ir_binop_add: emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; @@ -1500,12 +1596,6 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_binop_less: emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; - case ir_binop_greater: - emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); - break; - case ir_binop_lequal: - emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); - break; case ir_binop_gequal: emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; @@ -1573,7 +1663,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) */ st_src_reg sge_src = result_src; sge_src.negate = ~sge_src.negate; - emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, + st_src_reg_for_float(0.0)); } } else { emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); @@ -1634,13 +1725,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) */ dp->saturate = true; } else { - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero stays zero, and negative values become 1.0. This - * achieved using SLT. + /* Negating the result of the dot-product gives values on the + * range [-4, 0]. Zero stays zero, and negative values become + * 1.0. This achieved using SLT. */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, + st_src_reg_for_float(0.0)); } } } else { @@ -1670,17 +1762,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate if floats are being used as boolean values. + * shader with a saturate if floats are being used as boolean + * values. */ add->saturate = true; } else { /* Negating the result of the addition gives values on the range - * [-2, 0]. Zero stays zero, and negative values become 1.0. This - * is achieved using SLT. + * [-2, 0]. Zero stays zero, and negative values become 1.0 + * This is achieved using SLT. */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, + st_src_reg_for_float(0.0)); } } break; @@ -1729,7 +1823,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) /* fallthrough to next case otherwise */ case ir_unop_b2f: if (native_integers) { - emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], + st_src_reg_for_float(1.0)); break; } /* fallthrough to next case otherwise */ @@ -1747,7 +1842,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) * GLSL requires that int(bool) return 1 for true and 0 for false. * This conversion is done with AND, but it could be done with NEG. */ - emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], + st_src_reg_for_int(1)); } else { /* Booleans and integers are both stored as floats when native * integers are disabled. @@ -1783,16 +1879,20 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) result_src.type = GLSL_TYPE_FLOAT; break; case ir_unop_f2b: - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_float(0.0)); break; case ir_unop_d2b: - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_double(0.0)); break; case ir_unop_i2b: if (native_integers) - emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], + st_src_reg_for_int(0)); else - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_float(0.0)); break; case ir_unop_bitcast_u642d: case ir_unop_bitcast_i642d: @@ -1838,23 +1938,26 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); break; } + /* fallthrough */ case ir_unop_u2f: if (native_integers) { emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } + /* fallthrough */ case ir_binop_lshift: case ir_binop_rshift: if (native_integers) { - unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL - : TGSI_OPCODE_ISHR; + enum tgsi_opcode opcode = ir->operation == ir_binop_lshift + ? TGSI_OPCODE_SHL : TGSI_OPCODE_ISHR; st_src_reg count; if (glsl_base_type_is_64bit(op[0].type)) { /* GLSL shift operations have 32-bit shift counts, but TGSI uses * 64 bits. */ - count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components())); + count = get_temp(glsl_type::u64vec(ir->operands[1] + ->type->components())); emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]); } else { count = op[1]; @@ -1863,16 +1966,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, opcode, result_dst, op[0], count); break; } + /* fallthrough */ case ir_binop_bit_and: if (native_integers) { emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); break; } + /* fallthrough */ case ir_binop_bit_xor: if (native_integers) { emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); break; } + /* fallthrough */ case ir_binop_bit_or: if (native_integers) { emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); @@ -1971,7 +2077,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset, st_src_reg_for_int(4)); cbuf.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); + *cbuf.reladdr = index_reg; } if (const_uniform_block) { @@ -1980,7 +2086,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) } else { /* Relative/variable constant buffer */ cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg); - memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg)); + *cbuf.reladdr2 = op[0]; } cbuf.has_index2 = true; @@ -2050,7 +2156,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; case ir_binop_interpolate_at_offset: { /* The y coordinate needs to be flipped for the default fb */ - static const gl_state_index transform_y_state[STATE_LENGTH] + static const gl_state_index16 transform_y_state[STATE_LENGTH] = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; unsigned transform_y_index = @@ -2127,8 +2233,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) ir_constant *const_offset = ir->operands[0]->as_constant(); st_src_reg buffer( PROGRAM_BUFFER, - ctx->Const.Program[shader->Stage].MaxAtomicBuffers + - (const_offset ? const_offset->value.u[0] : 0), + const_offset ? const_offset->value.u[0] : 0, GLSL_TYPE_UINT); if (!const_offset) { buffer.reladdr = ralloc(mem_ctx, st_src_reg); @@ -2180,12 +2285,15 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) temp_dst.writemask = WRITEMASK_X; if (vector_elements > 3) temp_dst.writemask |= WRITEMASK_Z; - op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2), - GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3)); + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), + GET_SWZ(orig_swz, 2), + GET_SWZ(orig_swz, 3), + GET_SWZ(orig_swz, 3)); if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); else - emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], + st_src_reg_for_int(1)); } break; } @@ -2203,9 +2311,11 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) for (el = 0; el < vector_elements; el++) { unsigned swz = GET_SWZ(orig_swz, el); if (swz & 1) - op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z); + op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, + SWIZZLE_Z, SWIZZLE_Z); else - op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, + SWIZZLE_X, SWIZZLE_X); if (swz > 2) op[0].index = orig_idx + 1; op[0].type = GLSL_TYPE_UINT; @@ -2220,7 +2330,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) break; } case ir_unop_i642b: - emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0)); + emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], + st_src_reg_for_int64(0)); break; case ir_unop_i642f: emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]); @@ -2269,6 +2380,24 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_binop_carry: case ir_binop_borrow: case ir_unop_ssbo_unsized_array_length: + case ir_unop_atan: + case ir_binop_atan2: + case ir_unop_clz: + case ir_binop_add_sat: + case ir_binop_sub_sat: + case ir_binop_abs_sub: + case ir_binop_avg: + case ir_binop_avg_round: + case ir_binop_mul_32x16: + case ir_unop_f162f: + case ir_unop_f2f16: + case ir_unop_f2fmp: + case ir_unop_f162b: + case ir_unop_b2f16: + case ir_unop_i2i: + case ir_unop_i2imp: + case ir_unop_u2u: + case ir_unop_u2ump: /* This operation is not supported, or should have already been handled. */ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); @@ -2284,7 +2413,7 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir) { st_src_reg src; int i; - int swizzle[4]; + int swizzle[4] = {0}; /* Note that this is only swizzles in expressions, not those on the left * hand side of an assignment, which do write masking. See ir_assignment @@ -2368,10 +2497,15 @@ st_translate_interp_loc(ir_variable *var) void glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) { - variable_storage *entry = find_variable_storage(ir->var); + variable_storage *entry; ir_variable *var = ir->var; bool remove_array; + if (handle_bound_deref(ir->as_dereference())) + return; + + entry = find_variable_storage(ir->var); + if (!entry) { switch (var->data.mode) { case ir_var_uniform: @@ -2437,6 +2571,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) unsigned num_components; num_outputs++; + decl->invariant = var->data.invariant; + if (type_without_array->is_64bit()) component = component / 2; if (type_without_array->vector_elements) @@ -2590,6 +2726,42 @@ shrink_array_declarations(struct inout_decl *decls, unsigned count, } } + +static void +mark_array_io(struct inout_decl *decls, unsigned count, + GLbitfield64* usage_mask, + GLbitfield64 double_usage_mask, + GLbitfield* patch_usage_mask) +{ + unsigned i; + int j; + + /* Fix array declarations by removing unused array elements at both ends + * of the arrays. For example, mat4[3] where only mat[1] is used. + */ + for (i = 0; i < count; i++) { + struct inout_decl *decl = &decls[i]; + if (!decl->array_id) + continue; + + /* When not all entries of an array are accessed, we mark them as used + * here anyway, to ensure that the input/output mapping logic doesn't get + * confused. + * + * TODO This happens when an array isn't used via indirect access, which + * some game ports do (at least eON-based). There is an optimization + * opportunity here by replacing the array declaration with non-array + * declarations of those slots that are actually used. + */ + for (j = 0; j < (int)decl->size; ++j) { + if (decl->mesa_index >= VARYING_SLOT_PATCH0) + *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); + else + *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); + } + } +} + void glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) { @@ -2598,12 +2770,15 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) bool is_2D = false; ir_variable *var = ir->variable_referenced(); - /* We only need the logic provided by st_glsl_storage_type_size() + if (handle_bound_deref(ir->as_dereference())) + return; + + /* We only need the logic provided by count_vec4_slots() * for arrays of structs. Indirect sampler and image indexing is handled * elsewhere. */ - int element_size = ir->type->without_array()->is_record() ? - st_glsl_storage_type_size(ir->type, var->data.bindless) : + int element_size = ir->type->without_array()->is_struct() ? + ir->type->count_vec4_slots(false, var->data.bindless) : type_size(ir->type); index = ir->array_index->constant_expression_value(ralloc_parent(ir)); @@ -2674,12 +2849,12 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (is_2D) { src.reladdr2 = ralloc(mem_ctx, st_src_reg); - memcpy(src.reladdr2, &index_reg, sizeof(index_reg)); + *src.reladdr2 = index_reg; src.index2D = 0; src.has_index2 = true; } else { src.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + *src.reladdr = index_reg; } } @@ -2697,6 +2872,9 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) ir_variable *var = ir->record->variable_referenced(); int offset = 0; + if (handle_bound_deref(ir->as_dereference())) + return; + ir->record->accept(this); assert(ir->field_idx >= 0); @@ -2705,7 +2883,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) if (i == (unsigned) ir->field_idx) break; const glsl_type *member_type = struct_type->fields.structure[i].type; - offset += st_glsl_storage_type_size(member_type, var->data.bindless); + offset += member_type->count_vec4_slots(false, var->data.bindless); } /* If the type is smaller than a vec4, replicate the last channel out. */ @@ -2808,10 +2986,6 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) /* a is - 0 + - 0 + * (a < 0) T F F ( a < 0) T F F * (0 < a) F F T (-a < 0) F F T - * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) - * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) - * (a > 0) F F T (-a < 0) F F T - * (0 > a) T F F ( a < 0) T F F * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) * @@ -2825,16 +2999,6 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) negate = zero_on_left; break; - case ir_binop_greater: - switch_order = false; - negate = !zero_on_left; - break; - - case ir_binop_lequal: - switch_order = true; - negate = !zero_on_left; - break; - case ir_binop_gequal: switch_order = true; negate = zero_on_left; @@ -2868,7 +3032,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * st_dst_reg *l, st_src_reg *r, st_src_reg *cond, bool cond_swap) { - if (type->is_record()) { + if (type->is_struct()) { for (unsigned int i = 0; i < type->length; i++) { emit_block_mov(ir, type->fields.structure[i].type, l, r, cond, cond_swap); @@ -2886,7 +3050,8 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * if (type->is_matrix()) { const struct glsl_type *vec_type; - vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT, + vec_type = glsl_type::get_instance(type->is_double() + ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT, type->vector_elements, 1); for (int i = 0; i < type->matrix_columns; i++) { @@ -2904,7 +3069,8 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * if (l_src.file == PROGRAM_OUTPUT && this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && - (l_src.index == FRAG_RESULT_DEPTH || l_src.index == FRAG_RESULT_STENCIL)) { + (l_src.index == FRAG_RESULT_DEPTH || + l_src.index == FRAG_RESULT_STENCIL)) { /* This is a special case because the source swizzles will be shifted * later to account for the difference between GLSL (where they're * plain floats) and TGSI (where they're Z and Y components). */ @@ -2968,7 +3134,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->write_mask == 0) { assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); - unsigned num_elements = ir->lhs->type->without_array()->vector_elements; + unsigned num_elements = + ir->lhs->type->without_array()->vector_elements; if (num_elements) { l.writemask = u_bit_consecutive(0, num_elements); @@ -3044,7 +3211,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) GLdouble stack_vals[4] = { 0 }; gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; - unsigned int i; + unsigned int i, elements; static int in_array = 0; gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; @@ -3053,7 +3220,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. */ - if (ir->type->is_record()) { + if (ir->type->is_struct()) { st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); @@ -3107,7 +3274,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) for (i = 0; i < ir->type->matrix_columns; i++) { switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; + values = (gl_constant_value *) + &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(file, -1, ir->type->base_type); src.index = add_constant(file, @@ -3118,7 +3286,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); break; case GLSL_TYPE_DOUBLE: - values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements]; + values = (gl_constant_value *) + &ir->value.d[i * ir->type->vector_elements]; src = st_src_reg(file, -1, ir->type->base_type); src.index = add_constant(file, values, @@ -3127,22 +3296,26 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) &src.swizzle); if (ir->type->vector_elements >= 2) { mat_column.writemask = WRITEMASK_XY; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); + src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); } else { mat_column.writemask = WRITEMASK_X; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, + SWIZZLE_X, SWIZZLE_X); emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); } src.index++; if (ir->type->vector_elements > 2) { if (ir->type->vector_elements == 4) { mat_column.writemask = WRITEMASK_ZW; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); + src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); } else { mat_column.writemask = WRITEMASK_Z; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y); + src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, + SWIZZLE_Y, SWIZZLE_Y); emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.writemask = WRITEMASK_XYZW; src.swizzle = SWIZZLE_XYZW; @@ -3160,6 +3333,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) return; } + elements = ir->type->vector_elements; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: gl_type = GL_FLOAT; @@ -3209,14 +3383,21 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0; } break; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + gl_type = GL_UNSIGNED_INT; + elements = 2; + values[0].u = ir->value.u64[0] & 0xffffffff; + values[1].u = ir->value.u64[0] >> 32; + break; default: - assert(!"Non-float/uint/int/bool constant"); + assert(!"Non-float/uint/int/bool/sampler/image constant"); } this->result = st_src_reg(file, -1, ir->type); this->result.index = add_constant(file, values, - ir->type->vector_elements, + elements, gl_type, &this->result.swizzle); } @@ -3227,24 +3408,69 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) exec_node *param = ir->actual_parameters.get_head(); ir_dereference *deref = static_cast(param); ir_variable *location = deref->variable_referenced(); - - st_src_reg buffer( - PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); - + bool has_hw_atomics = st_context(ctx)->has_hw_atomics; /* Calculate the surface offset */ st_src_reg offset; unsigned array_size = 0, base = 0; uint16_t index = 0; + st_src_reg resource; get_deref_offsets(deref, &array_size, &base, &index, &offset, false); - if (offset.file != PROGRAM_UNDEFINED) { - emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), - offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); - emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), - offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); + if (has_hw_atomics) { + variable_storage *entry = find_variable_storage(location); + st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, + location->data.binding); + + if (!entry) { + entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC, + num_atomics); + _mesa_hash_table_insert(this->variables, location, entry); + + atomic_info[num_atomics].location = location->data.location; + atomic_info[num_atomics].binding = location->data.binding; + atomic_info[num_atomics].size = location->type->arrays_of_arrays_size(); + if (atomic_info[num_atomics].size == 0) + atomic_info[num_atomics].size = 1; + atomic_info[num_atomics].array_id = 0; + num_atomics++; + } + + if (offset.file != PROGRAM_UNDEFINED) { + if (atomic_info[entry->index].array_id == 0) { + num_atomic_arrays++; + atomic_info[entry->index].array_id = num_atomic_arrays; + } + buffer.array_id = atomic_info[entry->index].array_id; + } + + buffer.index = index; + buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE; + buffer.has_index2 = true; + + if (offset.file != PROGRAM_UNDEFINED) { + buffer.reladdr = ralloc(mem_ctx, st_src_reg); + *buffer.reladdr = offset; + emit_arl(ir, sampler_reladdr, offset); + } + offset = st_src_reg_for_int(0); + + resource = buffer; } else { - offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); + st_src_reg buffer(PROGRAM_BUFFER, + prog->info.num_ssbos + + location->data.binding, + GLSL_TYPE_ATOMIC_UINT); + + if (offset.file != PROGRAM_UNDEFINED) { + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), + offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), + offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); + } else { + offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); + } + resource = buffer; } ir->return_deref->accept(this); @@ -3268,7 +3494,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) val->accept(this); st_src_reg data = this->result, data2 = undef_src; - unsigned opcode; + enum tgsi_opcode opcode; switch (ir->callee->intrinsic_id) { case ir_intrinsic_atomic_counter_add: opcode = TGSI_OPCODE_ATOMUADD; @@ -3307,7 +3533,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) inst = emit_asm(ir, opcode, dst, offset, data, data2); } - inst->resource = buffer; + inst->resource = resource; } void @@ -3321,11 +3547,9 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); ir_constant *const_block = block->as_constant(); - st_src_reg buffer( PROGRAM_BUFFER, - ctx->Const.Program[shader->Stage].MaxAtomicBuffers + - (const_block ? const_block->value.u[0] : 0), + const_block ? const_block->value.u[0] : 0, GLSL_TYPE_UINT); if (!const_block) { @@ -3351,7 +3575,8 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) { inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); if (dst.type == GLSL_TYPE_BOOL) - emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), + st_src_reg_for_int(0)); } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) { param = param->get_next(); ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); @@ -3370,7 +3595,7 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) val->accept(this); st_src_reg data = this->result, data2 = undef_src; - unsigned opcode; + enum tgsi_opcode opcode; switch (ir->callee->intrinsic_id) { case ir_intrinsic_ssbo_atomic_add: opcode = TGSI_OPCODE_ATOMUADD; @@ -3502,7 +3727,7 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) val->accept(this); st_src_reg data = this->result, data2 = undef_src; - unsigned opcode; + enum tgsi_opcode opcode; switch (ir->callee->intrinsic_id) { case ir_intrinsic_shared_atomic_add: opcode = TGSI_OPCODE_ATOMUADD; @@ -3545,7 +3770,8 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) static void get_image_qualifiers(ir_dereference *ir, const glsl_type **type, bool *memory_coherent, bool *memory_volatile, - bool *memory_restrict, unsigned *image_format) + bool *memory_restrict, bool *memory_read_only, + enum pipe_format *image_format) { switch (ir->ir_type) { @@ -3561,6 +3787,8 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type, struct_type->fields.structure[fild_idx].memory_volatile; *memory_restrict = struct_type->fields.structure[fild_idx].memory_restrict; + *memory_read_only = + struct_type->fields.structure[fild_idx].memory_read_only; *image_format = struct_type->fields.structure[fild_idx].image_format; break; @@ -3570,7 +3798,7 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type, ir_dereference_array *deref_arr = ir->as_dereference_array(); get_image_qualifiers((ir_dereference *)deref_arr->array, type, memory_coherent, memory_volatile, memory_restrict, - image_format); + memory_read_only, image_format); break; } @@ -3581,6 +3809,7 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type, *memory_coherent = var->data.memory_coherent; *memory_volatile = var->data.memory_volatile; *memory_restrict = var->data.memory_restrict; + *memory_read_only = var->data.memory_read_only; *image_format = var->data.image_format; break; } @@ -3598,12 +3827,13 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) ir_dereference *img = (ir_dereference *)param; const ir_variable *imgvar = img->variable_referenced(); unsigned sampler_array_size = 1, sampler_base = 0; - bool memory_coherent = false, memory_volatile = false, memory_restrict = false; - unsigned image_format = 0; + bool memory_coherent = false, memory_volatile = false, + memory_restrict = false, memory_read_only = false; + enum pipe_format image_format = PIPE_FORMAT_NONE; const glsl_type *type = NULL; get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, - &memory_restrict, &image_format); + &memory_restrict, &memory_read_only, &image_format); st_src_reg reladdr; st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); @@ -3687,7 +3917,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) assert(param->is_tail_sentinel()); - unsigned opcode; + enum tgsi_opcode opcode; switch (ir->callee->intrinsic_id) { case ir_intrinsic_image_load: opcode = TGSI_OPCODE_LOAD; @@ -3719,6 +3949,21 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; break; + case ir_intrinsic_image_atomic_inc_wrap: { + /* There's a bit of disagreement between GLSL and the hardware. The + * hardware wants to wrap after the given wrap value, while GLSL + * wants to wrap at the value. Subtract 1 to make up the difference. + */ + st_src_reg wrap = get_temp(glsl_type::uint_type); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap), + arg1, st_src_reg_for_int(-1)); + arg1 = wrap; + opcode = TGSI_OPCODE_ATOMINC_WRAP; + break; + } + case ir_intrinsic_image_atomic_dec_wrap: + opcode = TGSI_OPCODE_ATOMDEC_WRAP; + break; default: assert(!"Unexpected intrinsic"); return; @@ -3740,8 +3985,8 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) } inst->tex_target = type->sampler_index(); - inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), - _mesa_get_shader_image_format(image_format)); + inst->image_format = image_format; + inst->read_only = memory_read_only; if (memory_coherent) inst->buffer_access |= TGSI_MEMORY_COHERENT; @@ -3752,7 +3997,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) } void -glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op) +glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, enum tgsi_opcode op) { ir->return_deref->accept(this); st_dst_reg dst = st_dst_reg(this->result); @@ -3843,6 +4088,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: case ir_intrinsic_image_size: case ir_intrinsic_image_samples: + case ir_intrinsic_image_atomic_inc_wrap: + case ir_intrinsic_image_atomic_dec_wrap: visit_image_intrinsic(ir); return; @@ -3869,6 +4116,10 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC); return; + case ir_intrinsic_helper_invocation: + visit_generic_intrinsic(ir, TGSI_OPCODE_READ_HELPER); + return; + case ir_intrinsic_invalid: case ir_intrinsic_generic_load: case ir_intrinsic_generic_store: @@ -3880,6 +4131,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) case ir_intrinsic_generic_atomic_max: case ir_intrinsic_generic_atomic_exchange: case ir_intrinsic_generic_atomic_comp_swap: + case ir_intrinsic_begin_invocation_interlock: + case ir_intrinsic_end_invocation_interlock: unreachable("Invalid intrinsic"); } } @@ -3900,7 +4153,7 @@ glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location); assert(field_index >= 0); - *location += struct_type->record_location_offset(field_index); + *location += struct_type->struct_location_offset(field_index); break; } @@ -3957,8 +4210,7 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, unsigned location = 0; ir_variable *var = ir->variable_referenced(); - memset(reladdr, 0, sizeof(*reladdr)); - reladdr->file = PROGRAM_UNDEFINED; + reladdr->reset(); *base = 0; *array_size = 1; @@ -3986,7 +4238,11 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, st_src_reg glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) { - if (offset.reladdr || offset.reladdr2) { + if (offset.reladdr || offset.reladdr2 || + offset.has_index2 || + offset.file == PROGRAM_UNIFORM || + offset.file == PROGRAM_CONSTANT || + offset.file == PROGRAM_STATE_VAR) { st_src_reg tmp = get_temp(glsl_type::ivec2_type); st_dst_reg tmp_dst = st_dst_reg(tmp); tmp_dst.writemask = WRITEMASK_XY; @@ -3996,6 +4252,45 @@ glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) return offset; } + +bool +glsl_to_tgsi_visitor::handle_bound_deref(ir_dereference *ir) +{ + ir_variable *var = ir->variable_referenced(); + + if (!var || var->data.mode != ir_var_uniform || var->data.bindless || + !(ir->type->is_image() || ir->type->is_sampler())) + return false; + + /* Convert from bound sampler/image to bindless handle. */ + bool is_image = ir->type->is_image(); + st_src_reg resource(is_image ? PROGRAM_IMAGE : PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); + uint16_t index = 0; + unsigned array_size = 1, base = 0; + st_src_reg reladdr; + get_deref_offsets(ir, &array_size, &base, &index, &reladdr, true); + + resource.index = index; + if (reladdr.file != PROGRAM_UNDEFINED) { + resource.reladdr = ralloc(mem_ctx, st_src_reg); + *resource.reladdr = reladdr; + emit_arl(ir, sampler_reladdr, reladdr); + } + + this->result = get_temp(glsl_type::uvec2_type); + st_dst_reg dst(this->result); + dst.writemask = WRITEMASK_XY; + + glsl_to_tgsi_instruction *inst = emit_asm( + ir, is_image ? TGSI_OPCODE_IMG2HND : TGSI_OPCODE_SAMP2HND, dst); + + inst->tex_target = ir->type->sampler_index(); + inst->resource = resource; + inst->sampler_array_size = array_size; + inst->sampler_base = base; + + return true; +} void glsl_to_tgsi_visitor::visit(ir_texture *ir) @@ -4005,17 +4300,16 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_src_reg levels_src, reladdr; st_dst_reg result_dst, coord_dst, cube_sc_dst; glsl_to_tgsi_instruction *inst = NULL; - unsigned opcode = TGSI_OPCODE_NOP; + enum tgsi_opcode opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; unsigned sampler_array_size = 1, sampler_base = 0; - bool is_cube_array = false, is_cube_shadow = false; + bool is_cube_array = false; ir_variable *var = ir->sampler->variable_referenced(); unsigned i; /* if we are a cube array sampler or a cube shadow */ if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { is_cube_array = sampler_type->sampler_array; - is_cube_shadow = sampler_type->sampler_shadow; } if (ir->coordinate) { @@ -4053,7 +4347,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } break; case ir_txb: - if (is_cube_array || is_cube_shadow) { + if (is_cube_array || + (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) { opcode = TGSI_OPCODE_TXB2; } else { @@ -4070,7 +4365,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { opcode = TGSI_OPCODE_TEX_LZ; } else { - opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; + opcode = (is_cube_array || (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; ir->lod_info.lod->accept(this); lod_info = this->result; } @@ -4197,9 +4492,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } } - /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow - * comparator was put in the correct place (and projected) by the code, - * above, that handles by-hand projection. + /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the + * shadow comparator was put in the correct place (and projected) by the + * code, above, that handles by-hand projection. */ if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { /* Slot the shadow value in as the second to last component of the @@ -4208,11 +4503,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->shadow_comparator->accept(this); if (is_cube_array) { - cube_sc = get_temp(glsl_type::float_type); - cube_sc_dst = st_dst_reg(cube_sc); - cube_sc_dst.writemask = WRITEMASK_X; + if (lod_info.file != PROGRAM_UNDEFINED) { + // If we have both a cube array *and* a bias/lod, stick the + // comparator into the .Y of the second argument. + st_src_reg tmp = get_temp(glsl_type::vec2_type); + cube_sc_dst = st_dst_reg(tmp); + cube_sc_dst.writemask = WRITEMASK_X; + emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, lod_info); + lod_info = tmp; + cube_sc_dst.writemask = WRITEMASK_Y; + } else { + cube_sc = get_temp(glsl_type::float_type); + cube_sc_dst = st_dst_reg(cube_sc); + cube_sc_dst.writemask = WRITEMASK_X; + } emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); - cube_sc_dst.writemask = WRITEMASK_X; } else { if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && @@ -4279,7 +4584,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (is_cube_array && ir->shadow_comparator) { inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else { - inst = emit_asm(ir, opcode, result_dst, coord, component); + if (this->tg4_component_in_swizzle) { + inst = emit_asm(ir, opcode, result_dst, coord); + int idx = 0; + foreach_in_list(immediate_storage, entry, &this->immediates) { + if (component.index == idx) { + gl_constant_value value = entry->values[component.swizzle]; + inst->gather_component = value.i; + break; + } + idx++; + } + } else { + inst = emit_asm(ir, opcode, result_dst, coord, component); + } } } else inst = emit_asm(ir, opcode, result_dst, coord); @@ -4299,9 +4617,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (ir->offset) { if (!inst->tex_offsets) - inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET); + inst->tex_offsets = rzalloc_array(inst, st_src_reg, + MAX_GLSL_TEXTURE_OFFSET); - for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++) + for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && + offset[i].file != PROGRAM_UNDEFINED; i++) inst->tex_offsets[i] = offset[i]; inst->tex_offset_num_offset = i; } @@ -4343,10 +4663,16 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) } } +void +glsl_to_tgsi_visitor::visit(ir_demote *ir) +{ + emit_asm(ir, TGSI_OPCODE_DEMOTE); +} + void glsl_to_tgsi_visitor::visit(ir_if *ir) { - unsigned if_opcode; + enum tgsi_opcode if_opcode; glsl_to_tgsi_instruction *if_inst; ir->condition->accept(this); @@ -4409,18 +4735,21 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_outputs = 0; num_input_arrays = 0; num_output_arrays = 0; + num_atomics = 0; + num_atomic_arrays = 0; num_immediates = 0; num_address_regs = 0; samplers_used = 0; images_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; - glsl_version = 0; native_integers = false; mem_ctx = ralloc_context(NULL); ctx = NULL; prog = NULL; precise = 0; + need_uarl = false; + tg4_component_in_swizzle = false; shader_program = NULL; shader = NULL; options = NULL; @@ -4494,6 +4823,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) v->image_targets[idx] = st_translate_texture_target(inst->tex_target, false); v->image_formats[idx] = inst->image_format; + v->image_wr[idx] = !inst->read_only; } } } @@ -4598,7 +4928,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) && !(inst->dst[0].writemask & prevWriteMask) && inst->src[2].file == inst->dst[0].file && inst->src[2].index == inst->dst[0].index - && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) { + && inst->dst[0].writemask == + get_src_arg_mask(inst->dst[0], inst->src[2])) { inst->op = TGSI_OPCODE_MOV; inst->info = tgsi_get_opcode_info(inst->op); @@ -4610,8 +4941,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) } static void -rename_temp_handle_src(struct rename_reg_pair *renames, - struct st_src_reg *src) +rename_temp_handle_src(struct rename_reg_pair *renames, st_src_reg *src) { if (src && src->file == PROGRAM_TEMPORARY) { int old_idx = src->index; @@ -4670,7 +5000,7 @@ glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) } if (inst->op == TGSI_OPCODE_BGNLOOP) { - if(depth++ == 0) + if (depth++ == 0) loop_start = i; } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) @@ -4702,7 +5032,7 @@ glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) } } if (inst->op == TGSI_OPCODE_BGNLOOP) { - if(depth++ == 0) + if (depth++ == 0) loop_start = i; } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) @@ -4737,7 +5067,7 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int * last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; } if (inst->op == TGSI_OPCODE_BGNLOOP) { - if(depth++ == 0) + if (depth++ == 0) loop_start = i; } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) { @@ -5057,7 +5387,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) write_level[4 * r + c] = level-1; } } - if(inst->op == TGSI_OPCODE_ENDIF) + if (inst->op == TGSI_OPCODE_ENDIF) --level; break; @@ -5222,7 +5552,7 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) defined = 0; inst2 = (glsl_to_tgsi_instruction *) inst->next; - do { + while (!inst2->is_tail_sentinel()) { if (inst->op == inst2->op && inst2->dst[defined].file == PROGRAM_UNDEFINED && inst->src[0].file == inst2->src[0].file && @@ -5231,9 +5561,9 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) inst->src[0].swizzle == inst2->src[0].swizzle) break; inst2 = (glsl_to_tgsi_instruction *) inst2->next; - } while (inst2); + } - if (!inst2) { + if (inst2->is_tail_sentinel()) { /* Undefined destinations are not allowed, substitute with an unused * temporary register. */ @@ -5249,6 +5579,107 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) } } +template +void test_indirect_access(const st_reg& reg, bool *has_indirect_access) +{ + if (reg.file == PROGRAM_ARRAY) { + if (reg.reladdr || reg.reladdr2 || reg.has_index2) { + has_indirect_access[reg.array_id] = true; + if (reg.reladdr) + test_indirect_access(*reg.reladdr, has_indirect_access); + if (reg.reladdr2) + test_indirect_access(*reg.reladdr2, has_indirect_access); + } + } +} + +template +void remap_array(st_reg& reg, const int *array_remap_info, + const bool *has_indirect_access) +{ + if (reg.file == PROGRAM_ARRAY) { + if (!has_indirect_access[reg.array_id]) { + reg.file = PROGRAM_TEMPORARY; + reg.index = reg.index + array_remap_info[reg.array_id]; + reg.array_id = 0; + } else { + reg.array_id = array_remap_info[reg.array_id]; + } + + if (reg.reladdr) + remap_array(*reg.reladdr, array_remap_info, has_indirect_access); + + if (reg.reladdr2) + remap_array(*reg.reladdr2, array_remap_info, has_indirect_access); + } +} + +/* One-dimensional arrays whose elements are only accessed directly are + * replaced by an according set of temporary registers that then can become + * subject to further optimization steps like copy propagation and + * register merging. + */ +void +glsl_to_tgsi_visitor::split_arrays(void) +{ + if (!next_array) + return; + + bool *has_indirect_access = rzalloc_array(mem_ctx, bool, next_array + 1); + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) + test_indirect_access(inst->src[j], has_indirect_access); + + for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) + test_indirect_access(inst->tex_offsets[j], has_indirect_access); + + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) + test_indirect_access(inst->dst[j], has_indirect_access); + + test_indirect_access(inst->resource, has_indirect_access); + } + + unsigned array_offset = 0; + unsigned n_remaining_arrays = 0; + + /* Double use: For arrays that get split this value will contain + * the base index of the temporary registers this array is replaced + * with. For arrays that remain it contains the new array ID. + */ + int *array_remap_info = rzalloc_array(has_indirect_access, int, + next_array + 1); + + for (unsigned i = 1; i <= next_array; ++i) { + if (!has_indirect_access[i]) { + array_remap_info[i] = this->next_temp + array_offset; + array_offset += array_sizes[i - 1]; + } else { + array_sizes[n_remaining_arrays] = array_sizes[i-1]; + array_remap_info[i] = ++n_remaining_arrays; + } + } + + if (next_array != n_remaining_arrays) { + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (unsigned j = 0; j < num_inst_src_regs(inst); j++) + remap_array(inst->src[j], array_remap_info, has_indirect_access); + + for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) + remap_array(inst->tex_offsets[j], array_remap_info, has_indirect_access); + + for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { + remap_array(inst->dst[j], array_remap_info, has_indirect_access); + } + remap_array(inst->resource, array_remap_info, has_indirect_access); + } + } + + ralloc_free(has_indirect_access); + this->next_temp += array_offset; + next_array = n_remaining_arrays; +} + /* Merges temporary registers together where possible to reduce the number of * registers needed to run a program. * @@ -5257,20 +5688,35 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) void glsl_to_tgsi_visitor::merge_registers(void) { - assert(need_uarl); - struct lifetime *lifetimes = - rzalloc_array(mem_ctx, struct lifetime, this->next_temp); + class array_live_range *arr_live_ranges = NULL; + + struct register_live_range *reg_live_ranges = + rzalloc_array(mem_ctx, struct register_live_range, this->next_temp); - if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions, - this->next_temp, lifetimes)) { + if (this->next_array > 0) { + arr_live_ranges = new array_live_range[this->next_array]; + for (unsigned i = 0; i < this->next_array; ++i) + arr_live_ranges[i] = array_live_range(i+1, this->array_sizes[i]); + } + + + if (get_temp_registers_required_live_ranges(reg_live_ranges, &this->instructions, + this->next_temp, reg_live_ranges, + this->next_array, arr_live_ranges)) { struct rename_reg_pair *renames = - rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); - get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames); + rzalloc_array(reg_live_ranges, struct rename_reg_pair, this->next_temp); + get_temp_registers_remapping(reg_live_ranges, this->next_temp, + reg_live_ranges, renames); rename_temp_registers(renames); - ralloc_free(renames); + + this->next_array = merge_arrays(this->next_array, this->array_sizes, + &this->instructions, arr_live_ranges); } - ralloc_free(lifetimes); + if (arr_live_ranges) + delete[] arr_live_ranges; + + ralloc_free(reg_live_ranges); } /* Reassign indices to temporary registers by reusing unused indices created @@ -5303,6 +5749,27 @@ glsl_to_tgsi_visitor::renumber_registers(void) ralloc_free(first_writes); } +#ifndef NDEBUG +void glsl_to_tgsi_visitor::print_stats() +{ + int narray_registers = 0; + for (unsigned i = 0; i < this->next_array; ++i) + narray_registers += this->array_sizes[i]; + + int ninstructions = 0; + foreach_in_list(glsl_to_tgsi_instruction, inst, &instructions) { + ++ninstructions; + } + + simple_mtx_lock(&print_stats_mutex); + stats_log << next_array << ", " + << next_temp << ", " + << narray_registers << ", " + << next_temp + narray_registers << ", " + << ninstructions << "\n"; + simple_mtx_unlock(&print_stats_mutex); +} +#endif /* ------------------------- TGSI conversion stuff -------------------------- */ /** @@ -5327,6 +5794,7 @@ struct st_translate { struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + struct ureg_src hw_atomics[PIPE_MAX_HW_ATOMIC_BUFFERS]; struct ureg_src shared_memory; unsigned *array_sizes; struct inout_decl *input_decls; @@ -5337,107 +5805,22 @@ struct st_translate { const ubyte *inputMapping; const ubyte *outputMapping; - unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ + enum pipe_shader_type procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ bool need_uarl; + bool tg4_component_in_swizzle; }; -/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ -unsigned -_mesa_sysval_to_semantic(unsigned sysval) -{ - switch (sysval) { - /* Vertex shader */ - case SYSTEM_VALUE_VERTEX_ID: - return TGSI_SEMANTIC_VERTEXID; - case SYSTEM_VALUE_INSTANCE_ID: - return TGSI_SEMANTIC_INSTANCEID; - case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: - return TGSI_SEMANTIC_VERTEXID_NOBASE; - case SYSTEM_VALUE_BASE_VERTEX: - return TGSI_SEMANTIC_BASEVERTEX; - case SYSTEM_VALUE_BASE_INSTANCE: - return TGSI_SEMANTIC_BASEINSTANCE; - case SYSTEM_VALUE_DRAW_ID: - return TGSI_SEMANTIC_DRAWID; - - /* Geometry shader */ - case SYSTEM_VALUE_INVOCATION_ID: - return TGSI_SEMANTIC_INVOCATIONID; - - /* Fragment shader */ - case SYSTEM_VALUE_FRAG_COORD: - return TGSI_SEMANTIC_POSITION; - case SYSTEM_VALUE_FRONT_FACE: - return TGSI_SEMANTIC_FACE; - case SYSTEM_VALUE_SAMPLE_ID: - return TGSI_SEMANTIC_SAMPLEID; - case SYSTEM_VALUE_SAMPLE_POS: - return TGSI_SEMANTIC_SAMPLEPOS; - case SYSTEM_VALUE_SAMPLE_MASK_IN: - return TGSI_SEMANTIC_SAMPLEMASK; - case SYSTEM_VALUE_HELPER_INVOCATION: - return TGSI_SEMANTIC_HELPER_INVOCATION; - - /* Tessellation shader */ - case SYSTEM_VALUE_TESS_COORD: - return TGSI_SEMANTIC_TESSCOORD; - case SYSTEM_VALUE_VERTICES_IN: - return TGSI_SEMANTIC_VERTICESIN; - case SYSTEM_VALUE_PRIMITIVE_ID: - return TGSI_SEMANTIC_PRIMID; - case SYSTEM_VALUE_TESS_LEVEL_OUTER: - return TGSI_SEMANTIC_TESSOUTER; - case SYSTEM_VALUE_TESS_LEVEL_INNER: - return TGSI_SEMANTIC_TESSINNER; - - /* Compute shader */ - case SYSTEM_VALUE_LOCAL_INVOCATION_ID: - return TGSI_SEMANTIC_THREAD_ID; - case SYSTEM_VALUE_WORK_GROUP_ID: - return TGSI_SEMANTIC_BLOCK_ID; - case SYSTEM_VALUE_NUM_WORK_GROUPS: - return TGSI_SEMANTIC_GRID_SIZE; - case SYSTEM_VALUE_LOCAL_GROUP_SIZE: - return TGSI_SEMANTIC_BLOCK_SIZE; - - /* ARB_shader_ballot */ - case SYSTEM_VALUE_SUBGROUP_SIZE: - return TGSI_SEMANTIC_SUBGROUP_SIZE; - case SYSTEM_VALUE_SUBGROUP_INVOCATION: - return TGSI_SEMANTIC_SUBGROUP_INVOCATION; - case SYSTEM_VALUE_SUBGROUP_EQ_MASK: - return TGSI_SEMANTIC_SUBGROUP_EQ_MASK; - case SYSTEM_VALUE_SUBGROUP_GE_MASK: - return TGSI_SEMANTIC_SUBGROUP_GE_MASK; - case SYSTEM_VALUE_SUBGROUP_GT_MASK: - return TGSI_SEMANTIC_SUBGROUP_GT_MASK; - case SYSTEM_VALUE_SUBGROUP_LE_MASK: - return TGSI_SEMANTIC_SUBGROUP_LE_MASK; - case SYSTEM_VALUE_SUBGROUP_LT_MASK: - return TGSI_SEMANTIC_SUBGROUP_LT_MASK; - - /* Unhandled */ - case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: - case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: - case SYSTEM_VALUE_VERTEX_CNT: - default: - assert(!"Unexpected SYSTEM_VALUE_ enum"); - return TGSI_SEMANTIC_COUNT; - } -} - /** * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. */ static struct ureg_src emit_immediate(struct st_translate *t, gl_constant_value values[4], - int type, int size) + GLenum type, int size) { struct ureg_program *ureg = t->ureg; - switch(type) - { + switch (type) { case GL_FLOAT: return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_DOUBLE: @@ -5466,7 +5849,7 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, { unsigned array; - switch(file) { + switch (file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); @@ -5515,7 +5898,9 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, return t->outputs[t->outputMapping[index]]; } else { - struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id); + struct inout_decl *decl = + find_inout_array(t->output_decls, + t->num_output_decls, array_id); unsigned mesa_index = decl->mesa_index; int slot = t->outputMapping[mesa_index]; @@ -5594,18 +5979,20 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) int index = src_reg->index; int double_reg2 = src_reg->double_reg2 ? 1 : 0; - switch(src_reg->file) { + switch (src_reg->file) { case PROGRAM_UNDEFINED: src = ureg_imm4f(t->ureg, 0, 0, 0, 0); break; case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: - src = ureg_src(dst_register(t, src_reg->file, src_reg->index, src_reg->array_id)); + src = ureg_src(dst_register(t, src_reg->file, src_reg->index, + src_reg->array_id)); break; case PROGRAM_OUTPUT: { - struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, src_reg->array_id); + struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, + src_reg->array_id); assert(dst.WriteMask != 0); unsigned shift = ffs(dst.WriteMask) - 1; src = ureg_swizzle(ureg_src(dst), @@ -5646,7 +6033,8 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) src = t->inputs[t->inputMapping[index] + double_reg2]; } else { - struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, + struct inout_decl *decl = find_inout_array(t->input_decls, + t->num_input_decls, src_reg->array_id); unsigned mesa_index = decl->mesa_index; int slot = t->inputMapping[mesa_index]; @@ -5668,6 +6056,11 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) src = t->systemValues[src_reg->index]; break; + case PROGRAM_HW_ATOMIC: + src = ureg_src_array_register(TGSI_FILE_HW_ATOMIC, src_reg->index, + src_reg->array_id); + break; + default: assert(!"unknown src register file"); return ureg_src_undef(); @@ -5740,7 +6133,7 @@ compile_tgsi_instruction(struct st_translate *t, int num_dst; int num_src; - unsigned tex_target = 0; + enum tgsi_texture_type tex_target = TGSI_TEXTURE_BUFFER; num_dst = num_inst_dst_regs(inst); num_src = num_inst_src_regs(inst); @@ -5753,7 +6146,7 @@ compile_tgsi_instruction(struct st_translate *t, for (i = 0; i < num_src; i++) src[i] = translate_src(t, &inst->src[i]); - switch(inst->op) { + switch (inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: @@ -5778,8 +6171,11 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TG4: case TGSI_OPCODE_LODQ: + case TGSI_OPCODE_SAMP2HND: if (inst->resource.file == PROGRAM_SAMPLER) { src[num_src] = t->samplers[inst->resource.index]; + if (t->tg4_component_in_swizzle && inst->op == TGSI_OPCODE_TG4) + src[num_src].SwizzleX = inst->gather_component; } else { /* Bindless samplers. */ src[num_src] = translate_src(t, &inst->resource); @@ -5816,6 +6212,10 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: + case TGSI_OPCODE_IMG2HND: + case TGSI_OPCODE_ATOMINC_WRAP: + case TGSI_OPCODE_ATOMDEC_WRAP: for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; num_src++; @@ -5823,6 +6223,8 @@ compile_tgsi_instruction(struct st_translate *t, src[0] = t->shared_memory; } else if (inst->resource.file == PROGRAM_BUFFER) { src[0] = t->buffers[inst->resource.index]; + } else if (inst->resource.file == PROGRAM_HW_ATOMIC) { + src[0] = translate_src(t, &inst->resource); } else if (inst->resource.file == PROGRAM_CONSTANT) { assert(inst->resource.has_index2); src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index); @@ -5878,6 +6280,34 @@ compile_tgsi_instruction(struct st_translate *t, } } +/* Invert SamplePos.y when rendering to the default framebuffer. */ +static void +emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform) +{ + struct ureg_program *ureg = t->ureg; + + assert(wpos_y_transform >= 0); + struct ureg_src trans_const = ureg_DECL_constant(ureg, wpos_y_transform); + struct ureg_src samplepos_sysval = t->systemValues[SYSTEM_VALUE_SAMPLE_POS]; + struct ureg_dst samplepos_flipped = ureg_DECL_temporary(ureg); + struct ureg_dst is_fbo = ureg_DECL_temporary(ureg); + + ureg_ADD(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), + ureg_imm1f(ureg, 1), ureg_negate(samplepos_sysval)); + + /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */ + ureg_FSEQ(ureg, ureg_writemask(is_fbo, TGSI_WRITEMASK_Y), + ureg_scalar(trans_const, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1)); + ureg_UCMP(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), + ureg_src(is_fbo), samplepos_sysval, ureg_src(samplepos_flipped)); + ureg_MOV(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_X), + samplepos_sysval); + + /* Use the result in place of the system value. */ + t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(samplepos_flipped); +} + + /** * Emit the TGSI instructions for inverting and adjusting WPOS. * This code is unavoidable because it also depends on whether @@ -5899,7 +6329,7 @@ emit_wpos_adjustment(struct gl_context *ctx, * where T = INPUT[WPOS] is inverted by Y. */ struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const); - struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); + struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); struct ureg_src *wpos = ctx->Const.GLSLFragCoordIsSysVal ? &t->systemValues[SYSTEM_VALUE_FRAG_COORD] : @@ -5929,7 +6359,7 @@ emit_wpos_adjustment(struct gl_context *ctx, } else { /* MOV wpos_temp, input[wpos] */ - ureg_MOV( ureg, wpos_temp, wpos_input ); + ureg_MOV(ureg, wpos_temp, wpos_input); } /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be @@ -5938,19 +6368,19 @@ emit_wpos_adjustment(struct gl_context *ctx, if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -6001,7 +6431,7 @@ emit_wpos(struct st_context *st, * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 */ - if (program->OriginUpperLeft) { + if (program->info.fs.origin_upper_left) { /* Fragment shader wants origin in upper-left */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { /* the driver supports upper-left origin */ @@ -6028,7 +6458,7 @@ emit_wpos(struct st_context *st, assert(0); } - if (program->PixelCenterInteger) { + if (program->info.fs.pixel_center_integer) { /* Fragment shader wants pixel center integer */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer */ @@ -6124,7 +6554,7 @@ sort_inout_decls_by_slot(struct inout_decl *decls, std::sort(decls, decls + count, sorter); } -static unsigned +static enum tgsi_interpolate_mode st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying) { switch (glsl_qual) { @@ -6166,7 +6596,7 @@ st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying) extern "C" enum pipe_error st_translate_program( struct gl_context *ctx, - uint procType, + enum pipe_shader_type procType, struct ureg_program *ureg, glsl_to_tgsi_visitor *program, const struct gl_program *proginfo, @@ -6191,6 +6621,17 @@ st_translate_program( assert(numInputs <= ARRAY_SIZE(t->inputs)); assert(numOutputs <= ARRAY_SIZE(t->outputs)); + ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR); + ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR); + ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR); + ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT); + ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target, + (gl_texture_index) (NUM_TEXTURE_TARGETS - 1)); + ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, + (enum pipe_format) (PIPE_FORMAT_COUNT - 1)); + ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, + (enum tgsi_opcode) (TGSI_OPCODE_LAST - 1)); + t = CALLOC_STRUCT(st_translate); if (!t) { ret = PIPE_ERROR_OUT_OF_MEMORY; @@ -6199,6 +6640,7 @@ st_translate_program( t->procType = procType; t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); + t->tg4_component_in_swizzle = screen->get_param(screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; @@ -6232,19 +6674,20 @@ st_translate_program( tgsi_usage_mask = TGSI_WRITEMASK_XYZW; } - unsigned interp_mode = 0; - unsigned interp_location = 0; + enum tgsi_interpolate_mode interp_mode = TGSI_INTERPOLATE_CONSTANT; + enum tgsi_interpolate_loc interp_location = TGSI_INTERPOLATE_LOC_CENTER; if (procType == PIPE_SHADER_FRAGMENT) { assert(interpMode); interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ? - interpMode[slot] : + (enum tgsi_interpolate_mode) interpMode[slot] : st_translate_interp(decl->interp, inputSlotToAttr[slot]); - interp_location = decl->interp_loc; + interp_location = (enum tgsi_interpolate_loc) decl->interp_loc; } src = ureg_DECL_fs_input_cyl_centroid_layout(ureg, - inputSemanticName[slot], inputSemanticIndex[slot], + (enum tgsi_semantic) inputSemanticName[slot], + inputSemanticIndex[slot], interp_mode, 0, interp_location, slot, tgsi_usage_mask, decl->array_id, decl->size); @@ -6298,16 +6741,18 @@ st_translate_program( } dst = ureg_DECL_output_layout(ureg, - outputSemanticName[slot], outputSemanticIndex[slot], + (enum tgsi_semantic) outputSemanticName[slot], + outputSemanticIndex[slot], decl->gs_out_streams, - slot, tgsi_usage_mask, decl->array_id, decl->size); - + slot, tgsi_usage_mask, decl->array_id, decl->size, decl->invariant); + dst.Invariant = decl->invariant; for (unsigned j = 0; j < decl->size; ++j) { if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) { /* The ArrayID is set up in dst_register */ t->outputs[slot + j] = dst; t->outputs[slot + j].ArrayID = 0; t->outputs[slot + j].Index += j; + t->outputs[slot + j].Invariant = decl->invariant; } } } @@ -6368,6 +6813,12 @@ st_translate_program( goto out; } } + + if (program->shader->Program->sh.fs.BlendSupport) + ureg_property(ureg, + TGSI_PROPERTY_FS_BLEND_EQUATION_ADVANCED, + program->shader->Program->sh.fs.BlendSupport); + } else if (procType == PIPE_SHADER_VERTEX) { for (i = 0; i < numOutputs; i++) { @@ -6385,6 +6836,9 @@ st_translate_program( emit_compute_block_size(proginfo, ureg); } + if (program->shader->Program->info.layer_viewport_relative) + ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1); + /* Declare address register. */ if (program->num_address_regs > 0) { @@ -6396,11 +6850,11 @@ st_translate_program( /* Declare misc input registers */ { - GLbitfield sysInputs = proginfo->info.system_values_read; + GLbitfield64 sysInputs = proginfo->info.system_values_read; for (i = 0; sysInputs; i++) { - if (sysInputs & (1 << i)) { - unsigned semName = _mesa_sysval_to_semantic(i); + if (sysInputs & (1ull << i)) { + enum tgsi_semantic semName = tgsi_get_sysval_semantic(i); t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); @@ -6420,7 +6874,8 @@ st_translate_program( (void) pscreen; if (!ctx->Const.NativeIntegers) { struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); - ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]); + ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), + t->systemValues[i]); t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); } } @@ -6430,7 +6885,11 @@ st_translate_program( emit_wpos(st_context(ctx), t, proginfo, ureg, program->wpos_transform_const); - sysInputs &= ~(1 << i); + if (procType == PIPE_SHADER_FRAGMENT && + semName == TGSI_SEMANTIC_SAMPLEPOS) + emit_samplepos_adjustment(t, program->wpos_transform_const); + + sysInputs &= ~(1ull << i); } } } @@ -6454,6 +6913,8 @@ st_translate_program( t->num_constants = proginfo->Parameters->NumParameters; for (i = 0; i < proginfo->Parameters->NumParameters; i++) { + unsigned pvo = proginfo->Parameters->ParameterValueOffset[i]; + switch (proginfo->Parameters->Parameters[i].Type) { case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: @@ -6471,7 +6932,7 @@ st_translate_program( t->constants[i] = ureg_DECL_constant(ureg, i); else t->constants[i] = emit_immediate(t, - proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->ParameterValues + pvo, proginfo->Parameters->Parameters[i].DataType, 4); break; @@ -6511,12 +6972,13 @@ st_translate_program( /* texture samplers */ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { if (program->samplers_used & (1u << i)) { - unsigned type = st_translate_texture_type(program->sampler_types[i]); + enum tgsi_return_type type = + st_translate_texture_type(program->sampler_types[i]); t->samplers[i] = ureg_DECL_sampler(ureg, i); - ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i], - type, type, type, type ); + ureg_DECL_sampler_view(ureg, i, program->sampler_targets[i], + type, type, type, type); } } @@ -6524,16 +6986,27 @@ st_translate_program( { struct gl_program *prog = program->prog; - for (i = 0; i < prog->info.num_abos; i++) { - unsigned index = prog->sh.AtomicBuffers[i]->Binding; - assert(index < frag_const->MaxAtomicBuffers); - t->buffers[index] = ureg_DECL_buffer(ureg, index, true); + if (!st_context(ctx)->has_hw_atomics) { + for (i = 0; i < prog->info.num_abos; i++) { + unsigned index = (prog->info.num_ssbos + + prog->sh.AtomicBuffers[i]->Binding); + assert(prog->sh.AtomicBuffers[i]->Binding < + frag_const->MaxAtomicBuffers); + t->buffers[index] = ureg_DECL_buffer(ureg, index, true); + } + } else { + for (i = 0; i < program->num_atomics; i++) { + struct hwatomic_decl *ainfo = &program->atomic_info[i]; + gl_uniform_storage *uni_storage = &prog->sh.data->UniformStorage[ainfo->location]; + int base = uni_storage->offset / ATOMIC_COUNTER_SIZE; + ureg_DECL_hw_atomic(ureg, base, base + ainfo->size - 1, ainfo->binding, + ainfo->array_id); + } } assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks); for (i = 0; i < prog->info.num_ssbos; i++) { - unsigned index = frag_const->MaxAtomicBuffers + i; - t->buffers[index] = ureg_DECL_buffer(ureg, index, false); + t->buffers[i] = ureg_DECL_buffer(ureg, i, false); } } @@ -6545,7 +7018,8 @@ st_translate_program( t->images[i] = ureg_DECL_image(ureg, i, program->image_targets[i], program->image_formats[i], - true, false); + program->image_wr[i], + false); } } @@ -6569,6 +7043,8 @@ st_translate_program( } } break; + default: + ; /* nothing - silence compiler warning */ } out: @@ -6615,7 +7091,6 @@ get_mesa_program_tgsi(struct gl_context *ctx, v->shader_program = shader_program; v->shader = shader; v->options = options; - v->glsl_version = ctx->Const.GLSLVersion; v->native_integers = ctx->Const.NativeIntegers; v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, @@ -6626,6 +7101,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, PIPE_CAP_TGSI_TEX_TXF_LZ); v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); + v->tg4_component_in_swizzle = pscreen->get_param(pscreen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); skip_merge_registers = @@ -6680,8 +7156,17 @@ get_mesa_program_tgsi(struct gl_context *ctx, while (v->eliminate_dead_code()); v->merge_two_dsts(); - if (!skip_merge_registers) + + if (!skip_merge_registers) { + v->split_arrays(); + v->copy_propagate(); + while (v->eliminate_dead_code()); + v->merge_registers(); + v->copy_propagate(); + while (v->eliminate_dead_code()); + } + v->renumber_registers(); /* Write the END instruction. */ @@ -6697,14 +7182,28 @@ get_mesa_program_tgsi(struct gl_context *ctx, } do_set_program_inouts(shader->ir, prog, shader->Stage); + _mesa_copy_linked_program_data(shader_program, shader); - shrink_array_declarations(v->inputs, v->num_inputs, - &prog->info.inputs_read, - prog->info.double_inputs_read, - &prog->info.patch_inputs_read); - shrink_array_declarations(v->outputs, v->num_outputs, - &prog->info.outputs_written, 0ULL, - &prog->info.patch_outputs_written); + + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS)) { + mark_array_io(v->inputs, v->num_inputs, + &prog->info.inputs_read, + prog->DualSlotInputs, + &prog->info.patch_inputs_read); + + mark_array_io(v->outputs, v->num_outputs, + &prog->info.outputs_written, 0ULL, + &prog->info.patch_outputs_written); + } else { + shrink_array_declarations(v->inputs, v->num_inputs, + &prog->info.inputs_read, + prog->DualSlotInputs, + &prog->info.patch_inputs_read); + shrink_array_declarations(v->outputs, v->num_outputs, + &prog->info.outputs_written, 0ULL, + &prog->info.patch_outputs_written); + } + count_resources(v, prog); /* The GLSL IR won't be needed anymore. */ @@ -6714,8 +7213,9 @@ get_mesa_program_tgsi(struct gl_context *ctx, /* This must be done before the uniform storage is associated. */ if (shader->Stage == MESA_SHADER_FRAGMENT && (prog->info.inputs_read & VARYING_BIT_POS || - prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) { - static const gl_state_index wposTransformState[STATE_LENGTH] = { + prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD) || + prog->info.system_values_read & (1ull << SYSTEM_VALUE_SAMPLE_POS))) { + static const gl_state_index16 wposTransformState[STATE_LENGTH] = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; @@ -6733,41 +7233,16 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(ctx, shader_program, prog); if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); _mesa_reference_program(ctx, &shader->Program, NULL); return NULL; } - struct st_vertex_program *stvp; - struct st_fragment_program *stfp; - struct st_common_program *stp; - struct st_compute_program *stcp; + st_program(prog)->glsl_to_tgsi = v; - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - stvp = (struct st_vertex_program *)prog; - stvp->glsl_to_tgsi = v; - break; - case MESA_SHADER_FRAGMENT: - stfp = (struct st_fragment_program *)prog; - stfp->glsl_to_tgsi = v; - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - stp = st_common_program(prog); - stp->glsl_to_tgsi = v; - break; - case MESA_SHADER_COMPUTE: - stcp = (struct st_compute_program *)prog; - stcp->glsl_to_tgsi = v; - break; - default: - assert(!"should not be reached"); - return NULL; - } + PRINT_STATS(v->print_stats()); return prog; } @@ -6822,123 +7297,29 @@ has_unsupported_control_flow(exec_list *ir, return visitor.unsupported; } -extern "C" { - /** * Link a shader. - * Called via ctx->Driver.LinkShader() * This actually involves converting GLSL IR into an intermediate TGSI-like IR * with code lowering and other optimizations. */ GLboolean -st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog) { - /* Return early if we are loading the shader from on-disk cache */ - if (st_load_tgsi_from_disk_cache(ctx, prog)) { - return GL_TRUE; - } - struct pipe_screen *pscreen = ctx->st->pipe->screen; - assert(prog->data->LinkStatus); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + if (shader == NULL) continue; - struct gl_linked_shader *shader = prog->_LinkedShaders[i]; exec_list *ir = shader->ir; gl_shader_stage stage = shader->Stage; + enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; - enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); - bool have_dround = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED); - bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED); - bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED); + unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_LOWER_IF_THRESHOLD); - - /* If there are forms of indirect addressing that the driver - * cannot handle, perform the lowering pass. - */ - if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || - options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { - lower_variable_index_to_cond_assign(stage, ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform); - } - - if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD)) - lower_64bit_integer_instructions(ir, DIV64 | MOD64); - - if (ctx->Extensions.ARB_shading_language_packing) { - unsigned lower_inst = LOWER_PACK_SNORM_2x16 | - LOWER_UNPACK_SNORM_2x16 | - LOWER_PACK_UNORM_2x16 | - LOWER_UNPACK_UNORM_2x16 | - LOWER_PACK_SNORM_4x8 | - LOWER_UNPACK_SNORM_4x8 | - LOWER_UNPACK_UNORM_4x8 | - LOWER_PACK_UNORM_4x8; - - if (ctx->Extensions.ARB_gpu_shader5) - lower_inst |= LOWER_PACK_USE_BFI | - LOWER_PACK_USE_BFE; - if (!ctx->st->has_half_float_packing) - lower_inst |= LOWER_PACK_HALF_2x16 | - LOWER_UNPACK_HALF_2x16; - - lower_packing_builtins(ir, lower_inst); - } - - if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) - lower_offset_arrays(ir); - do_mat_op_to_vec(ir); - - if (stage == MESA_SHADER_FRAGMENT) - lower_blend_equation_advanced(shader); - - lower_instructions(ir, - MOD_TO_FLOOR | - FDIV_TO_MUL_RCP | - EXP_TO_EXP2 | - LOG_TO_LOG2 | - (have_ldexp ? 0 : LDEXP_TO_ARITH) | - (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | - CARRY_TO_ARITH | - BORROW_TO_ARITH | - (have_dround ? 0 : DOPS_TO_DFRAC) | - (options->EmitNoPow ? POW_TO_EXP2 : 0) | - (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | - (options->EmitNoSat ? SAT_TO_CLAMP : 0) | - (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) | - /* Assume that if ARB_gpu_shader5 is not supported - * then all of the extended integer functions need - * lowering. It may be necessary to add some caps - * for individual instructions. - */ - (!ctx->Extensions.ARB_gpu_shader5 - ? BIT_COUNT_TO_MATH | - EXTRACT_TO_SHIFTS | - INSERT_TO_SHIFTS | - REVERSE_TO_SHIFTS | - FIND_LSB_TO_FLOAT_CAST | - FIND_MSB_TO_FLOAT_CAST | - IMUL_HIGH_TO_MUL - : 0)); - - do_vec_index_to_cond_assign(ir); - lower_vector_insert(ir, true); - lower_quadop_vector(ir, false); - lower_noise(ir); - if (options->MaxIfDepth == 0) { - lower_discard(ir); - } - if (ctx->Const.GLSLOptimizeConservatively) { /* Do it once and repeat only if there's unsupported control flow. */ do { @@ -6958,39 +7339,26 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) } while (progress); } - validate_ir_tree(ir); - } - - build_program_resource_list(ctx, prog); + /* Do this again to lower ir_binop_vector_extract introduced + * by optimization passes. + */ + do_vec_index_to_cond_assign(ir); - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_linked_shader *shader = prog->_LinkedShaders[i]; - if (shader == NULL) - continue; + validate_ir_tree(ir); - enum pipe_shader_type ptarget = - pipe_shader_type_from_mesa(shader->Stage); - enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) - pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_PREFERRED_IR); - - struct gl_program *linked_prog = NULL; - if (preferred_ir == PIPE_SHADER_IR_NIR) { - /* TODO only for GLSL VS/FS/CS for now: */ - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - linked_prog = st_nir_get_mesa_program(ctx, prog, shader); - default: - break; - } - } else { - linked_prog = get_mesa_program_tgsi(ctx, prog, shader); - } + struct gl_program *linked_prog = + get_mesa_program_tgsi(ctx, prog, shader); + st_set_prog_affected_state_flags(linked_prog); if (linked_prog) { - st_set_prog_affected_state_flags(linked_prog); + /* This is really conservative: */ + linked_prog->info.writes_memory = + linked_prog->info.num_ssbos || + linked_prog->info.num_images || + ctx->Extensions.ARB_bindless_texture || + (linked_prog->sh.LinkedTransformFeedback && + linked_prog->sh.LinkedTransformFeedback->NumVarying); + if (!ctx->Driver.ProgramStringNotify(ctx, _mesa_shader_stage_to_program(i), linked_prog)) { @@ -7002,41 +7370,3 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) return GL_TRUE; } - -void -st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, - const ubyte outputMapping[], - struct pipe_stream_output_info *so) -{ - if (!glsl_to_tgsi->shader_program->last_vert_prog) - return; - - struct gl_transform_feedback_info *info = - glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback; - st_translate_stream_output_info2(info, outputMapping, so); -} - -void -st_translate_stream_output_info2(struct gl_transform_feedback_info *info, - const ubyte outputMapping[], - struct pipe_stream_output_info *so) -{ - unsigned i; - - for (i = 0; i < info->NumOutputs; i++) { - so->output[i].register_index = - outputMapping[info->Outputs[i].OutputRegister]; - so->output[i].start_component = info->Outputs[i].ComponentOffset; - so->output[i].num_components = info->Outputs[i].NumComponents; - so->output[i].output_buffer = info->Outputs[i].OutputBuffer; - so->output[i].dst_offset = info->Outputs[i].DstOffset; - so->output[i].stream = info->Outputs[i].StreamId; - } - - for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - so->stride[i] = info->Buffers[i].Stride; - } - so->num_outputs = info->NumOutputs; -} - -} /* extern "C" */