X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=7564119ac11a55c17e317bae254efc0966815253;hb=a2dc11a7818c04d8dc0324e8fcba98d60baea529;hp=9308eb4841e12eca8896d890e275d6a6cfce30ad;hpb=4191c1a57c1e806a078bfc5b074b557ff2b54c35;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9308eb4841e..7564119ac11 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -32,15 +32,16 @@ #include "st_glsl_to_tgsi.h" -#include "glsl_parser_extras.h" -#include "ir_optimization.h" +#include "compiler/glsl/glsl_parser_extras.h" +#include "compiler/glsl/ir_optimization.h" +#include "compiler/glsl/program.h" #include "main/errors.h" #include "main/shaderobj.h" #include "main/uniforms.h" #include "main/shaderapi.h" +#include "main/shaderimage.h" #include "program/prog_instruction.h" -#include "program/sampler.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" @@ -50,9 +51,11 @@ #include "util/u_memory.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "st_format.h" +#include "st_glsl_types.h" +#include "st_nir.h" -#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) @@ -88,7 +91,7 @@ public: this->is_double_vertex_input = false; } - st_src_reg(gl_register_file file, int index, int type) + st_src_reg(gl_register_file file, int index, enum glsl_base_type type) { this->type = type; this->file = file; @@ -104,7 +107,7 @@ public: this->is_double_vertex_input = false; } - st_src_reg(gl_register_file file, int index, int type, int index2D) + st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D) { this->type = type; this->file = file; @@ -143,7 +146,7 @@ public: int index2D; GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ int negate; /**< NEGATE_XYZW mask from mesa */ - int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; st_src_reg *reladdr2; @@ -159,13 +162,12 @@ public: class st_dst_reg { public: - st_dst_reg(gl_register_file file, int writemask, int type, int index) + st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index) { this->file = file; this->index = index; this->index2D = 0; this->writemask = writemask; - this->cond_mask = COND_TR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -173,13 +175,12 @@ public: this->array_id = 0; } - st_dst_reg(gl_register_file file, int writemask, int type) + st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type) { this->file = file; this->index = 0; this->index2D = 0; this->writemask = writemask; - this->cond_mask = COND_TR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -194,7 +195,6 @@ public: this->index = 0; this->index2D = 0; this->writemask = 0; - this->cond_mask = COND_TR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -207,8 +207,7 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int index2D; int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ - GLuint cond_mask:4; - int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; st_src_reg *reladdr2; @@ -238,7 +237,6 @@ st_dst_reg::st_dst_reg(st_src_reg reg) this->file = reg.file; this->index = reg.index; this->writemask = WRITEMASK_XYZW; - this->cond_mask = COND_TR; this->reladdr = reg.reladdr; this->index2D = reg.index2D; this->reladdr2 = reg.reladdr2; @@ -258,15 +256,20 @@ public: GLboolean cond_update; bool saturate; st_src_reg sampler; /**< sampler register */ + int sampler_base; int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */ int tex_target; /**< One of TEXTURE_*_INDEX */ glsl_base_type tex_type; GLboolean tex_shadow; + unsigned image_format; st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned tex_offset_num_offset; int dead_mask; /**< Used in dead code elimination */ + st_src_reg buffer; /**< buffer register */ + unsigned buffer_access; /**< buffer access type */ + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ const struct tgsi_opcode_info *info; }; @@ -340,10 +343,10 @@ struct array_decl { unsigned mesa_index; unsigned array_id; unsigned array_size; - unsigned array_type; + enum glsl_base_type array_type; }; -static unsigned +static enum glsl_base_type find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id) { unsigned i; @@ -373,7 +376,7 @@ public: struct gl_context *ctx; struct gl_program *prog; struct gl_shader_program *shader_program; - struct gl_shader *shader; + struct gl_linked_shader *shader; struct gl_shader_compiler_options *options; int next_temp; @@ -388,9 +391,13 @@ public: unsigned num_output_arrays; int num_address_regs; - int samplers_used; + uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ + int buffers_used; + int images_used; + int image_targets[PIPE_MAX_SHADER_IMAGES]; + unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; bool indirect_addr_consts; int wpos_transform_const; @@ -398,6 +405,7 @@ public: bool native_integers; bool have_sqrt; bool have_fma; + bool use_shared_memory; variable_storage *find_variable_storage(ir_variable *var); @@ -412,7 +420,7 @@ public: st_src_reg st_src_reg_for_double(double val); st_src_reg st_src_reg_for_float(float val); st_src_reg st_src_reg_for_int(int val); - st_src_reg st_src_reg_for_type(int type, int val); + st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val); /** * \name Visit methods @@ -444,6 +452,14 @@ public: virtual void visit(ir_barrier *); /*@}*/ + void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE; + + void visit_atomic_counter_intrinsic(ir_call *); + void visit_ssbo_intrinsic(ir_call *); + void visit_membar_intrinsic(ir_call *); + void visit_shared_intrinsic(ir_call *); + void visit_image_intrinsic(ir_call *); + st_src_reg result; /** List of variable_storage */ @@ -495,6 +511,19 @@ public: void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); + void get_deref_offsets(ir_dereference *ir, + unsigned *array_size, + unsigned *base, + unsigned *index, + st_src_reg *reladdr); + void calc_deref_offsets(ir_dereference *head, + ir_dereference *tail, + unsigned *array_elements, + unsigned *base, + unsigned *index, + st_src_reg *indirect, + unsigned *location); + bool try_emit_mad(ir_expression *ir, int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, @@ -557,6 +586,28 @@ swizzle_for_size(int size) return size_swizzles[size - 1]; } +static bool +is_resource_instruction(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_RESQ: + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + return true; + default: + return false; + } +} + static unsigned num_inst_dst_regs(const glsl_to_tgsi_instruction *op) { @@ -566,7 +617,8 @@ num_inst_dst_regs(const glsl_to_tgsi_instruction *op) static unsigned num_inst_src_regs(const glsl_to_tgsi_instruction *op) { - return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src; + return op->info->is_tex || is_resource_instruction(op->op) ? + op->info->num_src - 1 : op->info->num_src; } glsl_to_tgsi_instruction * @@ -577,7 +629,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i, j; - bool dst_is_double[2]; + bool dst_is_64bit[2]; op = get_opcode(ir, op, dst, src0, src1); @@ -661,8 +713,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } } - this->instructions.push_tail(inst); - /* * This section contains the double processing. * GLSL just represents doubles as single channel values, @@ -682,23 +732,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * GLSL [0].w -> TGSI [1].zw */ for (j = 0; j < 2; j++) { - dst_is_double[j] = false; - if (inst->dst[j].type == GLSL_TYPE_DOUBLE) - dst_is_double[j] = true; - else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) { - unsigned type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id); - if (type == GLSL_TYPE_DOUBLE) - dst_is_double[j] = true; + dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type); + if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) { + enum glsl_base_type type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id); + if (glsl_base_type_is_64bit(type)) + dst_is_64bit[j] = true; } } - if (dst_is_double[0] || dst_is_double[1] || - inst->src[0].type == GLSL_TYPE_DOUBLE) { + if (dst_is_64bit[0] || dst_is_64bit[1] || + glsl_base_type_is_64bit(inst->src[0].type)) { glsl_to_tgsi_instruction *dinst = NULL; int initial_src_swz[4], initial_src_idx[4]; int initial_dst_idx[2], initial_dst_writemask[2]; /* select the writemask for dst0 or dst1 */ - unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask; + unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask; /* copy out the writemask, index and swizzles for all src/dsts. */ for (j = 0; j < 2; j++) { @@ -715,10 +763,22 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * scan all the components in the dst writemask * generate an instruction for each of them if required. */ + st_src_reg addr; while (writemask) { int i = u_bit_scan(&writemask); + /* before emitting the instruction, see if we have to adjust store + * address */ + if (i > 1 && inst->op == TGSI_OPCODE_STORE && + addr.file == PROGRAM_UNDEFINED) { + /* We have to advance the buffer address by 16 */ + addr = get_temp(glsl_type::uint_type); + emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr), + inst->src[0], st_src_reg_for_int(16)); + } + + /* first time use previous instruction */ if (dinst == NULL) { dinst = inst; @@ -728,16 +788,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, *dinst = *inst; dinst->next = NULL; dinst->prev = NULL; - this->instructions.push_tail(dinst); } + this->instructions.push_tail(dinst); /* modify the destination if we are splitting */ for (j = 0; j < 2; j++) { - if (dst_is_double[j]) { + if (dst_is_64bit[j]) { dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; dinst->dst[j].index = initial_dst_idx[j]; - if (i > 1) + if (i > 1) { + if (dinst->op == TGSI_OPCODE_STORE) { + dinst->src[0] = addr; + } else { dinst->dst[j].index++; + } + } } else { /* if we aren't writing to a double, just get the bit of the initial writemask for this channel */ @@ -749,7 +814,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, for (j = 0; j < 4; j++) { int swz = GET_SWZ(initial_src_swz[j], i); - if (dinst->src[j].type == GLSL_TYPE_DOUBLE) { + if (glsl_base_type_is_64bit(dinst->src[j].type)) { dinst->src[j].index = initial_src_idx[j]; if (swz > 1) { dinst->src[j].double_reg2 = true; @@ -766,13 +831,15 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, - F2D is a float src0, DLDEXP is integer src1 */ if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_DLDEXP || - (op == TGSI_OPCODE_UCMP && dst_is_double[0])) { + (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) { dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz); } } } } inst = dinst; + } else { + this->instructions.push_tail(inst); } @@ -797,7 +864,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { - int type = GLSL_TYPE_FLOAT; + enum glsl_base_type type = GLSL_TYPE_FLOAT; if (op == TGSI_OPCODE_MOV) return op; @@ -807,7 +874,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, assert(src1.type != GLSL_TYPE_ARRAY); assert(src1.type != GLSL_TYPE_STRUCT); - if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) + if (is_resource_instruction(op)) + type = src1.type; + else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) type = GLSL_TYPE_DOUBLE; else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; @@ -891,6 +960,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case3fid(FLR, FLR, DFLR); case3fid(ROUND, ROUND, DROUND); + case2iu(ATOMIMAX, ATOMUMAX); + case2iu(ATOMIMIN, ATOMUMIN); + default: break; } @@ -1081,7 +1153,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) } st_src_reg -glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) +glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val) { if (native_integers) return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : @@ -1093,71 +1165,13 @@ glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) static int attrib_type_size(const struct glsl_type *type, bool is_vs_input) { - unsigned int i; - int size; - - switch (type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (type->is_matrix()) { - return type->matrix_columns; - } else { - /* Regardless of size of vector, it gets a vec4. This is bad - * packing for things like floats, but otherwise arrays become a - * mess. Hopefully a later pass over the code can pack scalars - * down if appropriate. - */ - return 1; - } - break; - case GLSL_TYPE_DOUBLE: - if (type->is_matrix()) { - if (type->vector_elements <= 2 || is_vs_input) - return type->matrix_columns; - else - return type->matrix_columns * 2; - } else { - /* For doubles if we have a double or dvec2 they fit in one - * vec4, else they need 2 vec4s. - */ - if (type->vector_elements <= 2 || is_vs_input) - return 1; - else - return 2; - } - break; - case GLSL_TYPE_ARRAY: - assert(type->length > 0); - return attrib_type_size(type->fields.array, is_vs_input) * type->length; - case GLSL_TYPE_STRUCT: - size = 0; - for (i = 0; i < type->length; i++) { - size += attrib_type_size(type->fields.structure[i].type, is_vs_input); - } - return size; - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_SUBROUTINE: - /* Samplers take up one slot in UNIFORMS[], but they're baked in - * at link time. - */ - return 1; - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - assert(!"Invalid type in type_size"); - break; - } - return 0; + return st_glsl_attrib_type_size(type, is_vs_input); } static int type_size(const struct glsl_type *type) { - return attrib_type_size(type, false); + return st_glsl_type_size(type); } /** @@ -1464,10 +1478,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, void glsl_to_tgsi_visitor::visit(ir_expression *ir) { - unsigned int operand; st_src_reg op[ARRAY_SIZE(ir->operands)]; - st_src_reg result_src; - st_dst_reg result_dst; /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ @@ -1490,7 +1501,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operation == ir_quadop_vector) assert(!"ir_quadop_vector should have been lowered"); - for (operand = 0; operand < ir->get_num_operands(); operand++) { + for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = PROGRAM_UNDEFINED; ir->operands[operand]->accept(this); if (this->result.file == PROGRAM_UNDEFINED) { @@ -1507,6 +1518,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) assert(!ir->operands[operand]->type->is_matrix()); } + visit_expression(ir, op); +} + +/* The non-recursive part of the expression visitor lives in a separate + * function and should be prevented from being inlined, to avoid a stack + * explosion when deeply nested expressions are visited. + */ +void +glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) +{ + st_src_reg result_src; + st_dst_reg result_dst; + int vector_elements = ir->operands[0]->type->vector_elements; if (ir->operands[1]) { vector_elements = MAX2(vector_elements, @@ -1875,13 +1899,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (have_sqrt) { emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]); } else { - /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); - emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); - /* For incoming channels <= 0, set the result to 0. */ - op[0].negate = ~op[0].negate; - emit_asm(ir, TGSI_OPCODE_CMP, result_dst, - op[0], result_src, st_src_reg_for_float(0.0)); + /* This is the only instruction sequence that makes the game "Risen" + * render correctly. ABS is not required for the game, but since GLSL + * declares negative values as "undefined", allowing us to do whatever + * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ + * behavior. + */ + emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src); + emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); } break; case ir_unop_rsq: @@ -1903,6 +1929,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_u2i: /* Converting between signed and unsigned integers is a no-op. */ result_src = op[0]; + result_src.type = result_dst.type; break; case ir_unop_b2i: if (native_integers) { @@ -1931,12 +1958,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_bitcast_f2i: - result_src = op[0]; - result_src.type = GLSL_TYPE_INT; - break; case ir_unop_bitcast_f2u: - result_src = op[0]; - result_src.type = GLSL_TYPE_UINT; + /* Make sure we don't propagate the negate modifier to integer opcodes. */ + if (op[0].negate) + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + else + result_src = op[0]; + result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT : + GLSL_TYPE_UINT; break; case ir_unop_bitcast_i2f: case ir_unop_bitcast_u2f: @@ -2064,7 +2093,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); - if (cbuf.type == GLSL_TYPE_DOUBLE) + if (glsl_base_type_is_64bit(cbuf.type)) cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8, const_offset % 16 / 8, const_offset % 16 / 8, @@ -2125,9 +2154,29 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_interpolate_at_centroid: emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); break; - case ir_binop_interpolate_at_offset: - emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]); + case ir_binop_interpolate_at_offset: { + /* The y coordinate needs to be flipped for the default fb */ + static const gl_state_index transform_y_state[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; + + unsigned transform_y_index = + _mesa_add_state_reference(this->prog->Parameters, + transform_y_state); + + st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, + transform_y_index, + glsl_type::vec4_type); + transform_y.swizzle = SWIZZLE_XXXX; + + st_src_reg temp = get_temp(glsl_type::vec2_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]); + temp_dst.writemask = WRITEMASK_Y; + emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]); + emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp); break; + } case ir_binop_interpolate_at_sample: emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); break; @@ -2170,6 +2219,32 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]); break; + case ir_unop_get_buffer_size: { + ir_constant *const_offset = ir->operands[0]->as_constant(); + st_src_reg buffer( + PROGRAM_BUFFER, + ctx->Const.Program[shader->Stage].MaxAtomicBuffers + + (const_offset ? const_offset->value.u[0] : 0), + GLSL_TYPE_UINT); + if (!const_offset) { + buffer.reladdr = ralloc(mem_ctx, st_src_reg); + *buffer.reladdr = op[0]; + emit_arl(ir, sampler_reladdr, op[0]); + } + emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer; + break; + } + + case ir_unop_vote_any: + emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]); + break; + case ir_unop_vote_all: + emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]); + break; + case ir_unop_vote_eq: + emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]); + break; + case ir_unop_pack_snorm_2x16: case ir_unop_pack_unorm_2x16: case ir_unop_pack_snorm_4x8: @@ -2177,14 +2252,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_4x8: - case ir_binop_pack_half_2x16_split: - case ir_binop_bfm: - case ir_triop_bfi: case ir_quadop_vector: case ir_binop_vector_extract: case ir_triop_vector_insert: @@ -2195,10 +2265,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); break; - - case ir_unop_get_buffer_size: - assert(!"Not implemented yet"); - break; } this->result = result_src; @@ -2291,7 +2357,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) switch (var->data.mode) { case ir_var_uniform: entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, - var->data.location); + var->data.param_index); this->variables.push_tail(entry); break; case ir_var_shader_in: @@ -2393,7 +2459,8 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count, GLbitfield64 double_usage_mask, GLbitfield patch_usage_mask) { - unsigned i, j; + unsigned i; + int j; /* Fix array declarations by removing unused array elements at both ends * of the arrays. For example, mat4[3] where only mat[1] is used. @@ -2402,7 +2469,7 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count, struct array_decl *decl = &arrays[i]; /* Shrink the beginning. */ - for (j = 0; j < decl->array_size; j++) { + for (j = 0; j < (int)decl->array_size; j++) { if (decl->mesa_index >= VARYING_SLOT_PATCH0) { if (patch_usage_mask & BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) @@ -2757,7 +2824,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * } l->index++; r->index++; - if (type->is_dual_slot_double()) { + if (type->is_dual_slot()) { l->index++; if (r->is_double_vertex_input == false) r->index++; @@ -2783,7 +2850,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); if (ir->lhs->type->is_array() || ir->lhs->type->without_array()->is_matrix()) { - if (ir->lhs->type->without_array()->is_double()) { + if (ir->lhs->type->without_array()->is_64bit()) { switch (ir->lhs->type->without_array()->vector_elements) { case 1: l.writemask = WRITEMASK_X; @@ -2802,7 +2869,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) l.writemask = WRITEMASK_XYZW; } } else if (ir->lhs->type->is_scalar() && - !ir->lhs->type->is_double() && + !ir->lhs->type->is_64bit() && ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. @@ -3075,14 +3142,558 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) return entry; } +void +glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *deref = static_cast(param); + ir_variable *location = deref->variable_referenced(); + + st_src_reg buffer( + PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); + + /* Calculate the surface offset */ + st_src_reg offset; + unsigned array_size = 0, base = 0, index = 0; + + get_deref_offsets(deref, &array_size, &base, &index, &offset); + + if (offset.file != PROGRAM_UNDEFINED) { + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), + offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), + offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); + } else { + offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); + } + + ir->return_deref->accept(this); + st_dst_reg dst(this->result); + dst.writemask = WRITEMASK_X; + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_atomic_read", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); + } else if (!strcmp("__intrinsic_atomic_increment", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, + st_src_reg_for_int(1)); + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, + st_src_reg_for_int(-1)); + emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_atomic_or", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_atomic_exchange", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else if (!strcmp("__intrinsic_atomic_sub", callee)) { + opcode = TGSI_OPCODE_ATOMUADD; + st_src_reg res = get_temp(glsl_type::uvec4_type); + st_dst_reg dstres = st_dst_reg(res); + dstres.writemask = dst.writemask; + emit_asm(ir, TGSI_OPCODE_INEG, dstres, data); + data = res; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, offset, data, data2); + } + + inst->buffer = buffer; +} + +void +glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + ir_constant *const_block = block->as_constant(); + + st_src_reg buffer( + PROGRAM_BUFFER, + ctx->Const.Program[shader->Stage].MaxAtomicBuffers + + (const_block ? const_block->value.u[0] : 0), + GLSL_TYPE_UINT); + + if (!const_block) { + block->accept(this); + buffer.reladdr = ralloc(mem_ctx, st_src_reg); + *buffer.reladdr = this->result; + emit_arl(ir, sampler_reladdr, this->result); + } + + /* Calculate the surface offset */ + offset->accept(this); + st_src_reg off = this->result; + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_load_ssbo", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); + if (dst.type == GLSL_TYPE_BOOL) + emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0)); + } else if (!strcmp("__intrinsic_store_ssbo", callee)) { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + dst.writemask = write_mask->value.u[0]; + + dst.type = this->result.type; + inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_atomic_or_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, off, data, data2); + } + + param = param->get_next(); + ir_constant *access = NULL; + if (!param->is_tail_sentinel()) { + access = ((ir_instruction *)param)->as_constant(); + assert(access); + } + + /* The emit_asm() might have actually split the op into pieces, e.g. for + * double stores. We have to go back and fix up all the generated ops. + */ + unsigned op = inst->op; + do { + inst->buffer = buffer; + if (access) + inst->buffer_access = access->value.u[0]; + inst = (glsl_to_tgsi_instruction *)inst->get_prev(); + if (inst->op == TGSI_OPCODE_UADD) + inst = (glsl_to_tgsi_instruction *)inst->get_prev(); + } while (inst && inst->op == op && inst->buffer.file == PROGRAM_UNDEFINED); +} + +void +glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + + if (!strcmp("__intrinsic_memory_barrier", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | + TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE | + TGSI_MEMBAR_SHARED)); + else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); + else if (!strcmp("__intrinsic_memory_barrier_buffer", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); + else if (!strcmp("__intrinsic_memory_barrier_image", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); + else if (!strcmp("__intrinsic_memory_barrier_shared", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHARED)); + else if (!strcmp("__intrinsic_group_memory_barrier", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | + TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE | + TGSI_MEMBAR_SHARED | + TGSI_MEMBAR_THREAD_GROUP)); + else + assert(!"Unexpected memory barrier intrinsic"); +} + +void +glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); + + /* Calculate the surface offset */ + offset->accept(this); + st_src_reg off = this->result; + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_load_shared", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_store_shared", callee)) { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + dst.writemask = write_mask->value.u[0]; + + dst.type = this->result.type; + inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); + inst->buffer = buffer; + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add_shared", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min_shared", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max_shared", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and_shared", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_atomic_or_shared", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor_shared", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_atomic_exchange_shared", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, off, data, data2); + inst->buffer = buffer; + } +} + +void +glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_dereference *img = (ir_dereference *)param; + const ir_variable *imgvar = img->variable_referenced(); + const glsl_type *type = imgvar->type->without_array(); + unsigned sampler_array_size = 1, sampler_base = 0; + + st_src_reg reladdr; + st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); + + get_deref_offsets(img, &sampler_array_size, &sampler_base, + (unsigned int *)&image.index, &reladdr); + if (reladdr.file != PROGRAM_UNDEFINED) { + image.reladdr = ralloc(mem_ctx, st_src_reg); + *image.reladdr = reladdr; + emit_arl(ir, sampler_reladdr, reladdr); + } + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_image_size", callee)) { + dst.writemask = WRITEMASK_XYZ; + inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); + } else if (!strcmp("__intrinsic_image_samples", callee)) { + st_src_reg res = get_temp(glsl_type::ivec4_type); + st_dst_reg dstres = st_dst_reg(res); + dstres.writemask = WRITEMASK_W; + inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres); + res.swizzle = SWIZZLE_WWWW; + emit_asm(ir, TGSI_OPCODE_MOV, dst, res); + } else { + st_src_reg arg1 = undef_src, arg2 = undef_src; + st_src_reg coord; + st_dst_reg coord_dst; + coord = get_temp(glsl_type::ivec4_type); + coord_dst = st_dst_reg(coord); + coord_dst.writemask = (1 << type->coordinate_components()) - 1; + param = param->get_next(); + ((ir_dereference *)param)->accept(this); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + coord.swizzle = SWIZZLE_XXXX; + switch (type->coordinate_components()) { + case 4: assert(!"unexpected coord count"); + /* fallthrough */ + case 3: coord.swizzle |= SWIZZLE_Z << 6; + /* fallthrough */ + case 2: coord.swizzle |= SWIZZLE_Y << 3; + } + + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + param = param->get_next(); + ((ir_dereference *)param)->accept(this); + st_src_reg sample = this->result; + sample.swizzle = SWIZZLE_XXXX; + coord_dst.writemask = WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); + coord.swizzle |= SWIZZLE_W << 9; + } + + param = param->get_next(); + if (!param->is_tail_sentinel()) { + ((ir_dereference *)param)->accept(this); + arg1 = this->result; + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + ((ir_dereference *)param)->accept(this); + arg2 = this->result; + param = param->get_next(); + } + + assert(param->is_tail_sentinel()); + + unsigned opcode; + if (!strcmp("__intrinsic_image_load", callee)) + opcode = TGSI_OPCODE_LOAD; + else if (!strcmp("__intrinsic_image_store", callee)) + opcode = TGSI_OPCODE_STORE; + else if (!strcmp("__intrinsic_image_atomic_add", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_image_atomic_min", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_image_atomic_max", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_image_atomic_and", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_image_atomic_or", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_image_atomic_xor", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_image_atomic_exchange", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee)) + opcode = TGSI_OPCODE_ATOMCAS; + else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); + if (opcode == TGSI_OPCODE_STORE) + inst->dst[0].writemask = WRITEMASK_XYZW; + } + + inst->buffer = image; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; + + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (type->sampler_array) + ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = (type->sampler_array) + ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + inst->tex_target = TEXTURE_BUFFER_INDEX; + break; + case GLSL_SAMPLER_DIM_EXTERNAL: + inst->tex_target = TEXTURE_EXTERNAL_INDEX; + break; + case GLSL_SAMPLER_DIM_MS: + inst->tex_target = (type->sampler_array) + ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + break; + default: + assert(!"Should not get here."); + } + + inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), + _mesa_get_shader_image_format(imgvar->data.image_format)); + + if (imgvar->data.image_coherent) + inst->buffer_access |= TGSI_MEMORY_COHERENT; + if (imgvar->data.image_restrict) + inst->buffer_access |= TGSI_MEMORY_RESTRICT; + if (imgvar->data.image_volatile) + inst->buffer_access |= TGSI_MEMORY_VOLATILE; +} + void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; ir_function_signature *sig = ir->callee; - function_entry *entry = get_function_signature(sig); + const char *callee = sig->function_name(); + function_entry *entry; int i; + /* Filter out intrinsics */ + if (!strcmp("__intrinsic_atomic_read", callee) || + !strcmp("__intrinsic_atomic_increment", callee) || + !strcmp("__intrinsic_atomic_predecrement", callee) || + !strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_sub", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { + visit_atomic_counter_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_load_ssbo", callee) || + !strcmp("__intrinsic_store_ssbo", callee) || + !strcmp("__intrinsic_atomic_add_ssbo", callee) || + !strcmp("__intrinsic_atomic_min_ssbo", callee) || + !strcmp("__intrinsic_atomic_max_ssbo", callee) || + !strcmp("__intrinsic_atomic_and_ssbo", callee) || + !strcmp("__intrinsic_atomic_or_ssbo", callee) || + !strcmp("__intrinsic_atomic_xor_ssbo", callee) || + !strcmp("__intrinsic_atomic_exchange_ssbo", callee) || + !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) { + visit_ssbo_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_memory_barrier", callee) || + !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) || + !strcmp("__intrinsic_memory_barrier_buffer", callee) || + !strcmp("__intrinsic_memory_barrier_image", callee) || + !strcmp("__intrinsic_memory_barrier_shared", callee) || + !strcmp("__intrinsic_group_memory_barrier", callee)) { + visit_membar_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_load_shared", callee) || + !strcmp("__intrinsic_store_shared", callee) || + !strcmp("__intrinsic_atomic_add_shared", callee) || + !strcmp("__intrinsic_atomic_min_shared", callee) || + !strcmp("__intrinsic_atomic_max_shared", callee) || + !strcmp("__intrinsic_atomic_and_shared", callee) || + !strcmp("__intrinsic_atomic_or_shared", callee) || + !strcmp("__intrinsic_atomic_xor_shared", callee) || + !strcmp("__intrinsic_atomic_exchange_shared", callee) || + !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + visit_shared_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_image_load", callee) || + !strcmp("__intrinsic_image_store", callee) || + !strcmp("__intrinsic_image_atomic_add", callee) || + !strcmp("__intrinsic_image_atomic_min", callee) || + !strcmp("__intrinsic_image_atomic_max", callee) || + !strcmp("__intrinsic_image_atomic_and", callee) || + !strcmp("__intrinsic_image_atomic_or", callee) || + !strcmp("__intrinsic_image_atomic_xor", callee) || + !strcmp("__intrinsic_image_atomic_exchange", callee) || + !strcmp("__intrinsic_image_atomic_comp_swap", callee) || + !strcmp("__intrinsic_image_size", callee) || + !strcmp("__intrinsic_image_samples", callee)) { + visit_image_intrinsic(ir); + return; + } + + entry = get_function_signature(sig); /* Process in parameters. */ foreach_two_lists(formal_node, &sig->parameters, actual_node, &ir->actual_parameters) { @@ -3102,7 +3713,6 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.index = storage->index; l.reladdr = NULL; l.writemask = WRITEMASK_XYZW; - l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { emit_asm(ir, TGSI_OPCODE_MOV, l, r); @@ -3149,18 +3759,113 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) this->result = entry->return_reg; } +void +glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head, + ir_dereference *tail, + unsigned *array_elements, + unsigned *base, + unsigned *index, + st_src_reg *indirect, + unsigned *location) +{ + switch (tail->ir_type) { + case ir_type_dereference_record: { + ir_dereference_record *deref_record = tail->as_dereference_record(); + const glsl_type *struct_type = deref_record->record->type; + int field_index = deref_record->record->type->field_index(deref_record->field); + + calc_deref_offsets(head, deref_record->record->as_dereference(), array_elements, base, index, indirect, location); + + assert(field_index >= 0); + *location += struct_type->record_location_offset(field_index); + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_arr = tail->as_dereference_array(); + ir_constant *array_index = deref_arr->array_index->constant_expression_value(); + + if (!array_index) { + st_src_reg temp_reg; + st_dst_reg temp_dst; + + temp_reg = get_temp(glsl_type::uint_type); + temp_dst = st_dst_reg(temp_reg); + temp_dst.writemask = 1; + + deref_arr->array_index->accept(this); + if (*array_elements != 1) + emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements)); + else + emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result); + + if (indirect->file == PROGRAM_UNDEFINED) + *indirect = temp_reg; + else { + temp_dst = st_dst_reg(*indirect); + temp_dst.writemask = 1; + emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg); + } + } else + *index += array_index->value.u[0] * *array_elements; + + *array_elements *= deref_arr->array->type->length; + + calc_deref_offsets(head, deref_arr->array->as_dereference(), array_elements, base, index, indirect, location); + break; + } + default: + break; + } +} + +void +glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, + unsigned *array_size, + unsigned *base, + unsigned *index, + st_src_reg *reladdr) +{ + GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); + unsigned location = 0; + ir_variable *var = ir->variable_referenced(); + + memset(reladdr, 0, sizeof(*reladdr)); + reladdr->file = PROGRAM_UNDEFINED; + + *base = 0; + *array_size = 1; + + assert(var); + location = var->data.location; + calc_deref_offsets(ir, ir, array_size, base, index, reladdr, &location); + + /* + * If we end up with no indirect then adjust the base to the index, + * and set the array size to 1. + */ + if (reladdr->file == PROGRAM_UNDEFINED) { + *base = *index; + *array_size = 1; + } + + if (location != 0xffffffff) { + *base += this->shader_program->UniformStorage[location].opaque[shader].index; + *index += this->shader_program->UniformStorage[location].opaque[shader].index; + } +} + void glsl_to_tgsi_visitor::visit(ir_texture *ir) { st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy; st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; - st_src_reg levels_src; + st_src_reg levels_src, reladdr; st_dst_reg result_dst, coord_dst, cube_sc_dst; glsl_to_tgsi_instruction *inst = NULL; unsigned opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; - ir_rvalue *sampler_index = - _mesa_get_sampler_array_nonconst_index(ir->sampler); + unsigned sampler_array_size = 1, sampler_index = 0, sampler_base = 0; bool is_cube_array = false; unsigned i; @@ -3174,7 +3879,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->coordinate->accept(this); /* Put our coords in a temp. We'll need to modify them for shadow, - * projection, or LOD, so the only case we'd use it as is is if + * projection, or LOD, so the only case we'd use it as-is is if * we're doing plain old texturing. The optimization passes on * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. */ @@ -3382,10 +4087,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) coord_dst.writemask = WRITEMASK_XYZW; } - if (sampler_index) { - sampler_index->accept(this); - emit_arl(ir, sampler_reladdr, this->result); - } + get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, + &sampler_index, &reladdr); + if (reladdr.file != PROGRAM_UNDEFINED) + emit_arl(ir, sampler_reladdr, reladdr); if (opcode == TGSI_OPCODE_TXD) inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); @@ -3418,16 +4123,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (ir->shadow_comparitor) inst->tex_shadow = GL_TRUE; - inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler, - this->shader_program, - this->prog); - if (sampler_index) { + inst->sampler.index = sampler_index; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; + + if (reladdr.file != PROGRAM_UNDEFINED) { inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr)); - inst->sampler_array_size = - ir->sampler->as_dereference_array()->array->type->array_size(); - } else { - inst->sampler_array_size = 1; + memcpy(inst->sampler.reladdr, &reladdr, sizeof(reladdr)); } if (ir->offset) { @@ -3576,6 +4278,8 @@ glsl_to_tgsi_visitor::visit(ir_barrier *ir) glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { + STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); + result.file = PROGRAM_UNDEFINED; next_temp = 1; array_sizes = NULL; @@ -3588,6 +4292,8 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() current_function = NULL; num_address_regs = 0; samplers_used = 0; + buffers_used = 0; + images_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; glsl_version = 0; @@ -3600,6 +4306,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() options = NULL; have_sqrt = false; have_fma = false; + use_shared_memory = false; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() @@ -3622,12 +4329,14 @@ static void count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; + v->buffers_used = 0; + v->images_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (inst->info->is_tex) { for (int i = 0; i < inst->sampler_array_size; i++) { - unsigned idx = inst->sampler.index + i; - v->samplers_used |= 1 << idx; + unsigned idx = inst->sampler_base + i; + v->samplers_used |= 1u << idx; debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); v->sampler_types[idx] = inst->tex_type; @@ -3639,6 +4348,24 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) } } } + if (inst->buffer.file != PROGRAM_UNDEFINED && ( + is_resource_instruction(inst->op) || + inst->op == TGSI_OPCODE_STORE)) { + if (inst->buffer.file == PROGRAM_BUFFER) { + v->buffers_used |= 1 << inst->buffer.index; + } else if (inst->buffer.file == PROGRAM_MEMORY) { + v->use_shared_memory = true; + } else { + assert(inst->buffer.file == PROGRAM_IMAGE); + for (int i = 0; i < inst->sampler_array_size; i++) { + unsigned idx = inst->sampler_base + i; + v->images_used |= 1 << idx; + v->image_targets[idx] = + st_translate_texture_target(inst->tex_target, false); + v->image_formats[idx] = inst->image_format; + } + } + } } prog->SamplersUsed = v->samplers_used; @@ -3745,6 +4472,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) { inst->op = TGSI_OPCODE_MOV; + inst->info = tgsi_get_opcode_info(inst->op); inst->src[0] = inst->src[1]; } } @@ -3827,9 +4555,11 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int * last_reads[inst->src[j].index] = (depth == 0) ? i : -2; } for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) + if (inst->dst[j].file == PROGRAM_TEMPORARY) { if (first_writes[inst->dst[j].index] == -1) first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + last_reads[inst->dst[j].index] = (depth == 0) ? i : -2; + } } for (j = 0; j < inst->tex_offset_num_offset; j++) { if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) @@ -4234,12 +4964,16 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { if (!inst->dead_mask || !inst->dst[0].writemask) continue; - else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { + /* No amount of dead masks should remove memory stores */ + if (inst->info->is_store) + continue; + + if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { inst->remove(); delete inst; removed++; } else { - if (inst->dst[0].type == GLSL_TYPE_DOUBLE) { + if (glsl_base_type_is_64bit(inst->dst[0].type)) { if (inst->dead_mask == WRITEMASK_XY || inst->dead_mask == WRITEMASK_ZW) inst->dst[0].writemask &= ~(inst->dead_mask); @@ -4343,6 +5077,7 @@ glsl_to_tgsi_visitor::merge_registers(void) /* Update the first_writes and last_reads arrays with the new * values for the merged register index, and mark the newly unused * register index as such. */ + assert(last_reads[j] >= last_reads[i]); last_reads[i] = last_reads[j]; first_writes[j] = -1; last_reads[j] = -1; @@ -4412,7 +5147,10 @@ struct st_translate { struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; + struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + struct ureg_src shared_memory; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; struct array_decl *input_arrays; @@ -4437,43 +5175,78 @@ struct st_translate { unsigned insn_size; unsigned insn_count; - unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ + unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ boolean error; }; /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ -const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { - /* Vertex shader - */ - TGSI_SEMANTIC_VERTEXID, - TGSI_SEMANTIC_INSTANCEID, - TGSI_SEMANTIC_VERTEXID_NOBASE, - TGSI_SEMANTIC_BASEVERTEX, - TGSI_SEMANTIC_BASEINSTANCE, - TGSI_SEMANTIC_DRAWID, - - /* Geometry shader - */ - TGSI_SEMANTIC_INVOCATIONID, +unsigned +_mesa_sysval_to_semantic(unsigned sysval) +{ + switch (sysval) { + /* Vertex shader */ + case SYSTEM_VALUE_VERTEX_ID: + return TGSI_SEMANTIC_VERTEXID; + case SYSTEM_VALUE_INSTANCE_ID: + return TGSI_SEMANTIC_INSTANCEID; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + return TGSI_SEMANTIC_VERTEXID_NOBASE; + case SYSTEM_VALUE_BASE_VERTEX: + return TGSI_SEMANTIC_BASEVERTEX; + case SYSTEM_VALUE_BASE_INSTANCE: + return TGSI_SEMANTIC_BASEINSTANCE; + case SYSTEM_VALUE_DRAW_ID: + return TGSI_SEMANTIC_DRAWID; + + /* Geometry shader */ + case SYSTEM_VALUE_INVOCATION_ID: + return TGSI_SEMANTIC_INVOCATIONID; + + /* Fragment shader */ + case SYSTEM_VALUE_FRAG_COORD: + return TGSI_SEMANTIC_POSITION; + case SYSTEM_VALUE_FRONT_FACE: + return TGSI_SEMANTIC_FACE; + case SYSTEM_VALUE_SAMPLE_ID: + return TGSI_SEMANTIC_SAMPLEID; + case SYSTEM_VALUE_SAMPLE_POS: + return TGSI_SEMANTIC_SAMPLEPOS; + case SYSTEM_VALUE_SAMPLE_MASK_IN: + return TGSI_SEMANTIC_SAMPLEMASK; + case SYSTEM_VALUE_HELPER_INVOCATION: + return TGSI_SEMANTIC_HELPER_INVOCATION; + + /* Tessellation shader */ + case SYSTEM_VALUE_TESS_COORD: + return TGSI_SEMANTIC_TESSCOORD; + case SYSTEM_VALUE_VERTICES_IN: + return TGSI_SEMANTIC_VERTICESIN; + case SYSTEM_VALUE_PRIMITIVE_ID: + return TGSI_SEMANTIC_PRIMID; + case SYSTEM_VALUE_TESS_LEVEL_OUTER: + return TGSI_SEMANTIC_TESSOUTER; + case SYSTEM_VALUE_TESS_LEVEL_INNER: + return TGSI_SEMANTIC_TESSINNER; + + /* Compute shader */ + case SYSTEM_VALUE_LOCAL_INVOCATION_ID: + return TGSI_SEMANTIC_THREAD_ID; + case SYSTEM_VALUE_WORK_GROUP_ID: + return TGSI_SEMANTIC_BLOCK_ID; + case SYSTEM_VALUE_NUM_WORK_GROUPS: + return TGSI_SEMANTIC_GRID_SIZE; + + /* Unhandled */ + case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: + case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: + case SYSTEM_VALUE_VERTEX_CNT: + default: + assert(!"Unexpected SYSTEM_VALUE_ enum"); + return TGSI_SEMANTIC_COUNT; + } +} - /* Fragment shader - */ - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_FACE, - TGSI_SEMANTIC_SAMPLEID, - TGSI_SEMANTIC_SAMPLEPOS, - TGSI_SEMANTIC_SAMPLEMASK, - TGSI_SEMANTIC_HELPER_INVOCATION, - - /* Tessellation shaders - */ - TGSI_SEMANTIC_TESSCOORD, - TGSI_SEMANTIC_VERTICESIN, - TGSI_SEMANTIC_PRIMID, - TGSI_SEMANTIC_TESSOUTER, - TGSI_SEMANTIC_TESSINNER, -}; /** * Make note of a branch to a label in the TGSI code. @@ -4564,7 +5337,7 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, case PROGRAM_TEMPORARY: /* Allocate space for temporaries on demand. */ if (index >= t->temps_size) { - const int inc = 4096; + const int inc = align(index - t->temps_size + 1, 4096); t->temps = (struct ureg_dst*) realloc(t->temps, @@ -4595,10 +5368,10 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, case PROGRAM_OUTPUT: if (!array_id) { - if (t->procType == TGSI_PROCESSOR_FRAGMENT) + if (t->procType == PIPE_SHADER_FRAGMENT) assert(index < FRAG_RESULT_MAX); - else if (t->procType == TGSI_PROCESSOR_TESS_CTRL || - t->procType == TGSI_PROCESSOR_TESS_EVAL) + else if (t->procType == PIPE_SHADER_TESS_CTRL || + t->procType == PIPE_SHADER_TESS_EVAL) assert(index < VARYING_SLOT_TESS_MAX); else assert(index < VARYING_SLOT_MAX); @@ -4785,6 +5558,15 @@ translate_tex_offset(struct st_translate *t, offset.SwizzleZ = imm_src.SwizzleZ; offset.Padding = 0; break; + case PROGRAM_INPUT: + imm_src = t->inputs[t->inputMapping[in_offset->index]]; + offset.File = imm_src.File; + offset.Index = imm_src.Index; + offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); + offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); + offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); + offset.Padding = 0; + break; case PROGRAM_TEMPORARY: imm_src = ureg_src(t->temps[in_offset->index]); offset.File = imm_src.File; @@ -4819,14 +5601,14 @@ compile_tgsi_instruction(struct st_translate *t, const glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; - GLuint i; + int i; struct ureg_dst dst[2]; struct ureg_src src[4]; struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; - unsigned num_dst; - unsigned num_src; - unsigned tex_target; + int num_dst; + int num_src; + unsigned tex_target = 0; num_dst = num_inst_dst_regs(inst); num_src = num_inst_src_regs(inst); @@ -4873,7 +5655,7 @@ compile_tgsi_instruction(struct st_translate *t, src[num_src] = ureg_src_indirect(src[num_src], ureg_src(t->address[2])); num_src++; - for (i = 0; i < inst->tex_offset_num_offset; i++) { + for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i); } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); @@ -4886,6 +5668,55 @@ compile_tgsi_instruction(struct st_translate *t, src, num_src); return; + case TGSI_OPCODE_RESQ: + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + for (i = num_src - 1; i >= 0; i--) + src[i + 1] = src[i]; + num_src++; + if (inst->buffer.file == PROGRAM_MEMORY) { + src[0] = t->shared_memory; + } else if (inst->buffer.file == PROGRAM_BUFFER) { + src[0] = t->buffers[inst->buffer.index]; + } else { + src[0] = t->images[inst->buffer.index]; + tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); + } + if (inst->buffer.reladdr) + src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2])); + assert(src[0].File != TGSI_FILE_NULL); + ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, + inst->buffer_access, + tex_target, inst->image_format); + break; + + case TGSI_OPCODE_STORE: + if (inst->buffer.file == PROGRAM_MEMORY) { + dst[0] = ureg_dst(t->shared_memory); + } else if (inst->buffer.file == PROGRAM_BUFFER) { + dst[0] = ureg_dst(t->buffers[inst->buffer.index]); + } else { + dst[0] = ureg_dst(t->images[inst->buffer.index]); + tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); + } + dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); + if (inst->buffer.reladdr) + dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2])); + assert(dst[0].File != TGSI_FILE_NULL); + ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, + inst->buffer_access, + tex_target, inst->image_format); + break; + case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); @@ -4906,10 +5737,11 @@ compile_tgsi_instruction(struct st_translate *t, * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_wpos_adjustment( struct st_translate *t, - int wpos_transform_const, - boolean invert, - GLfloat adjX, GLfloat adjY[2]) +emit_wpos_adjustment(struct gl_context *ctx, + struct st_translate *t, + int wpos_transform_const, + boolean invert, + GLfloat adjX, GLfloat adjY[2]) { struct ureg_program *ureg = t->ureg; @@ -4921,7 +5753,11 @@ emit_wpos_adjustment( struct st_translate *t, */ struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const); struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); - struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]]; + struct ureg_src *wpos = + ctx->Const.GLSLFragCoordIsSysVal ? + &t->systemValues[SYSTEM_VALUE_FRAG_COORD] : + &t->inputs[t->inputMapping[VARYING_SLOT_POS]]; + struct ureg_src wpos_input = *wpos; /* First, apply the coordinate shift: */ if (adjX || adjY[0] || adjY[1]) { @@ -4972,7 +5808,7 @@ emit_wpos_adjustment( struct st_translate *t, /* Use wpos_temp as position input from here on: */ - t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp); + *wpos = ureg_src(wpos_temp); } @@ -4998,7 +5834,7 @@ emit_wpos(struct st_context *st, * * The bias of the y-coordinate depends on whether y-inversion takes place * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are - * drawing to an FBO (causes additional inversion), and whether the the pipe + * drawing to an FBO (causes additional inversion), and whether the pipe * driver origin and the requested origin differ (the latter condition is * stored in the 'invert' variable). * @@ -5081,7 +5917,7 @@ emit_wpos(struct st_context *st, /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ - emit_wpos_adjustment(t, wpos_transform_const, invert, adjX, adjY); + emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY); } /** @@ -5128,6 +5964,20 @@ find_array(unsigned attr, struct array_decl *arrays, unsigned count, return false; } +static void +emit_compute_block_size(const struct gl_program *program, + struct ureg_program *ureg) { + const struct gl_compute_program *cp = + (const struct gl_compute_program *)program; + + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, + cp->LocalSize[0]); + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, + cp->LocalSize[1]); + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, + cp->LocalSize[2]); +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -5170,34 +6020,13 @@ st_translate_program( { struct st_translate *t; unsigned i; + struct gl_program_constants *frag_const = + &ctx->Const.Program[MESA_SHADER_FRAGMENT]; enum pipe_error ret = PIPE_OK; assert(numInputs <= ARRAY_SIZE(t->inputs)); assert(numOutputs <= ARRAY_SIZE(t->outputs)); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] == - TGSI_SEMANTIC_FACE); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] == - TGSI_SEMANTIC_VERTEXID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] == - TGSI_SEMANTIC_INSTANCEID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] == - TGSI_SEMANTIC_SAMPLEID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] == - TGSI_SEMANTIC_SAMPLEPOS); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] == - TGSI_SEMANTIC_SAMPLEMASK); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] == - TGSI_SEMANTIC_INVOCATIONID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE] == - TGSI_SEMANTIC_VERTEXID_NOBASE); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] == - TGSI_SEMANTIC_BASEVERTEX); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] == - TGSI_SEMANTIC_TESSCOORD); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] == - TGSI_SEMANTIC_HELPER_INVOCATION); - t = CALLOC_STRUCT(st_translate); if (!t) { ret = PIPE_ERROR_OUT_OF_MEMORY; @@ -5217,7 +6046,7 @@ st_translate_program( * Declare input attributes. */ switch (procType) { - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: for (i = 0; i < numInputs; i++) { unsigned array_id = 0; unsigned array_size; @@ -5238,9 +6067,9 @@ st_translate_program( } } break; - case TGSI_PROCESSOR_GEOMETRY: - case TGSI_PROCESSOR_TESS_EVAL: - case TGSI_PROCESSOR_TESS_CTRL: + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_TESS_EVAL: + case PIPE_SHADER_TESS_CTRL: for (i = 0; i < numInputs; i++) { unsigned array_id = 0; unsigned array_size; @@ -5259,11 +6088,13 @@ st_translate_program( } } break; - case TGSI_PROCESSOR_VERTEX: + case PIPE_SHADER_VERTEX: for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_vs_input(ureg, i); } break; + case PIPE_SHADER_COMPUTE: + break; default: assert(0); } @@ -5272,12 +6103,13 @@ st_translate_program( * Declare output attributes. */ switch (procType) { - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: break; - case TGSI_PROCESSOR_GEOMETRY: - case TGSI_PROCESSOR_TESS_EVAL: - case TGSI_PROCESSOR_TESS_CTRL: - case TGSI_PROCESSOR_VERTEX: + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_TESS_EVAL: + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_VERTEX: for (i = 0; i < numOutputs; i++) { unsigned array_id = 0; unsigned array_size; @@ -5302,7 +6134,10 @@ st_translate_program( assert(0); } - if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (procType == PIPE_SHADER_FRAGMENT) { + if (program->shader->info.EarlyFragmentTests) + ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); + if (proginfo->InputsRead & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. */ emit_wpos(st_context(ctx), t, proginfo, ureg, @@ -5347,7 +6182,7 @@ st_translate_program( } } } - else if (procType == TGSI_PROCESSOR_VERTEX) { + else if (procType == PIPE_SHADER_VERTEX) { for (i = 0; i < numOutputs; i++) { if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ @@ -5359,6 +6194,10 @@ st_translate_program( } } + if (procType == PIPE_SHADER_COMPUTE) { + emit_compute_block_size(proginfo, ureg); + } + /* Declare address register. */ if (program->num_address_regs > 0) { @@ -5374,7 +6213,7 @@ st_translate_program( for (i = 0; sysInputs; i++) { if (sysInputs & (1 << i)) { - unsigned semName = _mesa_sysval_to_semantic[i]; + unsigned semName = _mesa_sysval_to_semantic(i); t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); @@ -5389,7 +6228,7 @@ st_translate_program( */ struct st_context *st = st_context(ctx); struct pipe_screen *pscreen = st->pipe->screen; - assert(procType == TGSI_PROCESSOR_VERTEX); + assert(procType == PIPE_SHADER_VERTEX); assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); (void) pscreen; if (!ctx->Const.NativeIntegers) { @@ -5399,6 +6238,11 @@ st_translate_program( } } + if (procType == PIPE_SHADER_FRAGMENT && + semName == TGSI_SEMANTIC_POSITION) + emit_wpos(st_context(ctx), t, proginfo, ureg, + program->wpos_transform_const); + sysInputs &= ~(1 << i); } } @@ -5480,8 +6324,8 @@ st_translate_program( assert(i == program->num_immediates); /* texture samplers */ - for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { - if (program->samplers_used & (1 << i)) { + for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { + if (program->samplers_used & (1u << i)) { unsigned type; t->samplers[i] = ureg_DECL_sampler(ureg, i); @@ -5505,6 +6349,31 @@ st_translate_program( } } + for (i = 0; i < frag_const->MaxAtomicBuffers; i++) { + if (program->buffers_used & (1 << i)) { + t->buffers[i] = ureg_DECL_buffer(ureg, i, true); + } + } + + for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks; + i++) { + if (program->buffers_used & (1 << i)) { + t->buffers[i] = ureg_DECL_buffer(ureg, i, false); + } + } + + if (program->use_shared_memory) + t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); + + for (i = 0; i < program->shader->NumImages; i++) { + if (program->images_used & (1 << i)) { + t->images[i] = ureg_DECL_image(ureg, i, + program->image_targets[i], + program->image_formats[i], + true, false); + } + } + /* Emit each instruction in turn: */ foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) { @@ -5519,6 +6388,42 @@ st_translate_program( t->insn[t->labels[i].branch_target]); } + /* Set the next shader stage hint for VS and TES. */ + switch (procType) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_EVAL: + if (program->shader_program->SeparateShader) + break; + + for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) { + if (program->shader_program->_LinkedShaders[i]) { + unsigned next; + + switch (i) { + case MESA_SHADER_TESS_CTRL: + next = PIPE_SHADER_TESS_CTRL; + break; + case MESA_SHADER_TESS_EVAL: + next = PIPE_SHADER_TESS_EVAL; + break; + case MESA_SHADER_GEOMETRY: + next = PIPE_SHADER_GEOMETRY; + break; + case MESA_SHADER_FRAGMENT: + next = PIPE_SHADER_FRAGMENT; + break; + default: + assert(0); + continue; + } + + ureg_set_next_shader_processor(ureg, next); + break; + } + } + break; + } + out: if (t) { free(t->arrays); @@ -5547,16 +6452,16 @@ out: * generating Mesa IR. */ static struct gl_program * -get_mesa_program(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_shader *shader) +get_mesa_program_tgsi(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_linked_shader *shader) { glsl_to_tgsi_visitor* v; struct gl_program *prog; GLenum target = _mesa_shader_stage_to_program(shader->Stage); bool progress; struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)]; + &ctx->Const.ShaderCompilerOptions[shader->Stage]; struct pipe_screen *pscreen = ctx->st->pipe->screen; unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage); @@ -5651,8 +6556,8 @@ get_mesa_program(struct gl_context *ctx, /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); - if (shader->Type != GL_TESS_CONTROL_SHADER && - shader->Type != GL_TESS_EVALUATION_SHADER) + if (shader->Stage != MESA_SHADER_TESS_CTRL && + shader->Stage != MESA_SHADER_TESS_EVAL) v->copy_propagate(); while (v->eliminate_dead_code()); @@ -5683,9 +6588,14 @@ get_mesa_program(struct gl_context *ctx, prog->OutputsWritten, 0ULL, prog->PatchOutputsWritten); count_resources(v, prog); + /* The GLSL IR won't be needed anymore. */ + ralloc_free(shader->ir); + shader->ir = NULL; + /* This must be done before the uniform storage is associated. */ - if (shader->Type == GL_FRAGMENT_SHADER && - prog->InputsRead & VARYING_BIT_POS){ + if (shader->Stage == MESA_SHADER_FRAGMENT && + (prog->InputsRead & VARYING_BIT_POS || + prog->SystemValuesRead & (1 << SYSTEM_VALUE_FRAG_COORD))) { static const gl_state_index wposTransformState[STATE_LENGTH] = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; @@ -5717,28 +6627,33 @@ get_mesa_program(struct gl_context *ctx, struct st_geometry_program *stgp; struct st_tessctrl_program *sttcp; struct st_tesseval_program *sttep; + struct st_compute_program *stcp; - switch (shader->Type) { - case GL_VERTEX_SHADER: + switch (shader->Stage) { + case MESA_SHADER_VERTEX: stvp = (struct st_vertex_program *)prog; stvp->glsl_to_tgsi = v; break; - case GL_FRAGMENT_SHADER: + case MESA_SHADER_FRAGMENT: stfp = (struct st_fragment_program *)prog; stfp->glsl_to_tgsi = v; break; - case GL_GEOMETRY_SHADER: + case MESA_SHADER_GEOMETRY: stgp = (struct st_geometry_program *)prog; stgp->glsl_to_tgsi = v; break; - case GL_TESS_CONTROL_SHADER: + case MESA_SHADER_TESS_CTRL: sttcp = (struct st_tessctrl_program *)prog; sttcp->glsl_to_tgsi = v; break; - case GL_TESS_EVALUATION_SHADER: + case MESA_SHADER_TESS_EVAL: sttep = (struct st_tesseval_program *)prog; sttep->glsl_to_tgsi = v; break; + case MESA_SHADER_COMPUTE: + stcp = (struct st_compute_program *)prog; + stcp->glsl_to_tgsi = v; + break; default: assert(!"should not be reached"); return NULL; @@ -5747,73 +6662,31 @@ get_mesa_program(struct gl_context *ctx, return prog; } -extern "C" { - -static void -st_dump_program_for_shader_db(struct gl_context *ctx, - struct gl_shader_program *prog) +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_linked_shader *shader) { - /* Dump only successfully compiled and linked shaders to the specified - * file. This is for shader-db. - * - * These options allow some pre-processing of shaders while dumping, - * because some apps have ill-formed shaders. - */ - const char *dump_filename = os_get_option("ST_DUMP_SHADERS"); - const char *insert_directives = os_get_option("ST_DUMP_INSERT"); - - if (dump_filename && prog->Name != 0) { - FILE *f = fopen(dump_filename, "a"); - - if (f) { - for (unsigned i = 0; i < prog->NumShaders; i++) { - const struct gl_shader *sh = prog->Shaders[i]; - const char *source; - bool skip_version = false; - - if (!sh) - continue; - - source = sh->Source; - - /* This string mustn't be changed. shader-db uses it to find - * where the shader begins. - */ - fprintf(f, "GLSL %s shader %d source for linked program %d:\n", - _mesa_shader_stage_to_string(sh->Stage), - i, prog->Name); - - /* Dump the forced version if set. */ - if (ctx->Const.ForceGLSLVersion) { - fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion); - skip_version = true; - } - - /* Insert directives (optional). */ - if (insert_directives) { - if (!ctx->Const.ForceGLSLVersion && prog->Version) - fprintf(f, "#version %i\n", prog->Version); - fprintf(f, "%s\n", insert_directives); - skip_version = true; - } - - if (skip_version && strncmp(source, "#version ", 9) == 0) { - const char *next_line = strstr(source, "\n"); - - if (next_line) - source = next_line + 1; - else - continue; - } - - fprintf(f, "%s", source); - fprintf(f, "\n"); - } - fclose(f); + struct pipe_screen *pscreen = ctx->st->pipe->screen; + unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage); + enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) + pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR); + if (preferred_ir == PIPE_SHADER_IR_NIR) { + /* TODO only for GLSL VS/FS for now: */ + switch (shader->Stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + return st_nir_get_mesa_program(ctx, shader_program, shader); + default: + break; } } + return get_mesa_program_tgsi(ctx, shader_program, shader); } + +extern "C" { + /** * Link a shader. * Called via ctx->Driver.LinkShader() @@ -5832,7 +6705,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) bool progress; exec_list *ir = prog->_LinkedShaders[i]->ir; - gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type); + gl_shader_stage stage = prog->_LinkedShaders[i]->Stage; const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; unsigned ptarget = st_shader_stage_to_ptarget(stage); @@ -5888,7 +6761,21 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) (have_dround ? 0 : DOPS_TO_DFRAC) | (options->EmitNoPow ? POW_TO_EXP2 : 0) | (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | - (options->EmitNoSat ? SAT_TO_CLAMP : 0)); + (options->EmitNoSat ? SAT_TO_CLAMP : 0) | + /* Assume that if ARB_gpu_shader5 is not supported + * then all of the extended integer functions need + * lowering. It may be necessary to add some caps + * for individual instructions. + */ + (!ctx->Extensions.ARB_gpu_shader5 + ? BIT_COUNT_TO_MATH | + EXTRACT_TO_SHIFTS | + INSERT_TO_SHIFTS | + REVERSE_TO_SHIFTS | + FIND_LSB_TO_FLOAT_CAST | + FIND_MSB_TO_FLOAT_CAST | + IMUL_HIGH_TO_MUL + : 0)); do_vec_index_to_cond_assign(ir); lower_vector_insert(ir, true); @@ -5914,6 +6801,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) validate_ir_tree(ir); } + build_program_resource_list(ctx, prog); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program *linked_prog; @@ -5938,7 +6827,6 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) _mesa_reference_program(ctx, &linked_prog, NULL); } - st_dump_program_for_shader_db(ctx, prog); return GL_TRUE; } @@ -5947,9 +6835,17 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, const GLuint outputMapping[], struct pipe_stream_output_info *so) { - unsigned i; struct gl_transform_feedback_info *info = &glsl_to_tgsi->shader_program->LinkedTransformFeedback; + st_translate_stream_output_info2(info, outputMapping, so); +} + +void +st_translate_stream_output_info2(struct gl_transform_feedback_info *info, + const GLuint outputMapping[], + struct pipe_stream_output_info *so) +{ + unsigned i; for (i = 0; i < info->NumOutputs; i++) { so->output[i].register_index = @@ -5962,7 +6858,7 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, } for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - so->stride[i] = info->BufferStride[i]; + so->stride[i] = info->Buffers[i].Stride; } so->num_outputs = info->NumOutputs; }