X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=baa835924bcd4644cbf3396e0f9fbcea61166925;hb=268ba028dcbdaea25a972a460c1636f485d5c5bc;hp=83dadabf65c3d3eeee8b01b88675180e00cb5168;hpb=ce55afc4d6d3d97440856cad117d02c20fb952a9;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 83dadabf65c..baa835924bc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -56,6 +56,7 @@ #include "st_nir.h" #include "st_shader_cache.h" +#include "util/hash_table.h" #include #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ @@ -86,6 +87,13 @@ static int swizzle_for_type(const glsl_type *type, int component = 0) return swizzle; } +static unsigned is_precise(const ir_variable *ir) +{ + if (!ir) + return 0; + return ir->data.precise || ir->data.invariant; +} + /** * This struct is a corresponding struct to TGSI ureg_src. */ @@ -165,7 +173,7 @@ public: explicit st_src_reg(st_dst_reg reg); - int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int16_t index2D; uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ int negate:4; /**< NEGATE_XYZW mask from mesa */ @@ -239,7 +247,7 @@ public: explicit st_dst_reg(st_src_reg reg); - int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int16_t index2D; gl_register_file file:5; /**< PROGRAM_* from Mesa */ unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */ @@ -288,13 +296,14 @@ public: st_dst_reg dst[2]; st_src_reg src[4]; - st_src_reg resource; /**< sampler or buffer register */ + st_src_reg resource; /**< sampler, image or buffer register */ st_src_reg *tex_offsets; /** Pointer to the ir source this tree came from for debugging */ ir_instruction *ir; unsigned op:8; /**< TGSI opcode */ + unsigned precise:1; unsigned saturate:1; unsigned is_64bit_expanded:1; unsigned sampler_base:5; @@ -310,7 +319,9 @@ public: const struct tgsi_opcode_info *info; }; -class variable_storage : public exec_node { +class variable_storage { + DECLARE_RZALLOC_CXX_OPERATORS(variable_storage) + public: variable_storage(ir_variable *var, gl_register_file file, int index, unsigned array_id = 0) @@ -346,8 +357,8 @@ public: int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); +static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); +static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); struct inout_decl { unsigned mesa_index; @@ -388,7 +399,7 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) } struct rename_reg_pair { - int old_reg; + bool valid; int new_reg; }; @@ -420,7 +431,6 @@ public: uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ - int buffers_used; int images_used; int image_targets[PIPE_MAX_SHADER_IMAGES]; unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; @@ -433,6 +443,7 @@ public: bool have_fma; bool use_shared_memory; bool has_tex_txf_lz; + bool precise; variable_storage *find_variable_storage(ir_variable *var); @@ -489,7 +500,7 @@ public: st_src_reg result; /** List of variable_storage */ - exec_list variables; + struct hash_table *variables; /** List of immediate_storage */ exec_list immediates; @@ -557,8 +568,9 @@ public: void simplify_cmp(void); - void rename_temp_registers(int num_renames, struct rename_reg_pair *renames); + void rename_temp_registers(struct rename_reg_pair *renames); void get_first_temp_read(int *first_reads); + void get_first_temp_write(int *first_writes); void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); void get_last_temp_write(int *last_writes); @@ -688,6 +700,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, STATIC_ASSERT(TGSI_OPCODE_LAST <= 255); inst->op = op; + inst->precise = this->precise; inst->info = tgsi_get_opcode_info(op); inst->dst[0] = dst; inst->dst[1] = dst1; @@ -1307,13 +1320,13 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) variable_storage * glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) { + struct hash_entry *entry; - foreach_in_list(variable_storage, entry, &this->variables) { - if (entry->var == var) - return entry; - } + entry = _mesa_hash_table_search(this->variables, var); + if (!entry) + return NULL; - return NULL; + return (variable_storage *)entry->data; } void @@ -1346,7 +1359,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) if (i == ir->get_num_state_slots()) { /* We'll set the index later. */ storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); - this->variables.push_tail(storage); + + _mesa_hash_table_insert(this->variables, ir, storage); dst = undef_dst; } else { @@ -1361,7 +1375,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index, dst.array_id); - this->variables.push_tail(storage); + _mesa_hash_table_insert(this->variables, ir, storage); } @@ -1543,7 +1557,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ - if (ir->operation == ir_binop_add) { + if (!this->precise && ir->operation == ir_binop_add) { if (try_emit_mad(ir, 1)) return; if (try_emit_mad(ir, 0)) @@ -2327,6 +2341,10 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_unop_pack_int_2x32: case ir_unop_unpack_uint_2x32: case ir_unop_pack_uint_2x32: + case ir_unop_unpack_sampler_2x32: + case ir_unop_pack_sampler_2x32: + case ir_unop_unpack_image_2x32: + case ir_unop_pack_image_2x32: emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; @@ -2599,7 +2617,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) case ir_var_uniform: entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, var->data.param_index); - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); break; case ir_var_shader_in: { /* The linker assigns locations for varyings and attributes, @@ -2646,7 +2664,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) decl->array_id); entry->component = component; - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); + break; } case ir_var_shader_out: { @@ -2704,7 +2723,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } entry->component = component; - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); + break; } case ir_var_system_value: @@ -2718,7 +2738,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) entry = new(mem_ctx) variable_storage(var, src.file, src.index, src.array_id); - this->variables.push_tail(entry); + _mesa_hash_table_insert(this->variables, var, entry); break; } @@ -3137,6 +3157,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) st_dst_reg l; st_src_reg r; + /* all generated instructions need to be flaged as precise */ + this->precise = is_precise(ir->lhs->variable_referenced()); ir->rhs->accept(this); r = this->result; @@ -3228,6 +3250,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else { emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false); } + this->precise = 0; } @@ -3753,6 +3776,59 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) } } +static void +get_image_qualifiers(ir_dereference *ir, const glsl_type **type, + bool *memory_coherent, bool *memory_volatile, + bool *memory_restrict, unsigned *image_format) +{ + + switch (ir->ir_type) { + case ir_type_dereference_record: { + ir_dereference_record *deref_record = ir->as_dereference_record(); + const glsl_type *struct_type = deref_record->record->type; + + for (unsigned i = 0; i < struct_type->length; i++) { + if (!strcmp(struct_type->fields.structure[i].name, + deref_record->field)) { + *type = struct_type->fields.structure[i].type; + *memory_coherent = + struct_type->fields.structure[i].memory_coherent; + *memory_volatile = + struct_type->fields.structure[i].memory_volatile; + *memory_restrict = + struct_type->fields.structure[i].memory_restrict; + *image_format = + struct_type->fields.structure[i].image_format; + break; + } + } + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_arr = ir->as_dereference_array(); + get_image_qualifiers((ir_dereference *)deref_arr->array, type, + memory_coherent, memory_volatile, memory_restrict, + image_format); + break; + } + + case ir_type_dereference_variable: { + ir_variable *var = ir->variable_referenced(); + + *type = var->type->without_array(); + *memory_coherent = var->data.memory_coherent; + *memory_volatile = var->data.memory_volatile; + *memory_restrict = var->data.memory_restrict; + *image_format = var->data.image_format; + break; + } + + default: + break; + } +} + void glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) { @@ -3760,15 +3836,21 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) ir_dereference *img = (ir_dereference *)param; const ir_variable *imgvar = img->variable_referenced(); - const glsl_type *type = imgvar->type->without_array(); unsigned sampler_array_size = 1, sampler_base = 0; + bool memory_coherent = false, memory_volatile = false, memory_restrict = false; + unsigned image_format = 0; + const glsl_type *type = NULL; + + get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, + &memory_restrict, &image_format); st_src_reg reladdr; st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); - + uint16_t index = 0; get_deref_offsets(img, &sampler_array_size, &sampler_base, - (uint16_t*)&image.index, &reladdr, true); + &index, &reladdr, !imgvar->contains_bindless()); + image.index = index; if (reladdr.file != PROGRAM_UNDEFINED) { image.reladdr = ralloc(mem_ctx, st_src_reg); *image.reladdr = reladdr; @@ -3880,19 +3962,26 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) inst->dst[0].writemask = WRITEMASK_XYZW; } - inst->resource = image; - inst->sampler_array_size = sampler_array_size; - inst->sampler_base = sampler_base; + if (imgvar->contains_bindless()) { + img->accept(this); + inst->resource = this->result; + inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); + } else { + inst->resource = image; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; + } inst->tex_target = type->sampler_index(); inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), - _mesa_get_shader_image_format(imgvar->data.image_format)); + _mesa_get_shader_image_format(image_format)); - if (imgvar->data.image_coherent) + if (memory_coherent) inst->buffer_access |= TGSI_MEMORY_COHERENT; - if (imgvar->data.image_restrict) + if (memory_restrict) inst->buffer_access |= TGSI_MEMORY_RESTRICT; - if (imgvar->data.image_volatile) + if (memory_volatile) inst->buffer_access |= TGSI_MEMORY_VOLATILE; } @@ -3902,6 +3991,8 @@ glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op) ir->return_deref->accept(this); st_dst_reg dst = st_dst_reg(this->result); + dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements); + st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src }; unsigned num_src = 0; foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { @@ -3989,15 +4080,9 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) visit_image_intrinsic(ir); return; - case ir_intrinsic_shader_clock: { - ir->return_deref->accept(this); - - st_dst_reg dst = st_dst_reg(this->result); - dst.writemask = TGSI_WRITEMASK_XY; - - emit_asm(ir, TGSI_OPCODE_CLOCK, dst); + case ir_intrinsic_shader_clock: + visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK); return; - } case ir_intrinsic_vote_all: visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL); @@ -4154,8 +4239,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) unsigned opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; unsigned sampler_array_size = 1, sampler_base = 0; - uint16_t sampler_index = 0; bool is_cube_array = false, is_cube_shadow = false; + ir_variable *var = ir->sampler->variable_referenced(); unsigned i; /* if we are a cube array sampler or a cube shadow */ @@ -4385,10 +4470,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) coord_dst.writemask = WRITEMASK_XYZW; } + st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); + + uint16_t index = 0; get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, - &sampler_index, &reladdr, true); - if (reladdr.file != PROGRAM_UNDEFINED) + &index, &reladdr, !var->contains_bindless()); + + sampler.index = index; + if (reladdr.file != PROGRAM_UNDEFINED) { + sampler.reladdr = ralloc(mem_ctx, st_src_reg); + *sampler.reladdr = reladdr; emit_arl(ir, sampler_reladdr, reladdr); + } if (opcode == TGSI_OPCODE_TXD) inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); @@ -4419,13 +4512,15 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (ir->shadow_comparator) inst->tex_shadow = GL_TRUE; - inst->resource.index = sampler_index; - inst->sampler_array_size = sampler_array_size; - inst->sampler_base = sampler_base; - - if (reladdr.file != PROGRAM_UNDEFINED) { - inst->resource.reladdr = ralloc(mem_ctx, st_src_reg); - memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr)); + if (var->contains_bindless()) { + ir->sampler->accept(this); + inst->resource = this->result; + inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_X, SWIZZLE_Y); + } else { + inst->resource = sampler; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; } if (ir->offset) { @@ -4543,7 +4638,6 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_immediates = 0; num_address_regs = 0; samplers_used = 0; - buffers_used = 0; images_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; @@ -4559,10 +4653,19 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() have_fma = false; use_shared_memory = false; has_tex_txf_lz = false; + variables = NULL; +} + +static void var_destroy(struct hash_entry *entry) +{ + variable_storage *storage = (variable_storage *)entry->data; + + delete storage; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + _mesa_hash_table_destroy(variables, var_destroy); free(array_sizes); ralloc_free(mem_ctx); } @@ -4581,7 +4684,6 @@ static void count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; - v->buffers_used = 0; v->images_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { @@ -4607,12 +4709,9 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (inst->resource.file != PROGRAM_UNDEFINED && ( is_resource_instruction(inst->op) || inst->op == TGSI_OPCODE_STORE)) { - if (inst->resource.file == PROGRAM_BUFFER) { - v->buffers_used |= 1 << inst->resource.index; - } else if (inst->resource.file == PROGRAM_MEMORY) { + if (inst->resource.file == PROGRAM_MEMORY) { v->use_shared_memory = true; - } else { - assert(inst->resource.file == PROGRAM_IMAGE); + } else if (inst->resource.file == PROGRAM_IMAGE) { for (int i = 0; i < inst->sampler_array_size; i++) { unsigned idx = inst->sampler_base + i; v->images_used |= 1 << idx; @@ -4687,7 +4786,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) /* Give up if we encounter relative addressing or flow control. */ if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || inst->dst[1].reladdr || inst->dst[1].reladdr2 || - tgsi_get_opcode_info(inst->op)->is_branch || + inst->info->is_branch || inst->op == TGSI_OPCODE_CONT || inst->op == TGSI_OPCODE_END || inst->op == TGSI_OPCODE_RET) { @@ -4736,31 +4835,65 @@ glsl_to_tgsi_visitor::simplify_cmp(void) /* Replaces all references to a temporary register index with another index. */ void -glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames) +glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames) { foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { unsigned j; - int k; for (j = 0; j < num_inst_src_regs(inst); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY) - for (k = 0; k < num_renames; k++) - if (inst->src[j].index == renames[k].old_reg) - inst->src[j].index = renames[k].new_reg; + if (inst->src[j].file == PROGRAM_TEMPORARY) { + int old_idx = inst->src[j].index; + if (renames[old_idx].valid) + inst->src[j].index = renames[old_idx].new_reg; + } } for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) - for (k = 0; k < num_renames; k++) - if (inst->tex_offsets[j].index == renames[k].old_reg) - inst->tex_offsets[j].index = renames[k].new_reg; + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { + int old_idx = inst->tex_offsets[j].index; + if (renames[old_idx].valid) + inst->tex_offsets[j].index = renames[old_idx].new_reg; + } + } + + if (inst->resource.file == PROGRAM_TEMPORARY) { + int old_idx = inst->resource.index; + if (renames[old_idx].valid) + inst->resource.index = renames[old_idx].new_reg; } for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) - for (k = 0; k < num_renames; k++) - if (inst->dst[j].index == renames[k].old_reg) - inst->dst[j].index = renames[k].new_reg; + if (inst->dst[j].file == PROGRAM_TEMPORARY) { + int old_idx = inst->dst[j].index; + if (renames[old_idx].valid) + inst->dst[j].index = renames[old_idx].new_reg;} + } + } +} + +void +glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) { + if (first_writes[inst->dst[j].index] == -1) + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; } + assert(depth >= 0); + i++; } } @@ -5178,6 +5311,21 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) } } } + + if (inst->resource.file == PROGRAM_TEMPORARY) { + int src_chans; + + src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) + writes[4 * inst->resource.index + c] = NULL; + } + } + break; } @@ -5294,11 +5442,10 @@ glsl_to_tgsi_visitor::merge_two_dsts(void) void glsl_to_tgsi_visitor::merge_registers(void) { - int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); - int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int *last_reads = ralloc_array(mem_ctx, int, this->next_temp); + int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); int i, j; - int num_renames = 0; /* Read the indices of the last read and first write to each temp register * into an array so that we don't have to traverse the instruction list as @@ -5325,9 +5472,8 @@ glsl_to_tgsi_visitor::merge_registers(void) * as the register at index j. */ if (first_writes[i] <= first_writes[j] && last_reads[i] <= first_writes[j]) { - renames[num_renames].old_reg = j; - renames[num_renames].new_reg = i; - num_renames++; + renames[j].new_reg = i; + renames[j].valid = true; /* Update the first_writes and last_reads arrays with the new * values for the merged register index, and mark the newly unused @@ -5340,7 +5486,7 @@ glsl_to_tgsi_visitor::merge_registers(void) } } - rename_temp_registers(num_renames, renames); + rename_temp_registers(renames); ralloc_free(renames); ralloc_free(last_reads); ralloc_free(first_writes); @@ -5353,28 +5499,27 @@ glsl_to_tgsi_visitor::renumber_registers(void) { int i = 0; int new_index = 0; - int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); - int num_renames = 0; + for (i = 0; i < this->next_temp; i++) { - first_reads[i] = -1; + first_writes[i] = -1; } - get_first_temp_read(first_reads); + get_first_temp_write(first_writes); for (i = 0; i < this->next_temp; i++) { - if (first_reads[i] < 0) continue; + if (first_writes[i] < 0) continue; if (i != new_index) { - renames[num_renames].old_reg = i; - renames[num_renames].new_reg = new_index; - num_renames++; + renames[i].new_reg = new_index; + renames[i].valid = true; } new_index++; } - rename_temp_registers(num_renames, renames); + rename_temp_registers(renames); this->next_temp = new_index; ralloc_free(renames); - ralloc_free(first_reads); + ralloc_free(first_writes); } /* ------------------------- TGSI conversion stuff -------------------------- */ @@ -5408,8 +5553,8 @@ struct st_translate { struct inout_decl *output_decls; unsigned num_output_decls; - const GLuint *inputMapping; - const GLuint *outputMapping; + const ubyte *inputMapping; + const ubyte *outputMapping; unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ }; @@ -5816,7 +5961,7 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: assert(num_dst == 0); - ureg_insn(ureg, inst->op, NULL, 0, src, num_src); + ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise); return; case TGSI_OPCODE_TEX: @@ -5834,7 +5979,12 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TG4: case TGSI_OPCODE_LODQ: - src[num_src] = t->samplers[inst->resource.index]; + if (inst->resource.file == PROGRAM_SAMPLER) { + src[num_src] = t->samplers[inst->resource.index]; + } else { + /* Bindless samplers. */ + src[num_src] = translate_src(t, &inst->resource); + } assert(src[num_src].File != TGSI_FILE_NULL); if (inst->resource.reladdr) src[num_src] = @@ -5849,6 +5999,7 @@ compile_tgsi_instruction(struct st_translate *t, inst->op, dst, num_dst, tex_target, + st_translate_texture_type(inst->tex_type), texoffsets, inst->tex_offset_num_offset, src, num_src); return; @@ -5873,7 +6024,12 @@ compile_tgsi_instruction(struct st_translate *t, } else if (inst->resource.file == PROGRAM_BUFFER) { src[0] = t->buffers[inst->resource.index]; } else { - src[0] = t->images[inst->resource.index]; + if (inst->resource.file == PROGRAM_IMAGE) { + src[0] = t->images[inst->resource.index]; + } else { + /* Bindless images. */ + src[0] = translate_src(t, &inst->resource); + } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); } if (inst->resource.reladdr) @@ -5890,7 +6046,12 @@ compile_tgsi_instruction(struct st_translate *t, } else if (inst->resource.file == PROGRAM_BUFFER) { dst[0] = ureg_dst(t->buffers[inst->resource.index]); } else { - dst[0] = ureg_dst(t->images[inst->resource.index]); + if (inst->resource.file == PROGRAM_IMAGE) { + dst[0] = ureg_dst(t->images[inst->resource.index]); + } else { + /* Bindless images. */ + dst[0] = ureg_dst(translate_src(t, &inst->resource)); + } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); } dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); @@ -5904,14 +6065,14 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); - ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src, inst->precise); break; default: ureg_insn(ureg, inst->op, dst, num_dst, - src, num_src); + src, num_src, inst->precise); break; } } @@ -6144,7 +6305,7 @@ struct sort_inout_decls { return mapping[a.mesa_index] < mapping[b.mesa_index]; } - const GLuint *mapping; + const ubyte *mapping; }; /* Sort the given array of decls by the corresponding slot (TGSI file index). @@ -6155,7 +6316,7 @@ struct sort_inout_decls { static void sort_inout_decls_by_slot(struct inout_decl *decls, unsigned count, - const GLuint mapping[]) + const ubyte mapping[]) { sort_inout_decls sorter; sorter.mapping = mapping; @@ -6209,14 +6370,13 @@ st_translate_program( glsl_to_tgsi_visitor *program, const struct gl_program *proginfo, GLuint numInputs, - const GLuint inputMapping[], - const GLuint inputSlotToAttr[], + const ubyte inputMapping[], + const ubyte inputSlotToAttr[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], - const GLuint interpMode[], + const ubyte interpMode[], GLuint numOutputs, - const GLuint outputMapping[], - const GLuint outputSlotToAttr[], + const ubyte outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[]) { @@ -6354,9 +6514,14 @@ st_translate_program( } if (procType == PIPE_SHADER_FRAGMENT) { - if (program->shader->Program->info.fs.early_fragment_tests) + if (program->shader->Program->info.fs.early_fragment_tests || + program->shader->Program->info.fs.post_depth_coverage) { ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); + if (program->shader->Program->info.fs.post_depth_coverage) + ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1); + } + if (proginfo->info.inputs_read & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. */ emit_wpos(st_context(ctx), t, proginfo, ureg, @@ -6543,39 +6708,29 @@ st_translate_program( /* texture samplers */ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { if (program->samplers_used & (1u << i)) { - unsigned type; + unsigned type = st_translate_texture_type(program->sampler_types[i]); t->samplers[i] = ureg_DECL_sampler(ureg, i); - switch (program->sampler_types[i]) { - case GLSL_TYPE_INT: - type = TGSI_RETURN_TYPE_SINT; - break; - case GLSL_TYPE_UINT: - type = TGSI_RETURN_TYPE_UINT; - break; - case GLSL_TYPE_FLOAT: - type = TGSI_RETURN_TYPE_FLOAT; - break; - default: - unreachable("not reached"); - } - ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i], type, type, type, type ); } } - for (i = 0; i < frag_const->MaxAtomicBuffers; i++) { - if (program->buffers_used & (1 << i)) { - t->buffers[i] = ureg_DECL_buffer(ureg, i, true); + /* Declare atomic and shader storage buffers. */ + { + struct gl_program *prog = program->prog; + + for (i = 0; i < prog->info.num_abos; i++) { + unsigned index = prog->sh.AtomicBuffers[i]->Binding; + assert(index < frag_const->MaxAtomicBuffers); + t->buffers[index] = ureg_DECL_buffer(ureg, index, true); } - } - for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks; - i++) { - if (program->buffers_used & (1 << i)) { - t->buffers[i] = ureg_DECL_buffer(ureg, i, false); + assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks); + for (i = 0; i < prog->info.num_ssbos; i++) { + unsigned index = frag_const->MaxAtomicBuffers + i; + t->buffers[index] = ureg_DECL_buffer(ureg, index, false); } } @@ -6605,27 +6760,8 @@ st_translate_program( for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) { if (program->shader_program->_LinkedShaders[i]) { - unsigned next; - - switch (i) { - case MESA_SHADER_TESS_CTRL: - next = PIPE_SHADER_TESS_CTRL; - break; - case MESA_SHADER_TESS_EVAL: - next = PIPE_SHADER_TESS_EVAL; - break; - case MESA_SHADER_GEOMETRY: - next = PIPE_SHADER_GEOMETRY; - break; - case MESA_SHADER_FRAGMENT: - next = PIPE_SHADER_FRAGMENT; - break; - default: - assert(0); - continue; - } - - ureg_set_next_shader_processor(ureg, next); + ureg_set_next_shader_processor( + ureg, pipe_shader_type_from_mesa((gl_shader_stage)i)); break; } } @@ -6685,6 +6821,9 @@ get_mesa_program_tgsi(struct gl_context *ctx, PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); v->has_tex_txf_lz = pscreen->get_param(pscreen, PIPE_CAP_TGSI_TEX_TXF_LZ); + + v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); skip_merge_registers = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS); @@ -6704,10 +6843,10 @@ get_mesa_program_tgsi(struct gl_context *ctx, * optimization passes. */ { int i; - int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); - int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); - int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); - int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp); + int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp); + int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp); + int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp); for (i = 0; i < v->next_temp; i++) { first_writes[i] = -1; @@ -6793,8 +6932,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters, - true); + _mesa_associate_uniform_storage(ctx, shader_program, prog, true); if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); _mesa_reference_program(ctx, &shader->Program, NULL); @@ -6803,9 +6941,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, struct st_vertex_program *stvp; struct st_fragment_program *stfp; - struct st_geometry_program *stgp; - struct st_tessctrl_program *sttcp; - struct st_tesseval_program *sttep; + struct st_common_program *stp; struct st_compute_program *stcp; switch (shader->Stage) { @@ -6817,17 +6953,11 @@ get_mesa_program_tgsi(struct gl_context *ctx, stfp = (struct st_fragment_program *)prog; stfp->glsl_to_tgsi = v; break; - case MESA_SHADER_GEOMETRY: - stgp = (struct st_geometry_program *)prog; - stgp->glsl_to_tgsi = v; - break; case MESA_SHADER_TESS_CTRL: - sttcp = (struct st_tessctrl_program *)prog; - sttcp->glsl_to_tgsi = v; - break; case MESA_SHADER_TESS_EVAL: - sttep = (struct st_tesseval_program *)prog; - sttep->glsl_to_tgsi = v; + case MESA_SHADER_GEOMETRY: + stp = st_common_program(prog); + stp->glsl_to_tgsi = v; break; case MESA_SHADER_COMPUTE: stcp = (struct st_compute_program *)prog; @@ -7043,10 +7173,11 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) struct gl_program *linked_prog = NULL; if (preferred_ir == PIPE_SHADER_IR_NIR) { - /* TODO only for GLSL VS/FS for now: */ + /* TODO only for GLSL VS/FS/CS for now: */ switch (shader->Stage) { case MESA_SHADER_VERTEX: case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: linked_prog = st_nir_get_mesa_program(ctx, prog, shader); default: break; @@ -7071,7 +7202,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) void st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, - const GLuint outputMapping[], + const ubyte outputMapping[], struct pipe_stream_output_info *so) { if (!glsl_to_tgsi->shader_program->last_vert_prog) @@ -7084,7 +7215,7 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, void st_translate_stream_output_info2(struct gl_transform_feedback_info *info, - const GLuint outputMapping[], + const ubyte outputMapping[], struct pipe_stream_output_info *so) { unsigned i;