X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader_nir.c;h=20031ad3f60321298e3ee2827a25146df6871b90;hb=925161c84c2bb86032203245679ce3615c1e62b2;hp=65da6384c7d78e5a28242a05f7fc5223e6f88a3a;hpb=6fefd6972438054e3fb45666b6714a5c9c4859b9;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 65da6384c7d..20031ad3f60 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -31,6 +31,7 @@ #include "compiler/nir/nir.h" #include "compiler/nir_types.h" +#include "compiler/nir/nir_builder.h" static nir_variable* tex_get_texture_var(nir_tex_instr *instr) { @@ -51,7 +52,121 @@ static nir_variable* intrinsic_get_var(nir_intrinsic_instr *instr) return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[0])); } -static void scan_instruction(struct tgsi_shader_info *info, +static void gather_intrinsic_load_deref_input_info(const nir_shader *nir, + const nir_intrinsic_instr *instr, + nir_variable *var, + struct tgsi_shader_info *info) +{ + assert(var && var->data.mode == nir_var_shader_in); + + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: { + unsigned i = var->data.driver_location; + unsigned attrib_count = glsl_count_attribute_slots(var->type, false); + uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); + + for (unsigned j = 0; j < attrib_count; j++, i++) { + if (glsl_type_is_64bit(glsl_without_array(var->type))) { + unsigned dmask = mask; + + if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2) + dmask >>= 2; + + dmask <<= var->data.location_frac / 2; + + if (dmask & 0x1) + info->input_usage_mask[i] |= TGSI_WRITEMASK_XY; + if (dmask & 0x2) + info->input_usage_mask[i] |= TGSI_WRITEMASK_ZW; + } else { + info->input_usage_mask[i] |= + (mask << var->data.location_frac) & 0xf; + } + } + break; + } + case MESA_SHADER_FRAGMENT: + if (var->data.location == VARYING_SLOT_COL0 || + var->data.location == VARYING_SLOT_COL1) { + unsigned index = var->data.location == VARYING_SLOT_COL1; + uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); + info->colors_read |= mask << (index * 4); + } + break; + default:; + } +} + +static void gather_intrinsic_load_deref_output_info(const nir_shader *nir, + const nir_intrinsic_instr *instr, + nir_variable *var, + struct tgsi_shader_info *info) +{ + assert(var && var->data.mode == nir_var_shader_out); + + switch (nir->info.stage) { + case MESA_SHADER_TESS_CTRL: + if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || + var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) + info->reads_tessfactor_outputs = true; + else if (var->data.patch) + info->reads_perpatch_outputs = true; + else + info->reads_pervertex_outputs = true; + break; + + case MESA_SHADER_FRAGMENT: + if (var->data.fb_fetch_output) + info->uses_fbfetch = true; + break; + default:; + } +} + +static void gather_intrinsic_store_deref_output_info(const nir_shader *nir, + const nir_intrinsic_instr *instr, + nir_variable *var, + struct tgsi_shader_info *info) +{ + assert(var && var->data.mode == nir_var_shader_out); + + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: /* needed by LS, ES */ + case MESA_SHADER_TESS_EVAL: /* needed by ES */ + case MESA_SHADER_GEOMETRY: { + unsigned i = var->data.driver_location; + unsigned attrib_count = glsl_count_attribute_slots(var->type, false); + unsigned mask = nir_intrinsic_write_mask(instr); + + assert(!var->data.compact); + + for (unsigned j = 0; j < attrib_count; j++, i++) { + if (glsl_type_is_64bit(glsl_without_array(var->type))) { + unsigned dmask = mask; + + if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2) + dmask >>= 2; + + dmask <<= var->data.location_frac / 2; + + if (dmask & 0x1) + info->output_usagemask[i] |= TGSI_WRITEMASK_XY; + if (dmask & 0x2) + info->output_usagemask[i] |= TGSI_WRITEMASK_ZW; + } else { + info->output_usagemask[i] |= + (mask << var->data.location_frac) & 0xf; + } + + } + break; + } + default:; + } +} + +static void scan_instruction(const struct nir_shader *nir, + struct tgsi_shader_info *info, nir_instr *instr) { if (instr->type == nir_instr_type_alu) { @@ -133,6 +248,9 @@ static void scan_instruction(struct tgsi_shader_info *info, case nir_intrinsic_load_base_vertex: info->uses_basevertex = 1; break; + case nir_intrinsic_load_draw_id: + info->uses_drawid = 1; + break; case nir_intrinsic_load_primitive_id: info->uses_primid = 1; break; @@ -143,39 +261,51 @@ static void scan_instruction(struct tgsi_shader_info *info, case nir_intrinsic_load_tess_level_outer: info->reads_tess_factors = true; break; - case nir_intrinsic_image_deref_load: { - nir_variable *var = intrinsic_get_var(intr); - if (var->data.bindless) { - info->uses_bindless_images = true; + case nir_intrinsic_bindless_image_load: + info->uses_bindless_images = true; - if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_BUF) - info->uses_bindless_buffer_load = true; - else - info->uses_bindless_image_load = true; - } + if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_load = true; + else + info->uses_bindless_image_load = true; break; - } - case nir_intrinsic_image_deref_size: - case nir_intrinsic_image_deref_samples: { - nir_variable *var = intrinsic_get_var(intr); - if (var->data.bindless) - info->uses_bindless_images = true; + case nir_intrinsic_bindless_image_size: + case nir_intrinsic_bindless_image_samples: + info->uses_bindless_images = true; break; - } - case nir_intrinsic_image_deref_store: { - const nir_deref_instr *image_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr); - nir_variable *var = intrinsic_get_var(intr); - if (var->data.bindless) { - info->uses_bindless_images = true; + case nir_intrinsic_bindless_image_store: + info->uses_bindless_images = true; + + if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_store = true; + else + info->uses_bindless_image_store = true; - if (glsl_get_sampler_dim(image_deref->type) == GLSL_SAMPLER_DIM_BUF) - info->uses_bindless_buffer_store = true; - else - info->uses_bindless_image_store = true; - } info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ + break; + case nir_intrinsic_image_deref_store: + info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ + break; + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_min: + case nir_intrinsic_bindless_image_atomic_max: + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_comp_swap: + info->uses_bindless_images = true; + + if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_atomic = true; + else + info->uses_bindless_image_atomic = true; + + info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ break; - } case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_image_deref_atomic_min: case nir_intrinsic_image_deref_atomic_max: @@ -183,19 +313,10 @@ static void scan_instruction(struct tgsi_shader_info *info, case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_image_deref_atomic_xor: case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: { - nir_variable *var = intrinsic_get_var(intr); - if (var->data.bindless) { - info->uses_bindless_images = true; - - if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_BUF) - info->uses_bindless_buffer_atomic = true; - else - info->uses_bindless_image_atomic = true; - } + case nir_intrinsic_image_deref_atomic_comp_swap: info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ break; - } case nir_intrinsic_store_ssbo: case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: @@ -208,6 +329,7 @@ static void scan_instruction(struct tgsi_shader_info *info, case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ break; case nir_intrinsic_load_deref: { nir_variable *var = intrinsic_get_var(intr); @@ -216,6 +338,8 @@ static void scan_instruction(struct tgsi_shader_info *info, glsl_get_base_type(glsl_without_array(var->type)); if (mode == nir_var_shader_in) { + gather_intrinsic_load_deref_input_info(nir, intr, var, info); + switch (var->data.interpolation) { case INTERP_MODE_NONE: if (glsl_base_type_is_integer(base_type)) @@ -240,9 +364,18 @@ static void scan_instruction(struct tgsi_shader_info *info, info->uses_linear_center = true; break; } + } else if (mode == nir_var_shader_out) { + gather_intrinsic_load_deref_output_info(nir, intr, var, info); } break; } + case nir_intrinsic_store_deref: { + nir_variable *var = intrinsic_get_var(intr); + + if (var->data.mode == nir_var_shader_out) + gather_intrinsic_store_deref_output_info(nir, intr, var, info); + break; + } case nir_intrinsic_interp_deref_at_centroid: case nir_intrinsic_interp_deref_at_sample: case nir_intrinsic_interp_deref_at_offset: { @@ -303,6 +436,11 @@ void si_nir_scan_shader(const struct nir_shader *nir, info->properties[TGSI_PROPERTY_NEXT_SHADER] = pipe_shader_type_from_mesa(nir->info.next_stage); + if (nir->info.stage == MESA_SHADER_VERTEX) { + info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] = + nir->info.vs.window_space_position; + } + if (nir->info.stage == MESA_SHADER_TESS_CTRL) { info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] = nir->info.tess.tcs_vertices_out; @@ -362,7 +500,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, } } - if (nir->info.stage == MESA_SHADER_COMPUTE) { + if (gl_shader_stage_is_compute(nir->info.stage)) { info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0]; info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1]; info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2]; @@ -390,25 +528,8 @@ void si_nir_scan_shader(const struct nir_shader *nir, * variable->data.driver_location. */ if (nir->info.stage == MESA_SHADER_VERTEX) { - /* TODO: gather the actual input useage and remove this. */ - info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW; - - if (glsl_type_is_dual_slot(variable->type)) { - num_inputs += 2; - - /* TODO: gather the actual input useage and remove this. */ - info->input_usage_mask[i+1] = TGSI_WRITEMASK_XYZW; - } else + if (glsl_type_is_dual_slot(glsl_without_array(variable->type))) num_inputs++; - continue; - } - - /* Fragment shader position is a system value. */ - if (nir->info.stage == MESA_SHADER_FRAGMENT && - variable->data.location == VARYING_SLOT_POS) { - if (variable->data.pixel_center_integer) - info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] = - TGSI_FS_COORD_PIXEL_CENTER_INTEGER; num_inputs++; continue; @@ -431,57 +552,53 @@ void si_nir_scan_shader(const struct nir_shader *nir, if (semantic_name == TGSI_SEMANTIC_PRIMID) info->uses_primid = true; - if (variable->data.sample) - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE; - else if (variable->data.centroid) - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID; - else - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER; + if (semantic_name == TGSI_SEMANTIC_COLOR) { + /* We only need this for color inputs. */ + if (variable->data.sample) + info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE; + else if (variable->data.centroid) + info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID; + else + info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER; + } - enum glsl_base_type base_type = - glsl_get_base_type(glsl_without_array(variable->type)); + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(variable->type)); - switch (variable->data.interpolation) { - case INTERP_MODE_NONE: - if (glsl_base_type_is_integer(base_type)) { - info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; - break; - } + switch (variable->data.interpolation) { + case INTERP_MODE_NONE: + if (glsl_base_type_is_integer(base_type)) { + info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; + break; + } - if (semantic_name == TGSI_SEMANTIC_COLOR) { - info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; - break; - } - /* fall-through */ - - case INTERP_MODE_SMOOTH: - assert(!glsl_base_type_is_integer(base_type)); + if (semantic_name == TGSI_SEMANTIC_COLOR) { + info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; + break; + } + /* fall-through */ - info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; - break; + case INTERP_MODE_SMOOTH: + assert(!glsl_base_type_is_integer(base_type)); - case INTERP_MODE_NOPERSPECTIVE: - assert(!glsl_base_type_is_integer(base_type)); + info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; + break; - info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; - break; + case INTERP_MODE_NOPERSPECTIVE: + assert(!glsl_base_type_is_integer(base_type)); - case INTERP_MODE_FLAT: - info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; - break; - } + info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; + break; - /* TODO make this more precise */ - if (variable->data.location == VARYING_SLOT_COL0) - info->colors_read |= 0x0f; - else if (variable->data.location == VARYING_SLOT_COL1) - info->colors_read |= 0xf0; + case INTERP_MODE_FLAT: + info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; + break; + } } } info->num_inputs = num_inputs; - i = 0; uint64_t processed_outputs = 0; unsigned num_outputs = 0; @@ -563,22 +680,18 @@ void si_nir_scan_shader(const struct nir_shader *nir, unsigned streamw = (gs_out_streams >> 6) & 3; if (usagemask & TGSI_WRITEMASK_X) { - info->output_usagemask[i] |= TGSI_WRITEMASK_X; info->output_streams[i] |= streamx; info->num_stream_output_components[streamx]++; } if (usagemask & TGSI_WRITEMASK_Y) { - info->output_usagemask[i] |= TGSI_WRITEMASK_Y; info->output_streams[i] |= streamy << 2; info->num_stream_output_components[streamy]++; } if (usagemask & TGSI_WRITEMASK_Z) { - info->output_usagemask[i] |= TGSI_WRITEMASK_Z; info->output_streams[i] |= streamz << 4; info->num_stream_output_components[streamz]++; } if (usagemask & TGSI_WRITEMASK_W) { - info->output_usagemask[i] |= TGSI_WRITEMASK_W; info->output_streams[i] |= streamw << 6; info->num_stream_output_components[streamw]++; } @@ -630,20 +743,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, info->writes_position = true; break; } - - if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - switch (semantic_name) { - case TGSI_SEMANTIC_PATCH: - info->reads_perpatch_outputs = true; - break; - case TGSI_SEMANTIC_TESSINNER: - case TGSI_SEMANTIC_TESSOUTER: - info->reads_tessfactor_outputs = true; - break; - default: - info->reads_pervertex_outputs = true; - } - } } unsigned loc = variable->data.location; @@ -659,16 +758,24 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + struct set *ssbo_set = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); /* Intialise const_file_max[0] */ info->const_file_max[0] = -1; + /* The first 8 are reserved for atomic counters using ssbo */ + unsigned ssbo_idx = 8; + unsigned ubo_idx = 1; nir_foreach_variable(variable, &nir->uniforms) { const struct glsl_type *type = variable->type; enum glsl_base_type base_type = glsl_get_base_type(glsl_without_array(type)); unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); + unsigned loc = variable->data.driver_location / 4; + int slot_count = glsl_count_attribute_slots(type, false); + int max_slot = MAX2(info->const_file_max[0], (int) loc) + slot_count; /* Gather buffers declared bitmasks. Note: radeonsi doesn't * really use the mask (other than ubo_idx == 1 for regular @@ -677,12 +784,16 @@ void si_nir_scan_shader(const struct nir_shader *nir, */ if (variable->interface_type != NULL) { if (variable->data.mode == nir_var_uniform || - variable->data.mode == nir_var_mem_ubo) { + variable->data.mode == nir_var_mem_ubo || + variable->data.mode == nir_var_mem_ssbo) { + + struct set *buf_set = variable->data.mode == nir_var_mem_ssbo ? + ssbo_set : ubo_set; unsigned block_count; if (base_type != GLSL_TYPE_INTERFACE) { struct set_entry *entry = - _mesa_set_search(ubo_set, variable->interface_type); + _mesa_set_search(buf_set, variable->interface_type); /* Check if we have already processed * a member from this ubo. @@ -695,16 +806,18 @@ void si_nir_scan_shader(const struct nir_shader *nir, block_count = aoa_size; } - info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count); - ubo_idx += block_count; + if (variable->data.mode == nir_var_uniform || + variable->data.mode == nir_var_mem_ubo) { + info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count); + ubo_idx += block_count; + } else { + assert(variable->data.mode == nir_var_mem_ssbo); - _mesa_set_add(ubo_set, variable->interface_type); - } + info->shader_buffers_declared |= u_bit_consecutive(ssbo_idx, block_count); + ssbo_idx += block_count; + } - if (variable->data.mode == nir_var_mem_ssbo) { - /* TODO: make this more accurate */ - info->shader_buffers_declared = - u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); + _mesa_set_add(buf_set, variable->interface_type); } continue; @@ -713,41 +826,20 @@ void si_nir_scan_shader(const struct nir_shader *nir, /* We rely on the fact that nir_lower_samplers_as_deref has * eliminated struct dereferences. */ - if (base_type == GLSL_TYPE_SAMPLER) { - if (variable->data.bindless) { - info->const_buffers_declared |= 1; - info->const_file_max[0] += - glsl_count_attribute_slots(type, false); - } else { - info->samplers_declared |= - u_bit_consecutive(variable->data.binding, aoa_size); - } - } else if (base_type == GLSL_TYPE_IMAGE) { - if (variable->data.bindless) { - info->const_buffers_declared |= 1; - info->const_file_max[0] += - glsl_count_attribute_slots(type, false); - } else { - info->images_declared |= - u_bit_consecutive(variable->data.binding, aoa_size); - } + if (base_type == GLSL_TYPE_SAMPLER && !variable->data.bindless) { + info->samplers_declared |= + u_bit_consecutive(variable->data.binding, aoa_size); + } else if (base_type == GLSL_TYPE_IMAGE && !variable->data.bindless) { + info->images_declared |= + u_bit_consecutive(variable->data.binding, aoa_size); } else if (base_type != GLSL_TYPE_ATOMIC_UINT) { - if (strncmp(variable->name, "state.", 6) == 0 || - strncmp(variable->name, "gl_", 3) == 0) { - /* FIXME: figure out why piglit tests with builtin - * uniforms are failing without this. - */ - info->const_buffers_declared = - u_bit_consecutive(0, SI_NUM_CONST_BUFFERS); - } else { - info->const_buffers_declared |= 1; - info->const_file_max[0] += - glsl_count_attribute_slots(type, false); - } + info->const_buffers_declared |= 1; + info->const_file_max[0] = max_slot; } } _mesa_set_destroy(ubo_set, NULL); + _mesa_set_destroy(ssbo_set, NULL); info->num_written_clipdistance = nir->info.clip_distance_array_size; info->num_written_culldistance = nir->info.cull_distance_array_size; @@ -760,23 +852,156 @@ void si_nir_scan_shader(const struct nir_shader *nir, func = (struct nir_function *)exec_list_get_head_const(&nir->functions); nir_foreach_block(block, func->impl) { nir_foreach_instr(instr, block) - scan_instruction(info, instr); + scan_instruction(nir, info, instr); } } +void +si_nir_opts(struct nir_shader *nir) +{ + bool progress; + unsigned lower_flrp = + (nir->options->lower_flrp16 ? 16 : 0) | + (nir->options->lower_flrp32 ? 32 : 0) | + (nir->options->lower_flrp64 ? 64 : 0); + + do { + progress = false; + + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_dead_write_vars); + + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL); + NIR_PASS_V(nir, nir_lower_phis_to_scalar); + + /* (Constant) copy propagation is needed for txf with offsets. */ + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_dce); + if (nir_opt_trivial_continues(nir)) { + progress = true; + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + } + NIR_PASS(progress, nir, nir_opt_if, true); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); + + /* Needed for algebraic lowering */ + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + + if (lower_flrp != 0) { + bool lower_flrp_progress = false; + + NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, + lower_flrp, + false /* always_precise */, + nir->options->lower_ffma); + if (lower_flrp_progress) { + NIR_PASS(progress, nir, + nir_opt_constant_folding); + progress = true; + } + + /* Nothing should rematerialize any flrps, so we only + * need to do this lowering once. + */ + lower_flrp = 0; + } + + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_opt_conditional_discard); + if (nir->options->max_unroll_iterations) { + NIR_PASS(progress, nir, nir_opt_loop_unroll, 0); + } + } while (progress); +} + +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +si_nir_lower_color(nir_shader *nir) +{ + nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); + + nir_builder b; + nir_builder_init(&b, entrypoint); + + nir_foreach_block(block, entrypoint) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = + nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic != nir_intrinsic_load_deref) + continue; + + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (deref->mode != nir_var_shader_in) + continue; + + b.cursor = nir_before_instr(instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + nir_ssa_def *def; + + if (var->data.location == VARYING_SLOT_COL0) { + def = nir_load_color0(&b); + } else if (var->data.location == VARYING_SLOT_COL1) { + def = nir_load_color1(&b); + } else { + continue; + } + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def)); + nir_instr_remove(instr); + } + } +} + /** * Perform "lowering" operations on the NIR that are run once when the shader * selector is created. */ void -si_lower_nir(struct si_shader_selector* sel) +si_lower_nir(struct si_shader_selector* sel, unsigned wave_size) { /* Adjust the driver location of inputs and outputs. The state tracker * interprets them as slots, while the ac/nir backend interprets them * as individual components. */ - nir_foreach_variable(variable, &sel->nir->inputs) - variable->data.driver_location *= 4; + if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) { + nir_foreach_variable(variable, &sel->nir->inputs) + variable->data.driver_location *= 4; + } else { + NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(sel->nir), false, true); + + /* Since we're doing nir_lower_io_to_temporaries late, we need + * to lower all the copy_deref's introduced by + * lower_io_to_temporaries before calling nir_lower_io. + */ + NIR_PASS_V(sel->nir, nir_split_var_copies); + NIR_PASS_V(sel->nir, nir_lower_var_copies); + NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local); + + si_nir_lower_color(sel->nir); + NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0); + + /* This pass needs actual constants */ + NIR_PASS_V(sel->nir, nir_opt_constant_folding); + NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, + nir_var_shader_in); + } nir_foreach_variable(variable, &sel->nir->outputs) { variable->data.driver_location *= 4; @@ -796,10 +1021,6 @@ si_lower_nir(struct si_shader_selector* sel) * - ensure constant offsets for texture instructions are folded * and copy-propagated */ - NIR_PASS_V(sel->nir, nir_lower_returns); - NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); - NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar); - NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); static const struct nir_lower_tex_options lower_tex_options = { .lower_txp = ~0u, @@ -807,8 +1028,8 @@ si_lower_nir(struct si_shader_selector* sel) NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); const nir_lower_subgroups_options subgroups_options = { - .subgroup_size = 64, - .ballot_bit_size = 64, + .subgroup_size = wave_size, + .ballot_bit_size = wave_size, .lower_to_scalar = true, .lower_subgroup_masks = true, .lower_vote_trivial = false, @@ -818,36 +1039,14 @@ si_lower_nir(struct si_shader_selector* sel) ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class); - bool progress; - do { - progress = false; - - /* (Constant) copy propagation is needed for txf with offsets. */ - NIR_PASS(progress, sel->nir, nir_copy_prop); - NIR_PASS(progress, sel->nir, nir_opt_remove_phis); - NIR_PASS(progress, sel->nir, nir_opt_dce); - if (nir_opt_trivial_continues(sel->nir)) { - progress = true; - NIR_PASS(progress, sel->nir, nir_copy_prop); - NIR_PASS(progress, sel->nir, nir_opt_dce); - } - NIR_PASS(progress, sel->nir, nir_opt_if); - NIR_PASS(progress, sel->nir, nir_opt_dead_cf); - NIR_PASS(progress, sel->nir, nir_opt_cse); - NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true); - - /* Needed for algebraic lowering */ - NIR_PASS(progress, sel->nir, nir_opt_algebraic); - NIR_PASS(progress, sel->nir, nir_opt_constant_folding); - - NIR_PASS(progress, sel->nir, nir_opt_undef); - NIR_PASS(progress, sel->nir, nir_opt_conditional_discard); - if (sel->nir->options->max_unroll_iterations) { - NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0); - } - } while (progress); + si_nir_opts(sel->nir); NIR_PASS_V(sel->nir, nir_lower_bool_to_int32); + + /* Strip the resulting shader so that the shader cache is more likely + * to hit from other similar shaders. + */ + nir_strip(sel->nir); } static void declare_nir_input_vs(struct si_shader_context *ctx, @@ -858,24 +1057,6 @@ static void declare_nir_input_vs(struct si_shader_context *ctx, si_llvm_load_input_vs(ctx, input_index, out); } -static void declare_nir_input_fs(struct si_shader_context *ctx, - struct nir_variable *variable, - unsigned input_index, - LLVMValueRef out[4]) -{ - unsigned slot = variable->data.location; - if (slot == VARYING_SLOT_POS) { - out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT); - out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT); - out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT); - out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)); - return; - } - - si_llvm_load_input_fs(ctx, input_index, out); -} - LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi, enum glsl_interp_mode interp, unsigned location) @@ -920,15 +1101,8 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi, bool write, bool bindless) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - const struct tgsi_shader_info *info = &ctx->shader->selector->info; LLVMBuilderRef builder = ctx->ac.builder; unsigned const_index = base_index + constant_index; - bool dcc_off = write; - - /* TODO: images_store and images_atomic are not set */ - if (!dynamic_index && image && - (info->images_store | info->images_atomic) & (1 << const_index)) - dcc_off = true; assert(!descriptor_set); assert(!image || desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER); @@ -939,8 +1113,14 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi, /* dynamic_index is the bindless handle */ if (image) { + /* For simplicity, bindless image descriptors use fixed + * 16-dword slots for now. + */ + dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->i64, 2, 0), ""); + return si_load_image_desc(ctx, list, dynamic_index, desc_type, - dcc_off, true); + write, true); } /* Since bindless handle arithmetic can contain an unsigned integer @@ -949,7 +1129,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi, * to prevent incorrect code generation and hangs. */ dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, - LLVMConstInt(ctx->i32, 2, 0), ""); + LLVMConstInt(ctx->i64, 2, 0), ""); list = ac_build_pointer_add(&ctx->ac, list, dynamic_index); return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type); } @@ -979,7 +1159,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi, index = LLVMBuildSub(ctx->ac.builder, LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0), index, ""); - return si_load_image_desc(ctx, list, index, desc_type, dcc_off, false); + return si_load_image_desc(ctx, list, index, desc_type, write, false); } index = LLVMBuildAdd(ctx->ac.builder, index, @@ -1001,20 +1181,16 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) { struct tgsi_shader_info *info = &ctx->shader->selector->info; - if (nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_VERTEX) { uint64_t processed_inputs = 0; nir_foreach_variable(variable, &nir->inputs) { unsigned attrib_count = glsl_count_attribute_slots(variable->type, - nir->info.stage == MESA_SHADER_VERTEX); + true); unsigned input_idx = variable->data.driver_location; LLVMValueRef data[4]; unsigned loc = variable->data.location; - if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT) - ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4; - for (unsigned i = 0; i < attrib_count; i++) { /* Packed components share the same location so skip * them if we have already processed the location. @@ -1024,24 +1200,50 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) continue; } - if (nir->info.stage == MESA_SHADER_VERTEX) { + declare_nir_input_vs(ctx, variable, input_idx / 4, data); + bitcast_inputs(ctx, data, input_idx); + if (glsl_type_is_dual_slot(variable->type)) { + input_idx += 4; declare_nir_input_vs(ctx, variable, input_idx / 4, data); bitcast_inputs(ctx, data, input_idx); - if (glsl_type_is_dual_slot(variable->type)) { - input_idx += 4; - declare_nir_input_vs(ctx, variable, input_idx / 4, data); - bitcast_inputs(ctx, data, input_idx); - } - } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { - declare_nir_input_fs(ctx, variable, input_idx / 4, data); - bitcast_inputs(ctx, data, input_idx); } processed_inputs |= ((uint64_t)1 << (loc + i)); input_idx += 4; } } - } + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + unsigned colors_read = + ctx->shader->selector->info.colors_read; + LLVMValueRef main_fn = ctx->main_fn; + + LLVMValueRef undef = LLVMGetUndef(ctx->f32); + + unsigned offset = SI_PARAM_POS_FIXED_PT + 1; + + if (colors_read & 0x0f) { + unsigned mask = colors_read & 0x0f; + LLVMValueRef values[4]; + values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; + values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef; + values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; + values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef; + ctx->abi.color0 = + ac_to_integer(&ctx->ac, + ac_build_gather_values(&ctx->ac, values, 4)); + } + if (colors_read & 0xf0) { + unsigned mask = (colors_read & 0xf0) >> 4; + LLVMValueRef values[4]; + values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; + values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef; + values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; + values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef; + ctx->abi.color1 = + ac_to_integer(&ctx->ac, + ac_build_gather_values(&ctx->ac, values, 4)); + } + } ctx->abi.inputs = &ctx->inputs[0]; ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; @@ -1051,7 +1253,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) ctx->num_images = util_last_bit(info->images_declared); if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) { - assert(nir->info.stage == MESA_SHADER_COMPUTE); + assert(gl_shader_stage_is_compute(nir->info.stage)); si_declare_compute_memory(ctx); } ac_nir_translate(&ctx->ac, &ctx->abi, nir);