X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_lower_io.c;h=6fb90c6efbd5e07f541b7c7857dbb15c5f4fae5b;hb=4360a8a2b3fce819e93c2844077ac0b26d234ead;hp=e45fffbcf9e539ee2c4e30c90f7b85585656af32;hpb=86f21e4eba7ad980109f13bd5480c02593ca19fe;p=mesa.git

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index e45fffbcf9e..6fb90c6efbd 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -121,19 +121,13 @@ shared_atomic_for_deref(nir_intrinsic_op deref_op)
 }
 
 void
-nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
+                         unsigned *size,
                          int (*type_size)(const struct glsl_type *, bool))
 {
    unsigned location = 0;
 
-   nir_foreach_variable(var, var_list) {
-      /*
-       * UBOs have their own address spaces, so don't count them towards the
-       * number of global uniforms
-       */
-      if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
-         continue;
-
+   nir_foreach_variable_with_modes(var, shader, mode) {
       var->data.driver_location = location;
       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                 var->data.mode == nir_var_shader_out ||
@@ -165,6 +159,19 @@ nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
    return false;
 }
 
+static unsigned get_number_of_slots(struct lower_io_state *state,
+                                    const nir_variable *var)
+{
+   const struct glsl_type *type = var->type;
+
+   if (nir_is_per_vertex_io(var, state->builder.shader->info.stage)) {
+      assert(glsl_type_is_array(type));
+      type = glsl_get_array_element(type);
+   }
+
+   return state->type_size(type, var->data.bindless);
+}
+
 static nir_ssa_def *
 get_io_offset(nir_builder *b, nir_deref_instr *deref,
               nir_ssa_def **vertex_index,
@@ -276,9 +283,6 @@ emit_load(struct lower_io_state *state,
    case nir_var_uniform:
       op = nir_intrinsic_load_uniform;
       break;
-   case nir_var_mem_shared:
-      op = nir_intrinsic_load_shared;
-      break;
    default:
       unreachable("Unknown variable mode");
    }
@@ -300,6 +304,14 @@ emit_load(struct lower_io_state *state,
        load->intrinsic == nir_intrinsic_load_uniform)
       nir_intrinsic_set_type(load, type);
 
+   if (load->intrinsic != nir_intrinsic_load_uniform) {
+      nir_io_semantics semantics = {0};
+      semantics.location = var->data.location;
+      semantics.num_slots = get_number_of_slots(state, var);
+      semantics.fb_fetch_output = var->data.fb_fetch_output;
+      nir_intrinsic_set_io_semantics(load, semantics);
+   }
+
    if (vertex_index) {
       load->src[0] = nir_src_for_ssa(vertex_index);
       load->src[1] = nir_src_for_ssa(offset);
@@ -376,14 +388,10 @@ emit_store(struct lower_io_state *state, nir_ssa_def *data,
    nir_builder *b = &state->builder;
    nir_variable_mode mode = var->data.mode;
 
+   assert(mode == nir_var_shader_out);
    nir_intrinsic_op op;
-   if (mode == nir_var_mem_shared) {
-      op = nir_intrinsic_store_shared;
-   } else {
-      assert(mode == nir_var_shader_out);
-      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
-                          nir_intrinsic_store_output;
-   }
+   op = vertex_index ? nir_intrinsic_store_per_vertex_output :
+                       nir_intrinsic_store_output;
 
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(state->builder.shader, op);
@@ -406,6 +414,25 @@ emit_store(struct lower_io_state *state, nir_ssa_def *data,
 
    store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
 
+   unsigned gs_streams = 0;
+   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
+      if (var->data.stream & NIR_STREAM_PACKED) {
+         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
+      } else {
+         assert(var->data.stream < 4);
+         gs_streams = 0;
+         for (unsigned i = 0; i < num_components; ++i)
+            gs_streams |= var->data.stream << (2 * i);
+      }
+   }
+
+   nir_io_semantics semantics = {0};
+   semantics.location = var->data.location;
+   semantics.num_slots = get_number_of_slots(state, var);
+   semantics.dual_source_blend_index = var->data.index;
+   semantics.gs_streams = gs_streams;
+   nir_intrinsic_set_io_semantics(store, semantics);
+
    nir_builder_instr_insert(b, &store->instr);
 }
 
@@ -468,40 +495,6 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    }
 }
 
-static nir_ssa_def *
-lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
-             nir_variable *var, nir_ssa_def *offset)
-{
-   nir_builder *b = &state->builder;
-   assert(var->data.mode == nir_var_mem_shared);
-
-   nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
-
-   nir_intrinsic_instr *atomic =
-      nir_intrinsic_instr_create(state->builder.shader, op);
-
-   nir_intrinsic_set_base(atomic, var->data.driver_location);
-
-   atomic->src[0] = nir_src_for_ssa(offset);
-   assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
-          nir_intrinsic_infos[op].num_srcs);
-   for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
-      nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
-   }
-
-   if (nir_intrinsic_infos[op].has_dest) {
-      assert(intrin->dest.is_ssa);
-      assert(nir_intrinsic_infos[intrin->intrinsic].has_dest);
-      nir_ssa_dest_init(&atomic->instr, &atomic->dest,
-                        intrin->dest.ssa.num_components,
-                        intrin->dest.ssa.bit_size, NULL);
-   }
-
-   nir_builder_instr_insert(b, &atomic->instr);
-
-   return nir_intrinsic_infos[op].has_dest ? &atomic->dest.ssa : NULL;
-}
-
 static nir_ssa_def *
 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                      nir_variable *var, nir_ssa_def *offset, unsigned component,
@@ -566,6 +559,11 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    nir_intrinsic_set_base(load, var->data.driver_location);
    nir_intrinsic_set_component(load, component);
 
+   nir_io_semantics semantics = {0};
+   semantics.location = var->data.location;
+   semantics.num_slots = get_number_of_slots(state, var);
+   nir_intrinsic_set_io_semantics(load, semantics);
+
    load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
    load->src[1] = nir_src_for_ssa(offset);
 
@@ -595,20 +593,6 @@ nir_lower_io_block(nir_block *block,
       switch (intrin->intrinsic) {
       case nir_intrinsic_load_deref:
       case nir_intrinsic_store_deref:
-      case nir_intrinsic_deref_atomic_add:
-      case nir_intrinsic_deref_atomic_imin:
-      case nir_intrinsic_deref_atomic_umin:
-      case nir_intrinsic_deref_atomic_imax:
-      case nir_intrinsic_deref_atomic_umax:
-      case nir_intrinsic_deref_atomic_and:
-      case nir_intrinsic_deref_atomic_or:
-      case nir_intrinsic_deref_atomic_xor:
-      case nir_intrinsic_deref_atomic_exchange:
-      case nir_intrinsic_deref_atomic_comp_swap:
-      case nir_intrinsic_deref_atomic_fadd:
-      case nir_intrinsic_deref_atomic_fmin:
-      case nir_intrinsic_deref_atomic_fmax:
-      case nir_intrinsic_deref_atomic_fcomp_swap:
         /* We can lower the io for this nir instrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
@@ -626,16 +610,10 @@ nir_lower_io_block(nir_block *block,
       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 
       nir_variable_mode mode = deref->mode;
-
+      assert(util_is_power_of_two_nonzero(mode));
       if ((state->modes & mode) == 0)
          continue;
 
-      if (mode != nir_var_shader_in &&
-          mode != nir_var_shader_out &&
-          mode != nir_var_mem_shared &&
-          mode != nir_var_uniform)
-         continue;
-
       nir_variable *var = nir_deref_instr_get_variable(deref);
 
       b->cursor = nir_before_instr(instr);
@@ -666,24 +644,6 @@ nir_lower_io_block(nir_block *block,
                                  component_offset, deref->type);
          break;
 
-      case nir_intrinsic_deref_atomic_add:
-      case nir_intrinsic_deref_atomic_imin:
-      case nir_intrinsic_deref_atomic_umin:
-      case nir_intrinsic_deref_atomic_imax:
-      case nir_intrinsic_deref_atomic_umax:
-      case nir_intrinsic_deref_atomic_and:
-      case nir_intrinsic_deref_atomic_or:
-      case nir_intrinsic_deref_atomic_xor:
-      case nir_intrinsic_deref_atomic_exchange:
-      case nir_intrinsic_deref_atomic_comp_swap:
-      case nir_intrinsic_deref_atomic_fadd:
-      case nir_intrinsic_deref_atomic_fmin:
-      case nir_intrinsic_deref_atomic_fmax:
-      case nir_intrinsic_deref_atomic_fcomp_swap:
-         assert(vertex_index == NULL);
-         replacement = lower_atomic(intrin, state, var, offset);
-         break;
-
       case nir_intrinsic_interp_deref_at_centroid:
       case nir_intrinsic_interp_deref_at_sample:
       case nir_intrinsic_interp_deref_at_offset:
@@ -723,6 +683,10 @@ nir_lower_io_impl(nir_function_impl *impl,
    state.type_size = type_size;
    state.options = options;
 
+   ASSERTED nir_variable_mode supported_modes =
+      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
+   assert(!(modes & ~supported_modes));
+
    nir_foreach_block(block, impl) {
       progress |= nir_lower_io_block(block, &state);
    }
@@ -734,6 +698,15 @@ nir_lower_io_impl(nir_function_impl *impl,
    return progress;
 }
 
+/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
+ *
+ * This pass is intended to be used for cross-stage shader I/O and driver-
+ * managed uniforms to turn deref-based access into a simpler model using
+ * locations or offsets. For fragment shader inputs, it can optionally turn
+ * load_deref into an explicit interpolation using barycentrics coming from
+ * one of the load_barycentric_* intrinsics. This pass requires that all
+ * deref chains are complete and contain no casts.
+ */
 bool
 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
              int (*type_size)(const struct glsl_type *, bool),
@@ -764,17 +737,23 @@ build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                 nir_address_format addr_format, nir_ssa_def *offset)
 {
    assert(offset->num_components == 1);
-   assert(addr->bit_size == offset->bit_size);
 
    switch (addr_format) {
    case nir_address_format_32bit_global:
    case nir_address_format_64bit_global:
    case nir_address_format_32bit_offset:
+      assert(addr->bit_size == offset->bit_size);
       assert(addr->num_components == 1);
       return nir_iadd(b, addr, offset);
 
+   case nir_address_format_32bit_offset_as_64bit:
+      assert(addr->num_components == 1);
+      assert(offset->bit_size == 32);
+      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
+
    case nir_address_format_64bit_bounded_global:
       assert(addr->num_components == 4);
+      assert(addr->bit_size == offset->bit_size);
       return nir_vec4(b, nir_channel(b, addr, 0),
                          nir_channel(b, addr, 1),
                          nir_channel(b, addr, 2),
@@ -782,38 +761,133 @@ build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
 
    case nir_address_format_32bit_index_offset:
       assert(addr->num_components == 2);
+      assert(addr->bit_size == offset->bit_size);
       return nir_vec2(b, nir_channel(b, addr, 0),
                          nir_iadd(b, nir_channel(b, addr, 1), offset));
+
+   case nir_address_format_32bit_index_offset_pack64:
+      assert(addr->num_components == 1);
+      assert(offset->bit_size == 32);
+      return nir_pack_64_2x32_split(b,
+         nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
+         nir_unpack_64_2x32_split_y(b, addr));
+
+   case nir_address_format_vec2_index_32bit_offset:
+      assert(addr->num_components == 3);
+      assert(offset->bit_size == 32);
+      return nir_vec3(b, nir_channel(b, addr, 0), nir_channel(b, addr, 1),
+                      nir_iadd(b, nir_channel(b, addr, 2), offset));
+
    case nir_address_format_logical:
       unreachable("Unsupported address format");
    }
 
    unreachable("Invalid address format");
 }
 
+static unsigned
+addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
+{
+   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
+       addr_format == nir_address_format_32bit_index_offset_pack64)
+      return 32;
+   return addr->bit_size;
+}
+
 static nir_ssa_def *
 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                     nir_address_format addr_format, int64_t offset)
 {
    return build_addr_iadd(b, addr, addr_format,
-                          nir_imm_intN_t(b, offset, addr->bit_size));
+                          nir_imm_intN_t(b, offset,
+                                         addr_get_offset_bit_size(addr, addr_format)));
+}
+
+static nir_ssa_def *
+build_addr_for_var(nir_builder *b, nir_variable *var,
+                   nir_address_format addr_format)
+{
+   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
+                            nir_var_shader_temp | nir_var_function_temp |
+                            nir_var_mem_constant));
+
+   const unsigned num_comps = nir_address_format_num_components(addr_format);
+   const unsigned bit_size = nir_address_format_bit_size(addr_format);
+
+   switch (addr_format) {
+   case nir_address_format_32bit_global:
+   case nir_address_format_64bit_global: {
+      nir_ssa_def *base_addr;
+      switch (var->data.mode) {
+      case nir_var_shader_temp:
+         base_addr = nir_load_scratch_base_ptr(b, 0, num_comps, bit_size);
+         break;
+
+      case nir_var_function_temp:
+         base_addr = nir_load_scratch_base_ptr(b, 1, num_comps, bit_size);
+         break;
+
+      case nir_var_mem_constant:
+         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
+         break;
+
+      default:
+         unreachable("Unsupported variable mode");
+      }
+
+      return build_addr_iadd_imm(b, base_addr, addr_format,
+                                 var->data.driver_location);
+   }
+
+   case nir_address_format_32bit_offset:
+      assert(var->data.driver_location <= UINT32_MAX);
+      return nir_imm_int(b, var->data.driver_location);
+
+   case nir_address_format_32bit_offset_as_64bit:
+      assert(var->data.driver_location <= UINT32_MAX);
+      return nir_imm_int64(b, var->data.driver_location);
+
+   default:
+      unreachable("Unsupported address format");
+   }
 }
 
 static nir_ssa_def *
 addr_to_index(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
 {
-   assert(addr_format == nir_address_format_32bit_index_offset);
-   assert(addr->num_components == 2);
-   return nir_channel(b, addr, 0);
+   switch (addr_format) {
+   case nir_address_format_32bit_index_offset:
+      assert(addr->num_components == 2);
+      return nir_channel(b, addr, 0);
+   case nir_address_format_32bit_index_offset_pack64:
+      return nir_unpack_64_2x32_split_y(b, addr);
+   case nir_address_format_vec2_index_32bit_offset:
+      assert(addr->num_components == 3);
+      return nir_channels(b, addr, 0x3);
+   default: unreachable("Invalid address format");
+   }
 }
 
 static nir_ssa_def *
 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format)
 {
-   assert(addr_format == nir_address_format_32bit_index_offset);
-   assert(addr->num_components == 2);
-   return nir_channel(b, addr, 1);
+   switch (addr_format) {
+   case nir_address_format_32bit_index_offset:
+      assert(addr->num_components == 2);
+      return nir_channel(b, addr, 1);
+   case nir_address_format_32bit_index_offset_pack64:
+      return nir_unpack_64_2x32_split_x(b, addr);
+   case nir_address_format_vec2_index_32bit_offset:
+      assert(addr->num_components == 3);
+      return nir_channel(b, addr, 2);
+   case nir_address_format_32bit_offset:
+      return addr;
+   case nir_address_format_32bit_offset_as_64bit:
+      return nir_u2u32(b, addr);
+   default:
+      unreachable("Invalid address format");
+   }
 }
 
 /** Returns true if the given address format resolves to a global address */
@@ -825,6 +899,13 @@ addr_format_is_global(nir_address_format addr_format)
           addr_format == nir_address_format_64bit_bounded_global;
 }
 
+static bool
+addr_format_is_offset(nir_address_format addr_format)
+{
+   return addr_format == nir_address_format_32bit_offset ||
+          addr_format == nir_address_format_32bit_offset_as_64bit;
+}
+
 static nir_ssa_def *
 addr_to_global(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format)
@@ -841,7 +922,10 @@ addr_to_global(nir_builder *b, nir_ssa_def *addr,
                       nir_u2u64(b, nir_channel(b, addr, 3)));
 
    case nir_address_format_32bit_index_offset:
+   case nir_address_format_32bit_index_offset_pack64:
+   case nir_address_format_vec2_index_32bit_offset:
    case nir_address_format_32bit_offset:
+   case nir_address_format_32bit_offset_as_64bit:
    case nir_address_format_logical:
       unreachable("Cannot get a 64-bit address with this address format");
    }
@@ -887,14 +971,32 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = nir_intrinsic_load_global;
       break;
-   case nir_var_shader_in:
-      assert(addr_format_is_global(addr_format));
+   case nir_var_uniform:
+      assert(addr_format_is_offset(addr_format));
+      assert(b->shader->info.stage == MESA_SHADER_KERNEL);
      op = nir_intrinsic_load_kernel_input;
      break;
   case nir_var_mem_shared:
-      assert(addr_format == nir_address_format_32bit_offset);
+      assert(addr_format_is_offset(addr_format));
      op = nir_intrinsic_load_shared;
      break;
+   case nir_var_shader_temp:
+   case nir_var_function_temp:
+      if (addr_format_is_offset(addr_format)) {
+         op = nir_intrinsic_load_scratch;
+      } else {
+         assert(addr_format_is_global(addr_format));
+         op = nir_intrinsic_load_global;
+      }
+      break;
+   case nir_var_mem_constant:
+      if (addr_format_is_offset(addr_format)) {
+         op = nir_intrinsic_load_constant;
+      } else {
+         assert(addr_format_is_global(addr_format));
+         op = nir_intrinsic_load_global_constant;
+      }
+      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }
@@ -903,17 +1005,22 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
 
    if (addr_format_is_global(addr_format)) {
       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
-   } else if (addr_format == nir_address_format_32bit_offset) {
+   } else if (addr_format_is_offset(addr_format)) {
       assert(addr->num_components == 1);
-      load->src[0] = nir_src_for_ssa(addr);
+      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
    } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }
 
-   if (mode != nir_var_shader_in && mode != nir_var_mem_shared)
+   if (nir_intrinsic_has_access(load))
       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
 
+   if (op == nir_intrinsic_load_constant) {
+      nir_intrinsic_set_base(load, 0);
+      nir_intrinsic_set_range(load, b->shader->constant_data_size);
+   }
+
    unsigned bit_size = intrin->dest.ssa.bit_size;
    if (bit_size == 1) {
       /* TODO: Make the native bool bit_size an option. */
@@ -958,7 +1065,9 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
        * standard encoding for booleans rather than forcing a 0/1 boolean.
        * This should save an instruction or two.
        */
-      if (mode == nir_var_mem_shared)
+      if (mode == nir_var_mem_shared ||
+          mode == nir_var_shader_temp ||
+          mode == nir_var_function_temp)
          result = nir_b2b1(b, result);
       else
          result = nir_i2b(b, result);
@@ -987,9 +1096,18 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
       op = nir_intrinsic_store_global;
       break;
    case nir_var_mem_shared:
-      assert(addr_format == nir_address_format_32bit_offset);
+      assert(addr_format_is_offset(addr_format));
       op = nir_intrinsic_store_shared;
       break;
+   case nir_var_shader_temp:
+   case nir_var_function_temp:
+      if (addr_format_is_offset(addr_format)) {
+         op = nir_intrinsic_store_scratch;
+      } else {
+         assert(addr_format_is_global(addr_format));
+         op = nir_intrinsic_store_global;
+      }
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -1003,7 +1121,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
        *
        * TODO: Make the native bool bit_size an option.
        */
-      if (mode == nir_var_mem_shared)
+      if (mode == nir_var_mem_shared ||
+          mode == nir_var_shader_temp ||
+          mode == nir_var_function_temp)
         value = nir_b2b32(b, value);
      else
         value = nir_b2i(b, value, 32);
@@ -1012,9 +1132,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
    store->src[0] = nir_src_for_ssa(value);
    if (addr_format_is_global(addr_format)) {
       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
-   } else if (addr_format == nir_address_format_32bit_offset) {
+   } else if (addr_format_is_offset(addr_format)) {
       assert(addr->num_components == 1);
-      store->src[1] = nir_src_for_ssa(addr);
+      store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
    } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -1022,7 +1142,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
 
    nir_intrinsic_set_write_mask(store, write_mask);
 
-   if (mode != nir_var_mem_shared)
+   if (nir_intrinsic_has_access(store))
       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
 
    /* TODO: We should try and provide a better alignment. For OpenCL, we need
@@ -1069,7 +1189,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
       op = global_atomic_for_deref(intrin->intrinsic);
       break;
    case nir_var_mem_shared:
-      assert(addr_format == nir_address_format_32bit_offset);
+      assert(addr_format_is_offset(addr_format));
       op = shared_atomic_for_deref(intrin->intrinsic);
       break;
    default:
@@ -1081,9 +1201,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    unsigned src = 0;
    if (addr_format_is_global(addr_format)) {
       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
-   } else if (addr_format == nir_address_format_32bit_offset) {
+   } else if (addr_format_is_offset(addr_format)) {
      assert(addr->num_components == 1);
-      atomic->src[src++] = nir_src_for_ssa(addr);
+      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -1095,7 +1215,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    /* Global atomics don't have access flags because they assume that the
     * address may be non-uniform.
     */
-   if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
+   if (nir_intrinsic_has_access(atomic))
       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
 
    assert(intrin->dest.ssa.num_components == 1);
@@ -1127,9 +1247,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
    assert(deref->dest.is_ssa);
    switch (deref->deref_type) {
    case nir_deref_type_var:
-      assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
-      return nir_imm_intN_t(b, deref->var->data.driver_location,
-                            deref->dest.ssa.bit_size);
+      return build_addr_for_var(b, deref->var, addr_format);
 
    case nir_deref_type_array: {
       nir_deref_instr *parent = nir_deref_instr_parent(deref);
@@ -1143,14 +1261,14 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
       assert(stride > 0);
 
       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
-      index = nir_i2i(b, index, base_addr->bit_size);
+      index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
       return build_addr_iadd(b, base_addr, addr_format,
                              nir_amul_imm(b, index, stride));
    }
 
    case nir_deref_type_ptr_as_array: {
       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
-      index = nir_i2i(b, index, base_addr->bit_size);
+      index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
       unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
       return build_addr_iadd(b, base_addr, addr_format,
                              nir_amul_imm(b, index, stride));
@@ -1284,7 +1402,6 @@ lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
    unsigned stride = glsl_get_explicit_stride(deref->type);
    assert(stride > 0);
 
-   assert(addr_format == nir_address_format_32bit_index_offset);
    nir_ssa_def *addr = &deref->dest.ssa;
    nir_ssa_def *index = addr_to_index(b, addr, addr_format);
    nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
@@ -1385,6 +1502,29 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
    return progress;
 }
 
+/** Lower explicitly laid out I/O access to byte offset/address intrinsics
+ *
+ * This pass is intended to be used for any I/O which touches memory external
+ * to the shader or which is directly visible to the client. It requires that
+ * all data types in the given modes have a explicit stride/offset decorations
+ * to tell it exactly how to calculate the offset/address for the given load,
+ * store, or atomic operation. If the offset/stride information does not come
+ * from the client explicitly (as with shared variables in GL or Vulkan),
+ * nir_lower_vars_to_explicit_types() can be used to add them.
+ *
+ * Unlike nir_lower_io, this pass is fully capable of handling incomplete
+ * pointer chains which may contain cast derefs. It does so by walking the
+ * deref chain backwards and simply replacing each deref, one at a time, with
+ * the appropriate address calculation. The pass takes a nir_address_format
+ * parameter which describes how the offset or address is to be represented
+ * during calculations. By ensuring that the address is always in a
+ * consistent format, pointers can safely be conjured from thin air by the
+ * driver, stored to variables, passed through phis, etc.
+ *
+ * The one exception to the simple algorithm described above is for handling
+ * row-major matrices in which case we may look down one additional level of
+ * the deref chain.
+ */
 bool
 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                       nir_address_format addr_format)
@@ -1450,24 +1590,51 @@ lower_vars_to_explicit(nir_shader *shader,
                        glsl_type_size_align_func type_info)
 {
    bool progress = false;
-   unsigned offset = 0;
-   nir_foreach_variable(var, vars) {
+   unsigned offset;
+   switch (mode) {
+   case nir_var_function_temp:
+   case nir_var_shader_temp:
+      offset = shader->scratch_size;
+      break;
+   case nir_var_mem_shared:
+      offset = 0;
+      break;
+   case nir_var_mem_constant:
+      offset = shader->constant_data_size;
+      break;
+   default:
+      unreachable("Unsupported mode");
+   }
+
+   nir_foreach_variable_in_list(var, vars) {
+      if (var->data.mode != mode)
+         continue;
+
       unsigned size, align;
       const struct glsl_type *explicit_type =
          glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
 
-      if (explicit_type != var->type) {
-         progress = true;
+      if (explicit_type != var->type)
         var->type = explicit_type;
-      }
 
       var->data.driver_location = ALIGN_POT(offset, align);
       offset = var->data.driver_location + size;
+      progress = true;
    }
 
-   if (mode == nir_var_mem_shared) {
+   switch (mode) {
+   case nir_var_shader_temp:
+   case nir_var_function_temp:
+      shader->scratch_size = offset;
+      break;
+   case nir_var_mem_shared:
       shader->info.cs.shared_size = offset;
-      shader->num_shared = offset;
+      shader->shared_size = offset;
+      break;
+   case nir_var_mem_constant:
+      shader->constant_data_size = offset;
+      break;
+   default:
+      unreachable("Unsupported mode");
    }
 
    return progress;
@@ -1490,9 +1657,9 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
    bool progress = false;
 
    if (modes & nir_var_mem_shared)
-      progress |= lower_vars_to_explicit(shader, &shader->shared, nir_var_mem_shared, type_info);
+      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
    if (modes & nir_var_shader_temp)
-      progress |= lower_vars_to_explicit(shader, &shader->globals, nir_var_shader_temp, type_info);
+      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
 
    nir_foreach_function(function, shader) {
       if (function->impl) {
@@ -1506,6 +1673,83 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
    return progress;
 }
 
+static void
+write_constant(void *dst, const nir_constant *c, const struct glsl_type *type)
+{
+   if (glsl_type_is_vector_or_scalar(type)) {
+      const unsigned num_components = glsl_get_vector_elements(type);
+      const unsigned bit_size = glsl_get_bit_size(type);
+      if (bit_size == 1) {
+         /* Booleans are special-cased to be 32-bit
+          *
+          * TODO: Make the native bool bit_size an option.
+          */
+         for (unsigned i = 0; i < num_components; i++) {
+            int32_t b32 = -(int)c->values[i].b;
+            memcpy((char *)dst + i * 4, &b32, 4);
+         }
+      } else {
+         assert(bit_size >= 8 && bit_size % 8 == 0);
+         const unsigned byte_size = bit_size / 8;
+         for (unsigned i = 0; i < num_components; i++) {
+            /* Annoyingly, thanks to packed structs, we can't make any
+             * assumptions about the alignment of dst. To avoid any strange
+             * issues with unaligned writes, we always use memcpy.
+             */
+            memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
+         }
+      }
+   } else if (glsl_type_is_array_or_matrix(type)) {
+      const unsigned array_len = glsl_get_length(type);
+      const unsigned stride = glsl_get_explicit_stride(type);
+      assert(stride > 0);
+      const struct glsl_type *elem_type = glsl_get_array_element(type);
+      for (unsigned i = 0; i < array_len; i++)
+         write_constant((char *)dst + i * stride, c->elements[i], elem_type);
+   } else {
+      assert(glsl_type_is_struct_or_ifc(type));
+      const unsigned num_fields = glsl_get_length(type);
+      for (unsigned i = 0; i < num_fields; i++) {
+         const int field_offset = glsl_get_struct_field_offset(type, i);
+         assert(field_offset >= 0);
+         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
+         write_constant((char *)dst + field_offset, c->elements[i], field_type);
+      }
+   }
+}
+
+bool
+nir_lower_mem_constant_vars(nir_shader *shader,
+                            glsl_type_size_align_func type_info)
+{
+   unsigned old_constant_data_size = shader->constant_data_size;
+   if (!lower_vars_to_explicit(shader, &shader->variables,
+                               nir_var_mem_constant, type_info)) {
+      nir_shader_preserve_all_metadata(shader);
+      return false;
+   }
+
+   shader->constant_data = rerzalloc_size(shader, shader->constant_data,
+                                          old_constant_data_size,
+                                          shader->constant_data_size);
+
+   nir_foreach_variable_with_modes(var, shader, nir_var_mem_constant) {
+      write_constant((char *)shader->constant_data + var->data.driver_location,
+                     var->constant_initializer, var->type);
+   }
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_lower_vars_to_explicit_types_impl(function->impl,
+                                            nir_var_mem_constant,
+                                            type_info);
+   }
+
+   return true;
+}
+
 /**
  * Return the offset source for a load/store intrinsic.
  */
@@ -1518,11 +1762,41 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_shared:
    case nir_intrinsic_load_uniform:
    case nir_intrinsic_load_global:
+   case nir_intrinsic_load_global_constant:
    case nir_intrinsic_load_scratch:
    case nir_intrinsic_load_fs_input_interp_deltas:
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_comp_swap:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_fadd:
+   case nir_intrinsic_shared_atomic_fcomp_swap:
+   case nir_intrinsic_shared_atomic_fmax:
+   case nir_intrinsic_shared_atomic_fmin:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_global_atomic_add:
+   case nir_intrinsic_global_atomic_and:
+   case nir_intrinsic_global_atomic_comp_swap:
+   case nir_intrinsic_global_atomic_exchange:
+   case nir_intrinsic_global_atomic_fadd:
+   case nir_intrinsic_global_atomic_fcomp_swap:
+   case nir_intrinsic_global_atomic_fmax:
+   case nir_intrinsic_global_atomic_fmin:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_or:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_global_atomic_xor:
       return &instr->src[0];
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_load_input_vertex:
    case nir_intrinsic_load_per_vertex_input:
    case nir_intrinsic_load_per_vertex_output:
    case nir_intrinsic_load_interpolated_input:
@@ -1582,7 +1856,10 @@ nir_address_format_null_value(nir_address_format addr_format)
       [nir_address_format_64bit_global] = {{0}},
       [nir_address_format_64bit_bounded_global] = {{0}},
       [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
+      [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
+      [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
       [nir_address_format_32bit_offset] = {{.u32 = ~0}},
+      [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
       [nir_address_format_logical] = {{.u32 = ~0}},
    };
 
@@ -1599,9 +1876,18 @@ nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
    case nir_address_format_64bit_global:
    case nir_address_format_64bit_bounded_global:
    case nir_address_format_32bit_index_offset:
+   case nir_address_format_vec2_index_32bit_offset:
    case nir_address_format_32bit_offset:
       return nir_ball_iequal(b, addr0, addr1);
 
+   case nir_address_format_32bit_offset_as_64bit:
+      assert(addr0->num_components == 1 && addr1->num_components == 1);
+      return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
+
+   case nir_address_format_32bit_index_offset_pack64:
+      assert(addr0->num_components == 1 && addr1->num_components == 1);
+      return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
+
    case nir_address_format_logical:
       unreachable("Unsupported address format");
    }
@@ -1617,10 +1903,16 @@ nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
    case nir_address_format_32bit_global:
    case nir_address_format_64bit_global:
    case nir_address_format_32bit_offset:
+   case nir_address_format_32bit_index_offset_pack64:
       assert(addr0->num_components == 1);
       assert(addr1->num_components == 1);
       return nir_isub(b, addr0, addr1);
 
+   case nir_address_format_32bit_offset_as_64bit:
+      assert(addr0->num_components == 1);
+      assert(addr1->num_components == 1);
+      return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
+
    case nir_address_format_64bit_bounded_global:
       return nir_isub(b, addr_to_global(b, addr0, addr_format),
                          addr_to_global(b, addr1, addr_format));
@@ -1631,6 +1923,12 @@ nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
       /* Assume the same buffer index. */
       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
 
+   case nir_address_format_vec2_index_32bit_offset:
+      assert(addr0->num_components == 3);
+      assert(addr1->num_components == 3);
+      /* Assume the same buffer index. */
+      return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
+
    case nir_address_format_logical:
       unreachable("Unsupported address format");
    }
@@ -1656,6 +1954,17 @@ is_output(nir_intrinsic_instr *intrin)
           intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
 }
 
+static bool is_dual_slot(nir_intrinsic_instr *intrin)
+{
+   if (intrin->intrinsic == nir_intrinsic_store_output ||
+       intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
+      return nir_src_bit_size(intrin->src[0]) == 64 &&
+             nir_src_num_components(intrin->src[0]) >= 3;
+   }
+
+   return nir_dest_bit_size(intrin->dest) &&
+          nir_dest_num_components(intrin->dest) >= 3;
+}
 
 /**
  * This pass adds constant offsets to instr->const_index[0] for input/output
@@ -1682,7 +1991,16 @@ add_const_offset_to_base_block(nir_block *block, nir_builder *b,
       nir_src *offset = nir_get_io_offset_src(intrin);
 
       if (nir_src_is_const(*offset)) {
-         intrin->const_index[0] += nir_src_as_uint(*offset);
+         unsigned off = nir_src_as_uint(*offset);
+
+         nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
+
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
+         sem.location += off;
+         /* non-indirect indexing should reduce num_slots */
+         sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
+         nir_intrinsic_set_io_semantics(intrin, sem);
+
          b->cursor = nir_before_instr(&intrin->instr);
          nir_instr_rewrite_src(&intrin->instr, offset,
                                nir_src_for_ssa(nir_imm_int(b, 0)));