From e4baff90812d799d586296fcad992ddcc553c359 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 5 Feb 2020 14:54:42 -0800 Subject: [PATCH] freedreno: Switch to using lowered image intrinsics. This cuts out a bunch of deref chain walking that the compiler can do for us. Reviewed-by: Kenneth Graunke Tested-by: Marge Bot Part-of: --- src/freedreno/ir3/ir3_a4xx.c | 52 +++++++-------- src/freedreno/ir3/ir3_a6xx.c | 39 ++++++----- src/freedreno/ir3/ir3_compiler_nir.c | 42 ++++++------ src/freedreno/ir3/ir3_image.c | 65 ++++--------------- src/freedreno/ir3/ir3_image.h | 5 +- src/freedreno/ir3/ir3_nir.c | 26 ++++---- src/freedreno/vulkan/tu_shader.c | 19 ++++-- .../drivers/freedreno/freedreno_screen.c | 3 + 8 files changed, 108 insertions(+), 143 deletions(-) diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 91385344eff..444b7c592f2 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -207,22 +207,22 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) } static struct ir3_instruction * -get_image_offset(struct ir3_context *ctx, const nir_variable *var, +get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr, struct ir3_instruction * const *coords, bool byteoff) { struct ir3_block *b = ctx->block; struct ir3_instruction *offset; - unsigned ncoords = ir3_get_image_coords(var, NULL); + unsigned index = nir_src_as_uint(instr->src[0]); + unsigned ncoords = ir3_get_image_coords(instr, NULL); /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: */ struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned cb = regid(const_state->offsets.image_dims, 0) + - const_state->image_dims.off[var->data.driver_location]; + const_state->image_dims.off[index]; - debug_assert(const_state->image_dims.mask & - (1 << var->data.driver_location)); + debug_assert(const_state->image_dims.mask & (1 << index)); /* offset = coords.x * bytes_per_pixel: */ offset = ir3_MUL_S24(b, coords[0], 0, create_uniform(b, cb + 0), 0); @@ -251,26 +251,25 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var, }, 2); } -/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */ +/* src[] = { index, coord, sample_index, value }. const_index[] = {} */ static void emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); struct ir3_instruction *stib, *offset; struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]); struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); - unsigned ncoords = ir3_get_image_coords(var, NULL); - unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned ncoords = ir3_get_image_coords(intr, NULL); + unsigned slot = nir_src_as_uint(intr->src[0]); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); - unsigned ncomp = ir3_get_num_components_for_image_format(var->data.image.format); + unsigned ncomp = ir3_get_num_components_for_image_format(nir_intrinsic_format(intr)); /* src0 is value * src1 is coords * src2 is 64b byte offset */ - offset = get_image_offset(ctx, var, coords, true); + offset = get_image_offset(ctx, intr, coords, true); /* NOTE: stib seems to take byte offset, but stgb.typed can be used * too and takes a dword offset.. not quite sure yet why blob uses @@ -283,7 +282,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) offset, 0); stib->cat6.iim_val = ncomp; stib->cat6.d = ncoords; - stib->cat6.type = ir3_get_image_type(var); + stib->cat6.type = ir3_get_type_for_image_intrinsic(intr); stib->cat6.typed = true; stib->barrier_class = IR3_BARRIER_IMAGE_W; stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; @@ -296,11 +295,10 @@ static struct ir3_instruction * emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); struct ir3_instruction *atomic, *image, *src0, *src1, *src2; struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); - unsigned ncoords = ir3_get_image_coords(var, NULL); - unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned ncoords = ir3_get_image_coords(intr, NULL); + unsigned slot = nir_src_as_uint(intr->src[0]); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); image = create_immed(b, ibo_idx); @@ -311,33 +309,33 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) */ src0 = ir3_get_src(ctx, &intr->src[3])[0]; src1 = ir3_create_collect(ctx, coords, ncoords); - src2 = get_image_offset(ctx, var, coords, false); + src2 = get_image_offset(ctx, intr, coords, false); switch (intr->intrinsic) { - case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_atomic_add: atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_umin: atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_umax: atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_atomic_and: atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_atomic_or: atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_atomic_xor: atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); break; - case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: /* for cmpxchg, src0 is [ui]vec2(data, compare): */ src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ ir3_get_src(ctx, &intr->src[4])[0], @@ -351,7 +349,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) atomic->cat6.iim_val = 1; atomic->cat6.d = ncoords; - atomic->cat6.type = ir3_get_image_type(var); + atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr); atomic->cat6.typed = true; atomic->barrier_class = IR3_BARRIER_IMAGE_W; atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 8f9e5734d59..e84c4808de1 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -208,14 +208,14 @@ static void emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); struct ir3_instruction *stib; struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]); struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); - unsigned ncoords = ir3_get_image_coords(var, NULL); - unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned ncoords = ir3_get_image_coords(intr, NULL); + unsigned slot = nir_src_as_uint(intr->src[0]); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); - unsigned ncomp = ir3_get_num_components_for_image_format(var->data.image.format); + enum pipe_format format = nir_intrinsic_format(intr); + unsigned ncomp = ir3_get_num_components_for_image_format(format); /* src0 is offset, src1 is value: */ @@ -224,7 +224,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) ir3_create_collect(ctx, value, ncomp), 0); stib->cat6.iim_val = ncomp; stib->cat6.d = ncoords; - stib->cat6.type = ir3_get_image_type(var); + stib->cat6.type = ir3_get_type_for_image_intrinsic(intr); stib->cat6.typed = true; stib->barrier_class = IR3_BARRIER_IMAGE_W; stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; @@ -237,12 +237,11 @@ static struct ir3_instruction * emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy; struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; - unsigned ncoords = ir3_get_image_coords(var, NULL); - unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned ncoords = ir3_get_image_coords(intr, NULL); + unsigned slot = nir_src_as_uint(intr->src[0]); unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); ibo = create_immed(b, ibo_idx); @@ -262,7 +261,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) dummy = create_immed(b, 0); src0 = ir3_create_collect(ctx, coords, ncoords); - if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) { + if (intr->intrinsic == nir_intrinsic_image_atomic_comp_swap) { struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0]; src1 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ dummy, compare, value @@ -274,30 +273,30 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) } switch (intr->intrinsic) { - case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_atomic_add: atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_umin: atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_umax: atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_atomic_and: atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_atomic_or: atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_atomic_xor: atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0); break; - case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0); break; default: @@ -306,7 +305,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) atomic->cat6.iim_val = 1; atomic->cat6.d = ncoords; - atomic->cat6.type = ir3_get_image_type(var); + atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr); atomic->cat6.typed = true; atomic->barrier_class = IR3_BARRIER_IMAGE_W; atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index c6cddc0b9ca..a8b691a15d7 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1040,7 +1040,7 @@ emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) static struct ir3_instruction * get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr) { - unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned slot = nir_src_as_uint(intr->src[0]); unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot); struct ir3_instruction *texture, *sampler; @@ -1059,13 +1059,12 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr); struct ir3_instruction *sam; struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]); struct ir3_instruction *coords[4]; - unsigned flags, ncoords = ir3_get_image_coords(var, &flags); - type_t type = ir3_get_image_type(var); + unsigned flags, ncoords = ir3_get_image_coords(intr, &flags); + type_t type = ir3_get_type_for_image_intrinsic(intr); /* hmm, this seems a bit odd, but it is what blob does and (at least * a5xx) just faults on bogus addresses otherwise: @@ -1095,10 +1094,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr); struct ir3_instruction *sam, *lod; - unsigned flags, ncoords = ir3_get_image_coords(var, &flags); + unsigned flags, ncoords = ir3_get_image_coords(intr, &flags); type_t dst_type = nir_dest_bit_size(intr->dest) < 32 ? TYPE_U16 : TYPE_U32; @@ -1126,9 +1124,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, * * TODO: This is at least true on a5xx. Check other gens. */ - enum glsl_sampler_dim dim = - glsl_get_sampler_dim(glsl_without_array(var->type)); - if (dim == GLSL_SAMPLER_DIM_BUF) { + if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) { /* Since all the possible values the divisor can take are * power-of-two (4, 8, or 16), the division is implemented * as a shift-right. @@ -1138,7 +1134,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, */ struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned cb = regid(const_state->offsets.image_dims, 0) + - const_state->image_dims.off[var->data.driver_location]; + const_state->image_dims.off[nir_src_as_uint(intr->src[0])]; struct ir3_instruction *aux = create_uniform(b, cb + 1); tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); @@ -1621,28 +1617,28 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_shared_atomic_comp_swap: dst[0] = emit_intrinsic_atomic_shared(ctx, intr); break; - case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_load: emit_intrinsic_load_image(ctx, intr, dst); break; - case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_store: if ((ctx->so->type == MESA_SHADER_FRAGMENT) && !ctx->s->info.fs.early_fragment_tests) ctx->so->no_earlyz = true; ctx->funcs->emit_intrinsic_store_image(ctx, intr); break; - case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_size: emit_intrinsic_image_size(ctx, intr, dst); break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: if ((ctx->so->type == MESA_SHADER_FRAGMENT) && !ctx->s->info.fs.early_fragment_tests) ctx->so->no_earlyz = true; diff --git a/src/freedreno/ir3/ir3_image.c b/src/freedreno/ir3/ir3_image.c index 3fc0223c0ca..09ee833de11 100644 --- a/src/freedreno/ir3/ir3_image.c +++ b/src/freedreno/ir3/ir3_image.c @@ -73,49 +73,19 @@ ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image) return mapping->image_to_tex[image] + mapping->tex_base; } -/* Helper to parse the deref for an image to get image slot. This should be - * mapped to tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo(). - */ -unsigned -ir3_get_image_slot(nir_deref_instr *deref) -{ - unsigned int loc = 0; - unsigned inner_size = 1; - - while (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - unsigned const_index = nir_src_as_uint(deref->arr.index); - - /* Go to the next instruction */ - deref = nir_deref_instr_parent(deref); - - assert(glsl_type_is_array(deref->type)); - const unsigned array_len = glsl_get_length(deref->type); - loc += MIN2(const_index, array_len - 1) * inner_size; - - /* Update the inner size */ - inner_size *= array_len; - } - - loc += deref->var->data.driver_location; - - return loc; -} - /* see tex_info() for equiv logic for texture instructions.. it would be * nice if this could be better unified.. */ unsigned -ir3_get_image_coords(const nir_variable *var, unsigned *flagsp) +ir3_get_image_coords(const nir_intrinsic_instr *instr, unsigned *flagsp) { - const struct glsl_type *type = glsl_without_array(var->type); - unsigned coords = glsl_get_sampler_coordinate_components(type); + unsigned coords = nir_image_intrinsic_coord_components(instr); unsigned flags = 0; if (coords == 3) flags |= IR3_INSTR_3D; - if (glsl_sampler_type_is_array(type)) + if (nir_intrinsic_image_array(instr)) flags |= IR3_INSTR_A; if (flagsp) @@ -125,25 +95,18 @@ ir3_get_image_coords(const nir_variable *var, unsigned *flagsp) } type_t -ir3_get_image_type(const nir_variable *var) +ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr) { - switch (glsl_get_sampler_result_type(glsl_without_array(var->type))) { - case GLSL_TYPE_UINT: - return TYPE_U32; - case GLSL_TYPE_INT: - return TYPE_S32; - case GLSL_TYPE_FLOAT: - return TYPE_F32; - case GLSL_TYPE_UINT16: - return TYPE_U16; - case GLSL_TYPE_INT16: - return TYPE_S16; - case GLSL_TYPE_FLOAT16: - return TYPE_F16; - default: - unreachable("bad sampler type."); - return 0; - } + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + int bit_size = info->has_dest ? nir_dest_bit_size(instr->dest) : 32; + enum pipe_format format = nir_intrinsic_format(instr); + + if (util_format_is_pure_uint(format)) + return bit_size == 16 ? TYPE_U16 : TYPE_U32; + else if (util_format_is_pure_sint(format)) + return bit_size == 16 ? TYPE_S16 : TYPE_S32; + else + return bit_size == 16 ? TYPE_F16 : TYPE_F32; } /* Returns the number of components for the different image formats diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h index 5ba1652dab1..ff798694713 100644 --- a/src/freedreno/ir3/ir3_image.h +++ b/src/freedreno/ir3/ir3_image.h @@ -36,9 +36,8 @@ unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo); unsigned ir3_image_to_ibo(struct ir3_shader *shader, unsigned image); unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image); -unsigned ir3_get_image_slot(nir_deref_instr *deref); -unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp); -type_t ir3_get_image_type(const nir_variable *var); +unsigned ir3_get_image_coords(const nir_intrinsic_instr *instr, unsigned *flagsp); +type_t ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr); unsigned ir3_get_num_components_for_image_format(enum pipe_format); #endif /* IR3_IMAGE_H_ */ diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 5d4dbb1135d..b6b37d05206 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -382,19 +382,19 @@ ir3_nir_scan_driver_consts(nir_shader *shader, layout->ssbo_size.count; layout->ssbo_size.count += 1; /* one const per */ break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - case nir_intrinsic_image_deref_store: - case nir_intrinsic_image_deref_size: - idx = nir_intrinsic_get_var(intr, 0)->data.driver_location; + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_store: + case nir_intrinsic_image_size: + idx = nir_src_as_uint(intr->src[0]); if (layout->image_dims.mask & (1 << idx)) break; layout->image_dims.mask |= (1 << idx); diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 9df70400449..6f972f96264 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -269,8 +269,9 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr, } static void -add_image_deref_mapping(nir_intrinsic_instr *instr, struct tu_shader *shader, - const struct tu_pipeline_layout *layout) +lower_image_deref(nir_builder *b, + nir_intrinsic_instr *instr, struct tu_shader *shader, + const struct tu_pipeline_layout *layout) { nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); @@ -281,9 +282,15 @@ add_image_deref_mapping(nir_intrinsic_instr *instr, struct tu_shader *shader, struct tu_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; - var->data.driver_location = - map_add(&shader->image_map, set, binding, var->data.index, - binding_layout->array_size); + nir_ssa_def *index = nir_imm_int(b, + map_add(&shader->image_map, + set, binding, var->data.index, + binding_layout->array_size)); + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + index = nir_iadd(b, index, nir_ssa_for_src(b, deref->arr.index, 1)); + } + nir_rewrite_image_intrinsic(instr, index, false); } static bool @@ -324,7 +331,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, case nir_intrinsic_image_deref_load_param_intel: case nir_intrinsic_image_deref_load_raw_intel: case nir_intrinsic_image_deref_store_raw_intel: - add_image_deref_mapping(instr, shader, layout); + lower_image_deref(b, instr, shader, layout); return true; default: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index f79d858e11e..bb8711f34d2 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -342,6 +342,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; return 0; + case PIPE_CAP_NIR_IMAGES_AS_DEREF: + return 0; + case PIPE_CAP_MAX_VIEWPORTS: return 1; -- 2.30.2