From: Eric Anholt Date: Fri, 20 Dec 2019 22:02:55 +0000 (-0800) Subject: freedreno: Stop scattered remapping of SSBOs/images to IBOs. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fb6fca003757478a06fb1f6781ad769e84b335ff;p=mesa.git freedreno: Stop scattered remapping of SSBOs/images to IBOs. Just make it be all SSBOs then all storage images. The remapping table was there to make it so that the big gap present from gallium's atomic lowering would get cleaned up, but that's no longer the case. The table has made it very hard to support Vulkan storage images, so it's time for it to go. This does mean that an SSBO/IBO that is only loaded (or size-queried) will now occupy a slot in the table where it wouldn't before. This seems like a minor cost compared to being able to drop this much logic. With the remapping table gone, SSBO array handling for turnip just falls out. Fixes many array cases of dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_buffer.* Reviewed-by: Rob Clark Reviewed-by: Jonathan Marek (turnip) Tested-by: Marge Bot Part-of: --- diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index dd654bacc7c..264b22fd430 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -43,7 +43,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction *ldgb, *src0, *src1, *byte_offset, *offset; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0])); byte_offset = ir3_get_src(ctx, &intr->src[1])[0]; offset = ir3_get_src(ctx, &intr->src[2])[0]; @@ -81,7 +81,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) unsigned ncomp = ffs(~wrmask) - 1; /* can this be non-const buffer_index? how do we handle that? 
*/ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[1])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1])); byte_offset = ir3_get_src(ctx, &intr->src[2])[0]; offset = ir3_get_src(ctx, &intr->src[3])[0]; @@ -132,7 +132,7 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) type_t type = TYPE_U32; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0])); ssbo = create_immed(b, ibo_idx); byte_offset = ir3_get_src(ctx, &intr->src[1])[0]; @@ -262,7 +262,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); /* src0 is value @@ -301,7 +301,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); image = create_immed(b, ibo_idx); diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index fe3355bf2eb..b75489b6b6a 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -48,7 +48,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction *ldib; /* 
can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[0])); offset = ir3_get_src(ctx, &intr->src[2])[0]; @@ -77,7 +77,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) unsigned ncomp = ffs(~wrmask) - 1; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[1])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1])); /* src0 is offset, src1 is value: */ @@ -119,7 +119,8 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) type_t type = TYPE_U32; /* can this be non-const buffer_index? how do we handle that? */ - int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, nir_src_as_uint(intr->src[0])); + int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, + nir_src_as_uint(intr->src[0])); ibo = create_immed(b, ibo_idx); data = ir3_get_src(ctx, &intr->src[2])[0]; @@ -213,7 +214,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); /* src0 is offset, src1 is value: @@ -242,7 +243,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; unsigned ncoords = ir3_get_image_coords(var, NULL); unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); - unsigned ibo_idx = 
ir3_image_to_ibo(&ctx->so->image_mapping, slot); + unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot); ibo = create_immed(b, ibo_idx); @@ -383,7 +384,7 @@ get_atomic_dest_mov(struct ir3_instruction *atomic) void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) { - if (so->image_mapping.num_ibo == 0) + if (ir3_shader_nibo(so) == 0) return; foreach_block (block, &ir->block_list) { diff --git a/src/freedreno/ir3/ir3_image.c b/src/freedreno/ir3/ir3_image.c index 60c71901c9c..6dabf6c0376 100644 --- a/src/freedreno/ir3/ir3_image.c +++ b/src/freedreno/ir3/ir3_image.c @@ -35,20 +35,14 @@ void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures) { memset(mapping, IBO_INVALID, sizeof(*mapping)); - mapping->num_ibo = 0; mapping->num_tex = 0; mapping->tex_base = num_textures; } unsigned -ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo) +ir3_ssbo_to_ibo(struct ir3_shader *shader, unsigned ssbo) { - if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) { - unsigned ibo = mapping->num_ibo++; - mapping->ssbo_to_ibo[ssbo] = ibo; - mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo; - } - return mapping->ssbo_to_ibo[ssbo]; + return ssbo; } unsigned @@ -63,14 +57,9 @@ ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo) } unsigned -ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image) +ir3_image_to_ibo(struct ir3_shader *shader, unsigned image) { - if (mapping->image_to_ibo[image] == IBO_INVALID) { - unsigned ibo = mapping->num_ibo++; - mapping->image_to_ibo[image] = ibo; - mapping->ibo_to_image[ibo] = image; - } - return mapping->image_to_ibo[image]; + return shader->nir->info.num_ssbos + image; } unsigned diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h index c89e581eef8..b0e0959b157 100644 --- a/src/freedreno/ir3/ir3_image.h +++ b/src/freedreno/ir3/ir3_image.h @@ -31,9 +31,9 @@ void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures); -unsigned 
ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo); +unsigned ir3_ssbo_to_ibo(struct ir3_shader *shader, unsigned ssbo); unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo); -unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image); +unsigned ir3_image_to_ibo(struct ir3_shader *shader, unsigned image); unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image); unsigned ir3_get_image_slot(nir_deref_instr *deref); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 528764b0e27..f056a3e5cd6 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -427,12 +427,10 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type) */ struct ir3_ibo_mapping { #define IBO_INVALID 0xff - /* Maps logical SSBO state to hw state: */ - uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS]; + /* Maps logical SSBO state to hw tex state: */ uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS]; - /* Maps logical Image state to hw state: */ - uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES]; + /* Maps logical Image state to hw tex state: */ uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES]; /* Maps hw state back to logical SSBO or Image state: @@ -441,10 +439,8 @@ struct ir3_ibo_mapping { * hw slot is used for SSBO state vs Image state. */ #define IBO_SSBO 0x80 - uint8_t ibo_to_image[32]; uint8_t tex_to_image[32]; - uint8_t num_ibo; uint8_t num_tex; /* including real textures */ uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */ }; @@ -795,4 +791,14 @@ ir3_shader_halfregs(const struct ir3_shader_variant *v) return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); } +static inline uint32_t +ir3_shader_nibo(const struct ir3_shader_variant *v) +{ + /* The dummy variant used in binning mode won't have an actual shader. 
*/ + if (!v->shader) + return 0; + + return v->shader->nir->info.num_ssbos + v->shader->nir->info.num_images; +} + #endif /* IR3_SHADER_H_ */ diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 516133def56..c922c34a275 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2956,26 +2956,25 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, &pipeline->program.link[type]; VkResult result; - if (link->image_mapping.num_ibo == 0) { + unsigned num_desc = link->ssbo_map.num_desc; + + if (num_desc == 0) { *entry = (struct tu_cs_entry) {}; return VK_SUCCESS; } struct ts_cs_memory ibo_const; - result = tu_cs_alloc(device, draw_state, link->image_mapping.num_ibo, + result = tu_cs_alloc(device, draw_state, num_desc, A6XX_TEX_CONST_DWORDS, &ibo_const); if (result != VK_SUCCESS) return result; - for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) { - unsigned idx = link->image_mapping.ibo_to_image[i]; - uint32_t *dst = &ibo_const.map[A6XX_TEX_CONST_DWORDS * i]; - - if (idx & IBO_SSBO) { - idx &= ~IBO_SSBO; + int ssbo_index = 0; + for (unsigned i = 0; i < link->ssbo_map.num; i++) { + for (int j = 0; j < link->ssbo_map.array_size[i]; j++) { + uint32_t *dst = &ibo_const.map[A6XX_TEX_CONST_DWORDS * ssbo_index]; - uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx, - 0 /* XXX */); + uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, i, j); /* We don't expose robustBufferAccess, so leave the size unlimited. 
*/ uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4; @@ -2990,10 +2989,11 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, dst[5] = va >> 32; for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++) dst[i] = 0; - } else { - tu_finishme("Emit images"); + + ssbo_index++; } } + assert(ssbo_index == num_desc); struct tu_cs cs; result = tu_cs_begin_sub_stream(device, draw_state, 7, &cs); @@ -3027,7 +3027,7 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, CP_LOAD_STATE6_0_STATE_TYPE(st) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo)); + CP_LOAD_STATE6_0_NUM_UNIT(num_desc)); tu_cs_emit_qw(&cs, ibo_const.iova); /* SRC_ADDR_LO/HI */ tu_cs_emit_pkt4(&cs, ibo_addr_reg, 2); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index f170fa1958b..1b43264b5bc 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -358,6 +358,15 @@ tu6_blend_op(VkBlendOp op) } } +static unsigned +tu_shader_nibo(const struct tu_shader *shader) +{ + /* In tu_cmd_buffer.c we emit the SSBO's IBOS, but not yet storage image + * IBOs. 
+ */ + return shader->ssbo_map.num_desc; +} + static void tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader, const struct ir3_shader_variant *vs) @@ -457,7 +466,7 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader, uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) | - A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo); + A6XX_SP_FS_CONFIG_NIBO(tu_shader_nibo(shader)); if (fs->instrlen) sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; @@ -479,7 +488,7 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader, A6XX_HLSQ_FS_CNTL_ENABLED); tu_cs_emit_pkt4(cs, REG_A6XX_SP_IBO_COUNT, 1); - tu_cs_emit(cs, fs->image_mapping.num_ibo); + tu_cs_emit(cs, tu_shader_nibo(shader)); } static void @@ -496,7 +505,7 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2); tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED | - A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | + A6XX_SP_CS_CONFIG_NIBO(tu_shader_nibo(shader)) | A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc)); tu_cs_emit(cs, v->instrlen); @@ -525,7 +534,7 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, tu_cs_emit(cs, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */ tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_IBO_COUNT, 1); - tu_cs_emit(cs, v->image_mapping.num_ibo); + tu_cs_emit(cs, tu_shader_nibo(shader)); } static void diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 1fc9da9a779..0b755a99f8e 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -294,6 +294,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, tu_finishme("non-constant vulkan_resource_index array index"); index = map_add(&shader->ssbo_map, set, binding, 0, binding_layout->array_size); + index += const_val->u32; break; default: tu_finishme("unsupported 
desc_type for vulkan_resource_index"); @@ -345,6 +346,12 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, progress |= lower_impl(function->impl, tu_shader, layout); } + /* spirv_to_nir produces num_ssbos equal to the number of SSBO-containing + * variables, while ir3 wants the number of descriptors (like the gallium + * path). + */ + shader->info.num_ssbos = tu_shader->ssbo_map.num_desc; + return progress; } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 486657d92f4..9b337d33de2 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -400,13 +400,10 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_shader_variant *v) { unsigned count = util_last_bit(so->enabled_mask); - const struct ir3_ibo_mapping *m = &v->image_mapping; for (unsigned i = 0; i < count; i++) { - unsigned slot = m->ssbo_to_ibo[i]; - OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_NUM_UNIT(1)); @@ -424,7 +421,7 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_NUM_UNIT(1)); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c index e46a21c4523..4da1f16385a 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c @@ -210,6 +210,6 @@ fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, translate_image(&img, &so->si[index]); emit_image_tex(ring, 
m->image_to_tex[index] + m->tex_base, &img, shader); - emit_image_ssbo(ring, m->image_to_ibo[index], &img, shader); + emit_image_ssbo(ring, v->shader->nir->info.num_ssbos + index, &img, shader); } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 2e6a7fd21b0..36ae9f5b86d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -86,7 +86,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v, OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | - A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | + A6XX_SP_CS_CONFIG_NIBO(v->shader->nir->info.num_ssbos + + v->shader->nir->info.num_images) | A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 7b1b88cee7e..9fb0125409e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1142,11 +1142,11 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) emit_border_color(ctx, ring); if (hs) { - debug_assert(hs->image_mapping.num_ibo == 0); - debug_assert(ds->image_mapping.num_ibo == 0); + debug_assert(ir3_shader_nibo(hs) == 0); + debug_assert(ir3_shader_nibo(ds) == 0); } if (gs) { - debug_assert(gs->image_mapping.num_ibo == 0); + debug_assert(ir3_shader_nibo(gs) == 0); } #define DIRTY_IBO (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE | \ @@ -1156,14 +1156,13 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_build_ibo_state(ctx, fs, PIPE_SHADER_FRAGMENT); struct fd_ringbuffer *obj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING); - const struct ir3_ibo_mapping *mapping = &fs->image_mapping; OUT_PKT7(obj, 
CP_LOAD_STATE6, 3); OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) | - CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); + CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(fs))); OUT_RB(obj, state); OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2); @@ -1173,7 +1172,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) * de-duplicate this from program->config_stateobj */ OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); - OUT_RING(obj, mapping->num_ibo); + OUT_RING(obj, ir3_shader_nibo(fs)); ir3_emit_ssbo_sizes(ctx->screen, fs, obj, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); @@ -1250,21 +1249,20 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) { struct fd_ringbuffer *state = fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE); - const struct ir3_ibo_mapping *mapping = &cp->image_mapping; OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); + CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp))); OUT_RB(ring, state); OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_LO, 2); OUT_RB(ring, state); OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1); - OUT_RING(ring, mapping->num_ibo); + OUT_RING(ring, ir3_shader_nibo(cp)); fd_ringbuffer_del(state); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index 8cc95b7cb5b..7a126ddf7b9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -232,6 +232,15 @@ fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *p static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) { + /* If the SSBO isn't present 
(because gallium doesn't pack atomic + * counters), zero-fill the slot. + */ + if (!img->prsc) { + for (int i = 0; i < 16; i++) + OUT_RING(ring, 0); + return; + } + struct fd_resource *rsc = fd_resource(img->prsc); enum a6xx_tile_mode tile_mode = fd_resource_tile_mode(img->prsc, img->level); bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, img->level); @@ -280,24 +289,24 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, { struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader]; struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader]; - const struct ir3_ibo_mapping *mapping = &v->image_mapping; struct fd_ringbuffer *state = fd_submit_new_ringbuffer(ctx->batch->submit, - mapping->num_ibo * 16 * 4, FD_RINGBUFFER_STREAMING); + (v->shader->nir->info.num_ssbos + + v->shader->nir->info.num_images) * 16 * 4, + FD_RINGBUFFER_STREAMING); assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); - for (unsigned i = 0; i < mapping->num_ibo; i++) { + for (unsigned i = 0; i < v->shader->nir->info.num_ssbos; i++) { struct fd6_image img; - unsigned idx = mapping->ibo_to_image[i]; - - if (idx & IBO_SSBO) { - translate_buf(&img, &bufso->sb[idx & ~IBO_SSBO]); - } else { - translate_image(&img, &imgso->si[idx]); - } + translate_buf(&img, &bufso->sb[i]); + emit_image_ssbo(state, &img); + } + for (unsigned i = 0; i < v->shader->nir->info.num_images; i++) { + struct fd6_image img; + translate_image(&img, &imgso->si[i]); emit_image_ssbo(state, &img); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 14b57bfb238..ffd633aa6c0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -221,39 +221,39 @@ setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *stat OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) | - 
A6XX_SP_VS_CONFIG_NIBO(state->vs->image_mapping.num_ibo) | + A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) | A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) | A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp)); OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1); OUT_RING(ring, COND(state->hs, A6XX_SP_HS_CONFIG_ENABLED | - A6XX_SP_HS_CONFIG_NIBO(state->hs->image_mapping.num_ibo) | + A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) | A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) | A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp))); OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1); OUT_RING(ring, COND(state->ds, A6XX_SP_DS_CONFIG_ENABLED | - A6XX_SP_DS_CONFIG_NIBO(state->ds->image_mapping.num_ibo) | + A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) | A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) | A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp))); OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1); OUT_RING(ring, COND(state->gs, A6XX_SP_GS_CONFIG_ENABLED | - A6XX_SP_GS_CONFIG_NIBO(state->gs->image_mapping.num_ibo) | + A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) | A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) | A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp))); OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1); OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) | - A6XX_SP_FS_CONFIG_NIBO(state->fs->image_mapping.num_ibo) | + A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) | A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) | A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp)); OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1); - OUT_RING(ring, state->fs->image_mapping.num_ibo); + OUT_RING(ring, ir3_shader_nibo(state->fs)); } static inline uint32_t