From: Marek Olšák Date: Sat, 31 Jan 2015 16:22:35 +0000 (+0100) Subject: radeonsi: allow 64 descriptors per array X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8f65e6eae8a3d44ff9ae04c62621e360a06ae29d;p=mesa.git radeonsi: allow 64 descriptors per array We need a slot for the stipple texture and the pixel shader already uses 32 textures (16 API slots + 16 FMASK slots). Reviewed-by: Michel Dänzer --- diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 3452be3de92..bbfd36dcbeb 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -181,7 +181,7 @@ static void si_update_descriptors(struct si_context *sctx, if (desc->dirty_mask) { desc->atom.num_dw = 7 + /* copy */ - (4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */ + (4 + desc->element_dw_size) * util_bitcount64(desc->dirty_mask) + /* update */ 4; /* pointer update */ if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 && @@ -241,7 +241,7 @@ static void si_emit_descriptors(struct si_context *sctx, int packet_start = 0; int packet_size = 0; int last_index = desc->num_elements; /* point to a non-existing element */ - unsigned dirty_mask = desc->dirty_mask; + uint64_t dirty_mask = desc->dirty_mask; unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS; assert(dirty_mask); @@ -263,7 +263,7 @@ static void si_emit_descriptors(struct si_context *sctx, * with CP DMA instead of emitting zeros. */ while (dirty_mask) { - int i = u_bit_scan(&dirty_mask); + int i = u_bit_scan64(&dirty_mask); assert(i < desc->num_elements); @@ -366,11 +366,11 @@ static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_sampler_views *views) { - unsigned mask = views->desc.enabled_mask; + uint64_t mask = views->desc.enabled_mask; /* Add relocations to the CS. */ while (mask) { - int i = u_bit_scan(&mask); + int i = u_bit_scan64(&mask); struct si_sampler_view *rview = (struct si_sampler_view*)views->views[i]; @@ -409,14 +409,14 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, pipe_sampler_view_reference(&views->views[slot], view); views->desc_data[slot] = view_desc; - views->desc.enabled_mask |= 1 << slot; + views->desc.enabled_mask |= 1llu << slot; } else { pipe_sampler_view_reference(&views->views[slot], NULL); views->desc_data[slot] = null_descriptor; - views->desc.enabled_mask &= ~(1 << slot); + views->desc.enabled_mask &= ~(1llu << slot); } - views->desc.dirty_mask |= 1 << slot; + views->desc.dirty_mask |= 1llu << slot; } static void si_set_sampler_views(struct pipe_context *ctx, @@ -514,12 +514,12 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, unsigned slot = start + i; if (!sstates[i]) { - samplers->desc.dirty_mask &= ~(1 << slot); + samplers->desc.dirty_mask &= ~(1llu << slot); continue; } samplers->desc_data[slot] = sstates[i]->val; - samplers->desc.dirty_mask |= 1 << slot; + samplers->desc.dirty_mask |= 1llu << slot; } si_update_descriptors(sctx, &samplers->desc); @@ -579,11 +579,11 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers) static void si_buffer_resources_begin_new_cs(struct si_context *sctx, struct si_buffer_resources *buffers) { - unsigned mask = buffers->desc.enabled_mask; + uint64_t mask = buffers->desc.enabled_mask; /* Add relocations to the CS. */ while (mask) { - int i = u_bit_scan(&mask); + int i = u_bit_scan64(&mask); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffers->buffers[i], @@ -767,14 +767,14 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffer, buffers->shader_usage, buffers->priority); - buffers->desc.enabled_mask |= 1 << slot; + buffers->desc.enabled_mask |= 1llu << slot; } else { /* Clear the descriptor. */ memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4); - buffers->desc.enabled_mask &= ~(1 << slot); + buffers->desc.enabled_mask &= ~(1llu << slot); } - buffers->desc.dirty_mask |= 1 << slot; + buffers->desc.dirty_mask |= 1llu << slot; si_update_descriptors(sctx, &buffers->desc); } @@ -860,14 +860,14 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffer, buffers->shader_usage, buffers->priority); - buffers->desc.enabled_mask |= 1 << slot; + buffers->desc.enabled_mask |= 1llu << slot; } else { /* Clear the descriptor. */ memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4); - buffers->desc.enabled_mask &= ~(1 << slot); + buffers->desc.enabled_mask &= ~(1llu << slot); } - buffers->desc.dirty_mask |= 1 << slot; + buffers->desc.dirty_mask |= 1llu << slot; si_update_descriptors(sctx, &buffers->desc); } @@ -945,24 +945,24 @@ static void si_set_streamout_targets(struct pipe_context *ctx, r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffer, buffers->shader_usage, buffers->priority); - buffers->desc.enabled_mask |= 1 << bufidx; + buffers->desc.enabled_mask |= 1llu << bufidx; } else { /* Clear the descriptor and unset the resource. */ memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4); pipe_resource_reference(&buffers->buffers[bufidx], NULL); - buffers->desc.enabled_mask &= ~(1 << bufidx); + buffers->desc.enabled_mask &= ~(1llu << bufidx); } - buffers->desc.dirty_mask |= 1 << bufidx; + buffers->desc.dirty_mask |= 1llu << bufidx; } for (; i < old_num_targets; i++) { bufidx = SI_SO_BUF_OFFSET + i; /* Clear the descriptor and unset the resource. */ memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4); pipe_resource_reference(&buffers->buffers[bufidx], NULL); - buffers->desc.enabled_mask &= ~(1 << bufidx); - buffers->desc.dirty_mask |= 1 << bufidx; + buffers->desc.enabled_mask &= ~(1llu << bufidx); + buffers->desc.dirty_mask |= 1llu << bufidx; } si_update_descriptors(sctx, &buffers->desc); @@ -1035,10 +1035,10 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_buffer_resources *buffers = &sctx->rw_buffers[shader]; bool found = false; - uint32_t mask = buffers->desc.enabled_mask; + uint64_t mask = buffers->desc.enabled_mask; while (mask) { - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); if (buffers->buffers[i] == buf) { si_desc_reset_buffer_offset(ctx, buffers->desc_data[i], old_va, buf); @@ -1047,7 +1047,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource rbuffer, buffers->shader_usage, buffers->priority); - buffers->desc.dirty_mask |= 1 << i; + buffers->desc.dirty_mask |= 1llu << i; found = true; if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) { @@ -1070,10 +1070,10 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_buffer_resources *buffers = &sctx->const_buffers[shader]; bool found = false; - uint32_t mask = buffers->desc.enabled_mask; + uint64_t mask = buffers->desc.enabled_mask; while (mask) { - unsigned i = u_bit_scan(&mask); + unsigned i = u_bit_scan64(&mask); if (buffers->buffers[i] == buf) { si_desc_reset_buffer_offset(ctx, buffers->desc_data[i], old_va, buf); @@ -1082,7 +1082,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource rbuffer, buffers->shader_usage, buffers->priority); - buffers->desc.dirty_mask |= 1 << i; + buffers->desc.dirty_mask |= 1llu << i; found = true; } } @@ -1101,16 +1101,16 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_sampler_views *views = &sctx->samplers[shader].views; bool found = false; - uint32_t mask = views->desc.enabled_mask; + uint64_t mask = views->desc.enabled_mask; while (mask) { - unsigned i = u_bit_scan(&mask); + unsigned i = u_bit_scan64(&mask); if (views->views[i]->texture == buf) { r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); - views->desc.dirty_mask |= 1 << i; + views->desc.dirty_mask |= 1llu << i; found = true; } } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 3cd252c0e64..d2feb7da47e 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -155,9 +155,9 @@ struct si_descriptors { unsigned buffer_offset; /* The i-th bit is set if that element is dirty (changed but not emitted). */ - unsigned dirty_mask; + uint64_t dirty_mask; /* The i-th bit is set if that element is enabled (non-NULL resource). */ - unsigned enabled_mask; + uint64_t enabled_mask; /* We can't update descriptors directly because the GPU might be * reading them at the same time, so we have to update them