radeonsi: clear PIPE_IMAGE_ACCESS_WRITE when it's invalid to be on the safe side
[mesa.git] src/gallium/drivers/radeonsi/si_descriptors.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 /* Resource binding slots and sampler states (each described with 8 or
25 * 4 dwords) are stored in lists in memory which is accessed by shaders
26 * using scalar load instructions.
27 *
28 * This file is responsible for managing such lists. It keeps a copy of all
29 * descriptors in CPU memory and re-uploads a whole list if some slots have
30 * been changed.
31 *
  32 * This code is also responsible for updating shader pointers to those lists.
33 *
34 * Note that CP DMA can't be used for updating the lists, because a GPU hang
35 * could leave the list in a mid-IB state and the next IB would get wrong
36 * descriptors and the whole context would be unusable at that point.
  37 * (Note: Register shadowing can't be used for the same reason.)
38 *
39 * Also, uploading descriptors to newly allocated memory doesn't require
40 * a KCACHE flush.
41 *
42 *
43 * Possible scenarios for one 16 dword image+sampler slot:
44 *
45 * | Image | w/ FMASK | Buffer | NULL
46 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
47 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
48 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
49 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
50 *
51 * FMASK implies MSAA, therefore no sampler state.
52 * Sampler states are never unbound except when FMASK is bound.
53 */
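/* For example, si_set_sampler_view() below writes the view words at dword
 * offset desc_slot * 16, the FMASK (or null) words at +8 and the sampler
 * state at +12, matching the layout above; image-only slots are 8 dwords
 * wide (see si_set_shader_image()).
 */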
54
55 #include "radeon/r600_cs.h"
56 #include "si_pipe.h"
57 #include "sid.h"
58 #include "gfx9d.h"
59
60 #include "util/hash_table.h"
61 #include "util/u_idalloc.h"
62 #include "util/u_format.h"
63 #include "util/u_memory.h"
64 #include "util/u_upload_mgr.h"
65
66
67 /* NULL image and buffer descriptor for textures (alpha = 1) and images
68 * (alpha = 0).
69 *
70 * For images, all fields must be zero except for the swizzle, which
  71 * supports arbitrary combinations of 0s and 1s. The texture type must be
  72 * set to a valid type (e.g. 1D); if it isn't set, the hw hangs.
73 *
74 * For buffers, all fields must be zero. If they are not, the hw hangs.
75 *
76 * This is the only reason why the buffer descriptor must be in words [4:7].
77 */
78 static uint32_t null_texture_descriptor[8] = {
79 0,
80 0,
81 0,
82 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
83 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
84 /* the rest must contain zeros, which is also used by the buffer
85 * descriptor */
86 };
87
88 static uint32_t null_image_descriptor[8] = {
89 0,
90 0,
91 0,
92 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
93 /* the rest must contain zeros, which is also used by the buffer
94 * descriptor */
95 };
96
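/* Return the buffer address stored in dwords [0:1] of a buffer descriptor. */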
97 static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
98 {
99 return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
100 }
101
102 static void si_init_descriptor_list(uint32_t *desc_list,
103 unsigned element_dw_size,
104 unsigned num_elements,
105 const uint32_t *null_descriptor)
106 {
107 int i;
108
 109 /* Initialize the array to NULL descriptors if a null descriptor is given. */
110 if (null_descriptor) {
111 assert(element_dw_size % 8 == 0);
112 for (i = 0; i < num_elements * element_dw_size / 8; i++)
113 memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
114 }
115 }
116
117 static void si_init_descriptors(struct si_descriptors *desc,
118 unsigned shader_userdata_index,
119 unsigned element_dw_size,
120 unsigned num_elements)
121 {
122 desc->list = CALLOC(num_elements, element_dw_size * 4);
123 desc->element_dw_size = element_dw_size;
124 desc->num_elements = num_elements;
125 desc->shader_userdata_offset = shader_userdata_index * 4;
126 desc->slot_index_to_bind_directly = -1;
127 }
128
129 static void si_release_descriptors(struct si_descriptors *desc)
130 {
131 r600_resource_reference(&desc->buffer, NULL);
132 FREE(desc->list);
133 }
134
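/* Upload the active slot range of a descriptor list to GPU-visible memory,
 * or bind a single buffer descriptor directly if only that slot is active,
 * and mark the shader pointers atom dirty. Returns false if the upload
 * allocation fails, in which case the draw call must be skipped.
 */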
135 static bool si_upload_descriptors(struct si_context *sctx,
136 struct si_descriptors *desc)
137 {
138 unsigned slot_size = desc->element_dw_size * 4;
139 unsigned first_slot_offset = desc->first_active_slot * slot_size;
140 unsigned upload_size = desc->num_active_slots * slot_size;
141
142 /* Skip the upload if no shader is using the descriptors. dirty_mask
143 * will stay dirty and the descriptors will be uploaded when there is
144 * a shader using them.
145 */
146 if (!upload_size)
147 return true;
148
149 /* If there is just one active descriptor, bind it directly. */
150 if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
151 desc->num_active_slots == 1) {
152 uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
153 desc->element_dw_size];
154
155 /* The buffer is already in the buffer list. */
156 r600_resource_reference(&desc->buffer, NULL);
157 desc->gpu_list = NULL;
158 desc->gpu_address = si_desc_extract_buffer_address(descriptor);
159 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
160 return true;
161 }
162
163 uint32_t *ptr;
164 int buffer_offset;
165 u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
166 si_optimal_tcc_alignment(sctx, upload_size),
167 (unsigned*)&buffer_offset,
168 (struct pipe_resource**)&desc->buffer,
169 (void**)&ptr);
170 if (!desc->buffer) {
171 desc->gpu_address = 0;
172 return false; /* skip the draw call */
173 }
174
175 util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
176 upload_size);
177 desc->gpu_list = ptr - first_slot_offset / 4;
178
179 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
180 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
181
182 /* The shader pointer should point to slot 0. */
183 buffer_offset -= first_slot_offset;
184 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
185
186 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
187 return true;
188 }
189
190 static void
191 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
192 {
193 if (!desc->buffer)
194 return;
195
196 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
197 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
198 }
199
200 /* SAMPLER VIEWS */
201
202 static unsigned
203 si_sampler_and_image_descriptors_idx(unsigned shader)
204 {
205 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
206 SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
207 }
208
209 static struct si_descriptors *
210 si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
211 {
212 return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
213 }
214
215 static void si_release_sampler_views(struct si_samplers *samplers)
216 {
217 int i;
218
219 for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
220 pipe_sampler_view_reference(&samplers->views[i], NULL);
221 }
222 }
223
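/* Add the resource backing a sampler view to the current CS, substituting
 * the flushed depth texture when depth/stencil can't be sampled directly,
 * and also add the separate DCC buffer if the texture has one.
 */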
224 static void si_sampler_view_add_buffer(struct si_context *sctx,
225 struct pipe_resource *resource,
226 enum radeon_bo_usage usage,
227 bool is_stencil_sampler,
228 bool check_mem)
229 {
230 struct r600_resource *rres;
231 struct r600_texture *rtex;
232 enum radeon_bo_priority priority;
233
234 if (!resource)
235 return;
236
237 if (resource->target != PIPE_BUFFER) {
238 struct r600_texture *tex = (struct r600_texture*)resource;
239
240 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler))
241 resource = &tex->flushed_depth_texture->resource.b.b;
242 }
243
244 rres = (struct r600_resource*)resource;
245 priority = r600_get_sampler_view_priority(rres);
246
247 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
248 rres, usage, priority,
249 check_mem);
250
251 if (resource->target == PIPE_BUFFER)
252 return;
253
254 /* Now add separate DCC or HTILE. */
255 rtex = (struct r600_texture*)resource;
256 if (rtex->dcc_separate_buffer) {
257 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
258 rtex->dcc_separate_buffer, usage,
259 RADEON_PRIO_DCC, check_mem);
260 }
261 }
262
263 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
264 struct si_samplers *samplers)
265 {
266 unsigned mask = samplers->enabled_mask;
267
268 /* Add buffers to the CS. */
269 while (mask) {
270 int i = u_bit_scan(&mask);
271 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
272
273 si_sampler_view_add_buffer(sctx, sview->base.texture,
274 RADEON_USAGE_READ,
275 sview->is_stencil_sampler, false);
276 }
277 }
278
279 /* Set buffer descriptor fields that can be changed by reallocations. */
280 static void si_set_buf_desc_address(struct r600_resource *buf,
281 uint64_t offset, uint32_t *state)
282 {
283 uint64_t va = buf->gpu_address + offset;
284
285 state[0] = va;
286 state[1] &= C_008F04_BASE_ADDRESS_HI;
287 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
288 }
289
290 /* Set texture descriptor fields that can be changed by reallocations.
291 *
292 * \param tex texture
293 * \param base_level_info information of the level of BASE_ADDRESS
294 * \param base_level the level of BASE_ADDRESS
295 * \param first_level pipe_sampler_view.u.tex.first_level
296 * \param block_width util_format_get_blockwidth()
297 * \param is_stencil select between separate Z & Stencil
298 * \param state descriptor to update
299 */
300 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
301 struct r600_texture *tex,
302 const struct legacy_surf_level *base_level_info,
303 unsigned base_level, unsigned first_level,
304 unsigned block_width, bool is_stencil,
305 uint32_t *state)
306 {
307 uint64_t va, meta_va = 0;
308
309 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
310 tex = tex->flushed_depth_texture;
311 is_stencil = false;
312 }
313
314 va = tex->resource.gpu_address;
315
316 if (sscreen->b.chip_class >= GFX9) {
317 /* Only stencil_offset needs to be added here. */
318 if (is_stencil)
319 va += tex->surface.u.gfx9.stencil_offset;
320 else
321 va += tex->surface.u.gfx9.surf_offset;
322 } else {
323 va += base_level_info->offset;
324 }
325
326 state[0] = va >> 8;
327 state[1] &= C_008F14_BASE_ADDRESS_HI;
328 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
329
330 /* Only macrotiled modes can set tile swizzle.
331 * GFX9 doesn't use (legacy) base_level_info.
332 */
333 if (sscreen->b.chip_class >= GFX9 ||
334 base_level_info->mode == RADEON_SURF_MODE_2D)
335 state[0] |= tex->surface.tile_swizzle;
336
337 if (sscreen->b.chip_class >= VI) {
338 state[6] &= C_008F28_COMPRESSION_EN;
339 state[7] = 0;
340
341 if (vi_dcc_enabled(tex, first_level)) {
342 meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
343 tex->dcc_offset;
344
345 if (sscreen->b.chip_class == VI) {
346 meta_va += base_level_info->dcc_offset;
347 assert(base_level_info->mode == RADEON_SURF_MODE_2D);
348 }
349
350 meta_va |= (uint32_t)tex->surface.tile_swizzle << 8;
351 } else if (vi_tc_compat_htile_enabled(tex, first_level)) {
352 meta_va = tex->resource.gpu_address + tex->htile_offset;
353 }
354
355 if (meta_va) {
356 state[6] |= S_008F28_COMPRESSION_EN(1);
357 state[7] = meta_va >> 8;
358 }
359 }
360
361 if (sscreen->b.chip_class >= GFX9) {
362 state[3] &= C_008F1C_SW_MODE;
363 state[4] &= C_008F20_PITCH_GFX9;
364
365 if (is_stencil) {
366 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
367 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch);
368 } else {
369 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
370 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch);
371 }
372
373 state[5] &= C_008F24_META_DATA_ADDRESS &
374 C_008F24_META_PIPE_ALIGNED &
375 C_008F24_META_RB_ALIGNED;
376 if (meta_va) {
377 struct gfx9_surf_meta_flags meta;
378
379 if (tex->dcc_offset)
380 meta = tex->surface.u.gfx9.dcc;
381 else
382 meta = tex->surface.u.gfx9.htile;
383
384 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
385 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
386 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
387 }
388 } else {
389 /* SI-CI-VI */
390 unsigned pitch = base_level_info->nblk_x * block_width;
391 unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
392
393 state[3] &= C_008F1C_TILING_INDEX;
394 state[3] |= S_008F1C_TILING_INDEX(index);
395 state[4] &= C_008F20_PITCH_GFX6;
396 state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
397 }
398 }
399
400 static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
401 struct si_sampler_view *sview,
402 struct r600_texture *tex,
403 uint32_t *desc)
404 {
405 if (sview && sview->is_integer)
406 memcpy(desc, sstate->integer_val, 4*4);
407 else if (tex && tex->upgraded_depth &&
408 (!sview || !sview->is_stencil_sampler))
409 memcpy(desc, sstate->upgraded_depth_val, 4*4);
410 else
411 memcpy(desc, sstate->val, 4*4);
412 }
413
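/* Write the 16-dword sampler/image view descriptor: view words in [0:7],
 * FMASK (or a null descriptor) in [8:11], and the sampler state in [12:15]
 * (see the layout table at the top of this file). Views that are
 * incompatible with DCC get DCC disabled or decompressed first.
 */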
414 static void si_set_sampler_view_desc(struct si_context *sctx,
415 struct si_sampler_view *sview,
416 struct si_sampler_state *sstate,
417 uint32_t *desc)
418 {
419 struct pipe_sampler_view *view = &sview->base;
420 struct r600_texture *rtex = (struct r600_texture *)view->texture;
421 bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
422
423 if (unlikely(!is_buffer && sview->dcc_incompatible)) {
424 if (vi_dcc_enabled(rtex, view->u.tex.first_level))
425 if (!si_texture_disable_dcc(&sctx->b, rtex))
426 sctx->b.decompress_dcc(&sctx->b.b, rtex);
427
428 sview->dcc_incompatible = false;
429 }
430
431 assert(rtex); /* views with texture == NULL aren't supported */
432 memcpy(desc, sview->state, 8*4);
433
434 if (is_buffer) {
435 si_set_buf_desc_address(&rtex->resource,
436 sview->base.u.buf.offset,
437 desc + 4);
438 } else {
439 bool is_separate_stencil = rtex->db_compatible &&
440 sview->is_stencil_sampler;
441
442 si_set_mutable_tex_desc_fields(sctx->screen, rtex,
443 sview->base_level_info,
444 sview->base_level,
445 sview->base.u.tex.first_level,
446 sview->block_width,
447 is_separate_stencil,
448 desc);
449 }
450
451 if (!is_buffer && rtex->fmask.size) {
452 memcpy(desc + 8, sview->fmask_state, 8*4);
453 } else {
454 /* Disable FMASK and bind sampler state in [12:15]. */
455 memcpy(desc + 8, null_texture_descriptor, 4*4);
456
457 if (sstate)
458 si_set_sampler_state_desc(sstate, sview,
459 is_buffer ? NULL : rtex,
460 desc + 12);
461 }
462 }
463
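/* A color texture needs decompression if it has FMASK, or if it has dirty
 * levels and is compressed by CMASK or DCC.
 */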
464 static bool color_needs_decompression(struct r600_texture *rtex)
465 {
466 return rtex->fmask.size ||
467 (rtex->dirty_level_mask &&
468 (rtex->cmask.size || rtex->dcc_offset));
469 }
470
471 static bool depth_needs_decompression(struct r600_texture *rtex)
472 {
473 /* If the depth/stencil texture is TC-compatible, no decompression
474 * will be done. The decompression function will only flush DB caches
475 * to make it coherent with shaders. That's necessary because the driver
476 * doesn't flush DB caches in any other case.
477 */
478 return rtex->db_compatible;
479 }
480
481 static void si_set_sampler_view(struct si_context *sctx,
482 unsigned shader,
483 unsigned slot, struct pipe_sampler_view *view,
484 bool disallow_early_out)
485 {
486 struct si_samplers *samplers = &sctx->samplers[shader];
487 struct si_sampler_view *rview = (struct si_sampler_view*)view;
488 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
489 unsigned desc_slot = si_get_sampler_slot(slot);
490 uint32_t *desc = descs->list + desc_slot * 16;
491
492 if (samplers->views[slot] == view && !disallow_early_out)
493 return;
494
495 if (view) {
496 struct r600_texture *rtex = (struct r600_texture *)view->texture;
497
498 si_set_sampler_view_desc(sctx, rview,
499 samplers->sampler_states[slot], desc);
500
501 if (rtex->resource.b.b.target == PIPE_BUFFER) {
502 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
503 samplers->needs_depth_decompress_mask &= ~(1u << slot);
504 samplers->needs_color_decompress_mask &= ~(1u << slot);
505 } else {
506 if (depth_needs_decompression(rtex)) {
507 samplers->needs_depth_decompress_mask |= 1u << slot;
508 } else {
509 samplers->needs_depth_decompress_mask &= ~(1u << slot);
510 }
511 if (color_needs_decompression(rtex)) {
512 samplers->needs_color_decompress_mask |= 1u << slot;
513 } else {
514 samplers->needs_color_decompress_mask &= ~(1u << slot);
515 }
516
517 if (rtex->dcc_offset &&
518 p_atomic_read(&rtex->framebuffers_bound))
519 sctx->need_check_render_feedback = true;
520 }
521
522 pipe_sampler_view_reference(&samplers->views[slot], view);
523 samplers->enabled_mask |= 1u << slot;
524
525 /* Since this can flush, it must be done after enabled_mask is
526 * updated. */
527 si_sampler_view_add_buffer(sctx, view->texture,
528 RADEON_USAGE_READ,
529 rview->is_stencil_sampler, true);
530 } else {
531 pipe_sampler_view_reference(&samplers->views[slot], NULL);
532 memcpy(desc, null_texture_descriptor, 8*4);
533 /* Only clear the lower dwords of FMASK. */
534 memcpy(desc + 8, null_texture_descriptor, 4*4);
535 /* Re-set the sampler state if we are transitioning from FMASK. */
536 if (samplers->sampler_states[slot])
537 si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL,
538 desc + 12);
539
540 samplers->enabled_mask &= ~(1u << slot);
541 samplers->needs_depth_decompress_mask &= ~(1u << slot);
542 samplers->needs_color_decompress_mask &= ~(1u << slot);
543 }
544
545 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
546 }
547
548 static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
549 unsigned shader)
550 {
551 struct si_samplers *samplers = &sctx->samplers[shader];
552 unsigned shader_bit = 1 << shader;
553
554 if (samplers->needs_depth_decompress_mask ||
555 samplers->needs_color_decompress_mask ||
556 sctx->images[shader].needs_color_decompress_mask)
557 sctx->shader_needs_decompress_mask |= shader_bit;
558 else
559 sctx->shader_needs_decompress_mask &= ~shader_bit;
560 }
561
562 static void si_set_sampler_views(struct pipe_context *ctx,
563 enum pipe_shader_type shader, unsigned start,
564 unsigned count,
565 struct pipe_sampler_view **views)
566 {
567 struct si_context *sctx = (struct si_context *)ctx;
568 int i;
569
570 if (!count || shader >= SI_NUM_SHADERS)
571 return;
572
573 if (views) {
574 for (i = 0; i < count; i++)
575 si_set_sampler_view(sctx, shader, start + i, views[i], false);
576 } else {
577 for (i = 0; i < count; i++)
578 si_set_sampler_view(sctx, shader, start + i, NULL, false);
579 }
580
581 si_update_shader_needs_decompress_mask(sctx, shader);
582 }
583
584 static void
585 si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
586 {
587 unsigned mask = samplers->enabled_mask;
588
589 while (mask) {
590 int i = u_bit_scan(&mask);
591 struct pipe_resource *res = samplers->views[i]->texture;
592
593 if (res && res->target != PIPE_BUFFER) {
594 struct r600_texture *rtex = (struct r600_texture *)res;
595
596 if (color_needs_decompression(rtex)) {
597 samplers->needs_color_decompress_mask |= 1u << i;
598 } else {
599 samplers->needs_color_decompress_mask &= ~(1u << i);
600 }
601 }
602 }
603 }
604
605 /* IMAGE VIEWS */
606
607 static void
608 si_release_image_views(struct si_images *images)
609 {
610 unsigned i;
611
612 for (i = 0; i < SI_NUM_IMAGES; ++i) {
613 struct pipe_image_view *view = &images->views[i];
614
615 pipe_resource_reference(&view->resource, NULL);
616 }
617 }
618
619 static void
620 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
621 {
622 uint mask = images->enabled_mask;
623
624 /* Add buffers to the CS. */
625 while (mask) {
626 int i = u_bit_scan(&mask);
627 struct pipe_image_view *view = &images->views[i];
628
629 assert(view->resource);
630
631 si_sampler_view_add_buffer(sctx, view->resource,
632 RADEON_USAGE_READWRITE, false, false);
633 }
634 }
635
636 static void
637 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
638 {
639 struct si_images *images = &ctx->images[shader];
640
641 if (images->enabled_mask & (1u << slot)) {
642 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
643 unsigned desc_slot = si_get_image_slot(slot);
644
645 pipe_resource_reference(&images->views[slot].resource, NULL);
646 images->needs_color_decompress_mask &= ~(1 << slot);
647
648 memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
649 images->enabled_mask &= ~(1u << slot);
650 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
651 }
652 }
653
654 static void
655 si_mark_image_range_valid(const struct pipe_image_view *view)
656 {
657 struct r600_resource *res = (struct r600_resource *)view->resource;
658
659 assert(res && res->b.b.target == PIPE_BUFFER);
660
661 util_range_add(&res->valid_buffer_range,
662 view->u.buf.offset,
663 view->u.buf.offset + view->u.buf.size);
664 }
665
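/* Fill an 8-dword image descriptor. Buffer images get a buffer descriptor
 * with the address in words [4:7]. For texture images, SI-CI-VI force the
 * selected level to be the base level, while GFX9 keeps the mip 0 base
 * address and selects the level through hw_level.
 */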
666 static void si_set_shader_image_desc(struct si_context *ctx,
667 const struct pipe_image_view *view,
668 bool skip_decompress,
669 uint32_t *desc)
670 {
671 struct si_screen *screen = ctx->screen;
672 struct r600_resource *res;
673
674 res = (struct r600_resource *)view->resource;
675
676 if (res->b.b.target == PIPE_BUFFER) {
677 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
678 si_mark_image_range_valid(view);
679
680 si_make_buffer_descriptor(screen, res,
681 view->format,
682 view->u.buf.offset,
683 view->u.buf.size, desc);
684 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
685 } else {
686 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
687 struct r600_texture *tex = (struct r600_texture *)res;
688 unsigned level = view->u.tex.level;
689 unsigned width, height, depth, hw_level;
690 bool uses_dcc = vi_dcc_enabled(tex, level);
691 unsigned access = view->access;
692
693 /* Clear the write flag when writes can't occur.
694 * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
 695 * so we don't want to trigger it.
696 */
697 if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) {
698 assert(!"Z/S and MSAA image stores are not supported");
699 access &= ~PIPE_IMAGE_ACCESS_WRITE;
700 }
701
702 assert(!tex->is_depth);
703 assert(tex->fmask.size == 0);
704
705 if (uses_dcc && !skip_decompress &&
706 (view->access & PIPE_IMAGE_ACCESS_WRITE ||
707 !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
708 /* If DCC can't be disabled, at least decompress it.
709 * The decompression is relatively cheap if the surface
710 * has been decompressed already.
711 */
712 if (!si_texture_disable_dcc(&ctx->b, tex))
713 ctx->b.decompress_dcc(&ctx->b.b, tex);
714 }
715
716 if (ctx->b.chip_class >= GFX9) {
717 /* Always set the base address. The swizzle modes don't
718 * allow setting mipmap level offsets as the base.
719 */
720 width = res->b.b.width0;
721 height = res->b.b.height0;
722 depth = res->b.b.depth0;
723 hw_level = level;
724 } else {
725 /* Always force the base level to the selected level.
726 *
727 * This is required for 3D textures, where otherwise
728 * selecting a single slice for non-layered bindings
729 * fails. It doesn't hurt the other targets.
730 */
731 width = u_minify(res->b.b.width0, level);
732 height = u_minify(res->b.b.height0, level);
733 depth = u_minify(res->b.b.depth0, level);
734 hw_level = 0;
735 }
736
737 si_make_texture_descriptor(screen, tex,
738 false, res->b.b.target,
739 view->format, swizzle,
740 hw_level, hw_level,
741 view->u.tex.first_layer,
742 view->u.tex.last_layer,
743 width, height, depth,
744 desc, NULL);
745 si_set_mutable_tex_desc_fields(screen, tex,
746 &tex->surface.u.legacy.level[level],
747 level, level,
748 util_format_get_blockwidth(view->format),
749 false, desc);
750 }
751 }
752
753 static void si_set_shader_image(struct si_context *ctx,
754 unsigned shader,
755 unsigned slot, const struct pipe_image_view *view,
756 bool skip_decompress)
757 {
758 struct si_images *images = &ctx->images[shader];
759 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
760 struct r600_resource *res;
761 unsigned desc_slot = si_get_image_slot(slot);
762 uint32_t *desc = descs->list + desc_slot * 8;
763
764 if (!view || !view->resource) {
765 si_disable_shader_image(ctx, shader, slot);
766 return;
767 }
768
769 res = (struct r600_resource *)view->resource;
770
771 if (&images->views[slot] != view)
772 util_copy_image_view(&images->views[slot], view);
773
774 si_set_shader_image_desc(ctx, view, skip_decompress, desc);
775
776 if (res->b.b.target == PIPE_BUFFER) {
777 images->needs_color_decompress_mask &= ~(1 << slot);
778 res->bind_history |= PIPE_BIND_SHADER_IMAGE;
779 } else {
780 struct r600_texture *tex = (struct r600_texture *)res;
781 unsigned level = view->u.tex.level;
782
783 if (color_needs_decompression(tex)) {
784 images->needs_color_decompress_mask |= 1 << slot;
785 } else {
786 images->needs_color_decompress_mask &= ~(1 << slot);
787 }
788
789 if (vi_dcc_enabled(tex, level) &&
790 p_atomic_read(&tex->framebuffers_bound))
791 ctx->need_check_render_feedback = true;
792 }
793
794 images->enabled_mask |= 1u << slot;
795 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
796
797 /* Since this can flush, it must be done after enabled_mask is updated. */
798 si_sampler_view_add_buffer(ctx, &res->b.b,
799 (view->access & PIPE_IMAGE_ACCESS_WRITE) ?
800 RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
801 false, true);
802 }
803
804 static void
805 si_set_shader_images(struct pipe_context *pipe,
806 enum pipe_shader_type shader,
807 unsigned start_slot, unsigned count,
808 const struct pipe_image_view *views)
809 {
810 struct si_context *ctx = (struct si_context *)pipe;
811 unsigned i, slot;
812
813 assert(shader < SI_NUM_SHADERS);
814
815 if (!count)
816 return;
817
818 assert(start_slot + count <= SI_NUM_IMAGES);
819
820 if (views) {
821 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
822 si_set_shader_image(ctx, shader, slot, &views[i], false);
823 } else {
824 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
825 si_set_shader_image(ctx, shader, slot, NULL, false);
826 }
827
828 si_update_shader_needs_decompress_mask(ctx, shader);
829 }
830
831 static void
832 si_images_update_needs_color_decompress_mask(struct si_images *images)
833 {
834 unsigned mask = images->enabled_mask;
835
836 while (mask) {
837 int i = u_bit_scan(&mask);
838 struct pipe_resource *res = images->views[i].resource;
839
840 if (res && res->target != PIPE_BUFFER) {
841 struct r600_texture *rtex = (struct r600_texture *)res;
842
843 if (color_needs_decompression(rtex)) {
844 images->needs_color_decompress_mask |= 1 << i;
845 } else {
846 images->needs_color_decompress_mask &= ~(1 << i);
847 }
848 }
849 }
850 }
851
852 /* SAMPLER STATES */
853
854 static void si_bind_sampler_states(struct pipe_context *ctx,
855 enum pipe_shader_type shader,
856 unsigned start, unsigned count, void **states)
857 {
858 struct si_context *sctx = (struct si_context *)ctx;
859 struct si_samplers *samplers = &sctx->samplers[shader];
860 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
861 struct si_sampler_state **sstates = (struct si_sampler_state**)states;
862 int i;
863
864 if (!count || shader >= SI_NUM_SHADERS)
865 return;
866
867 for (i = 0; i < count; i++) {
868 unsigned slot = start + i;
869 unsigned desc_slot = si_get_sampler_slot(slot);
870
871 if (!sstates[i] ||
872 sstates[i] == samplers->sampler_states[slot])
873 continue;
874
875 #ifdef DEBUG
876 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
877 #endif
878 samplers->sampler_states[slot] = sstates[i];
879
880 /* If FMASK is bound, don't overwrite it.
881 * The sampler state will be set after FMASK is unbound.
882 */
883 struct si_sampler_view *sview =
884 (struct si_sampler_view *)samplers->views[slot];
885
886 struct r600_texture *tex = NULL;
887
888 if (sview && sview->base.texture &&
889 sview->base.texture->target != PIPE_BUFFER)
890 tex = (struct r600_texture *)sview->base.texture;
891
892 if (tex && tex->fmask.size)
893 continue;
894
895 si_set_sampler_state_desc(sstates[i], sview, tex,
896 desc->list + desc_slot * 16 + 12);
897
898 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
899 }
900 }
901
902 /* BUFFER RESOURCES */
903
904 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
905 struct si_descriptors *descs,
906 unsigned num_buffers,
907 unsigned shader_userdata_index,
908 enum radeon_bo_usage shader_usage,
909 enum radeon_bo_usage shader_usage_constbuf,
910 enum radeon_bo_priority priority,
911 enum radeon_bo_priority priority_constbuf)
912 {
913 buffers->shader_usage = shader_usage;
914 buffers->shader_usage_constbuf = shader_usage_constbuf;
915 buffers->priority = priority;
916 buffers->priority_constbuf = priority_constbuf;
917 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
918
919 si_init_descriptors(descs, shader_userdata_index, 4, num_buffers);
920 }
921
922 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
923 struct si_descriptors *descs)
924 {
925 int i;
926
927 for (i = 0; i < descs->num_elements; i++) {
928 pipe_resource_reference(&buffers->buffers[i], NULL);
929 }
930
931 FREE(buffers->buffers);
932 }
933
934 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
935 struct si_buffer_resources *buffers)
936 {
937 unsigned mask = buffers->enabled_mask;
938
939 /* Add buffers to the CS. */
940 while (mask) {
941 int i = u_bit_scan(&mask);
942
943 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
944 r600_resource(buffers->buffers[i]),
945 i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
946 buffers->shader_usage_constbuf,
947 i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
948 buffers->priority_constbuf);
949 }
950 }
951
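/* Recover the pipe_resource, byte offset and size that a 4-dword buffer
 * descriptor was created from: the size is in dword 2 and the offset is
 * derived from the address stored in dwords [0:1].
 */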
952 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
953 struct si_descriptors *descs,
954 unsigned idx, struct pipe_resource **buf,
955 unsigned *offset, unsigned *size)
956 {
957 pipe_resource_reference(buf, buffers->buffers[idx]);
958 if (*buf) {
959 struct r600_resource *res = r600_resource(*buf);
960 const uint32_t *desc = descs->list + idx * 4;
961 uint64_t va;
962
963 *size = desc[2];
964
965 assert(G_008F04_STRIDE(desc[1]) == 0);
966 va = ((uint64_t)desc[1] << 32) | desc[0];
967
968 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
969 *offset = va - res->gpu_address;
970 }
971 }
972
973 /* VERTEX BUFFERS */
974
975 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
976 {
977 struct si_descriptors *desc = &sctx->vertex_buffers;
978 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
979 int i;
980
981 for (i = 0; i < count; i++) {
982 int vb = sctx->vertex_elements->vertex_buffer_index[i];
983
984 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
985 continue;
986 if (!sctx->vertex_buffer[vb].buffer.resource)
987 continue;
988
989 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
990 (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
991 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
992 }
993
994 if (!desc->buffer)
995 return;
996 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
997 desc->buffer, RADEON_USAGE_READ,
998 RADEON_PRIO_DESCRIPTORS);
999 }
1000
1001 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
1002 {
1003 struct si_vertex_elements *velems = sctx->vertex_elements;
1004 struct si_descriptors *desc = &sctx->vertex_buffers;
1005 unsigned i, count;
1006 unsigned desc_list_byte_size;
1007 unsigned first_vb_use_mask;
1008 uint32_t *ptr;
1009
1010 if (!sctx->vertex_buffers_dirty || !velems)
1011 return true;
1012
1013 count = velems->count;
1014
1015 if (!count)
1016 return true;
1017
1018 desc_list_byte_size = velems->desc_list_byte_size;
1019 first_vb_use_mask = velems->first_vb_use_mask;
1020
1021 /* Vertex buffer descriptors are the only ones which are uploaded
1022 * directly through a staging buffer and don't go through
1023 * the fine-grained upload path.
1024 */
1025 unsigned buffer_offset = 0;
1026 u_upload_alloc(sctx->b.b.const_uploader, 0,
1027 desc_list_byte_size,
1028 si_optimal_tcc_alignment(sctx, desc_list_byte_size),
1029 &buffer_offset,
1030 (struct pipe_resource**)&desc->buffer, (void**)&ptr);
1031 if (!desc->buffer) {
1032 desc->gpu_address = 0;
1033 return false;
1034 }
1035
1036 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
1037 desc->list = ptr;
1038 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1039 desc->buffer, RADEON_USAGE_READ,
1040 RADEON_PRIO_DESCRIPTORS);
1041
1042 assert(count <= SI_MAX_ATTRIBS);
1043
1044 for (i = 0; i < count; i++) {
1045 struct pipe_vertex_buffer *vb;
1046 struct r600_resource *rbuffer;
1047 unsigned vbo_index = velems->vertex_buffer_index[i];
1048 uint32_t *desc = &ptr[i*4];
1049
1050 vb = &sctx->vertex_buffer[vbo_index];
1051 rbuffer = (struct r600_resource*)vb->buffer.resource;
1052 if (!rbuffer) {
1053 memset(desc, 0, 16);
1054 continue;
1055 }
1056
1057 int offset = (int)vb->buffer_offset + (int)velems->src_offset[i];
1058 int64_t va = (int64_t)rbuffer->gpu_address + offset;
1059 assert(va > 0);
1060
1061 int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset;
1062 if (sctx->b.chip_class != VI && vb->stride) {
1063 /* Round up by rounding down and adding 1 */
1064 num_records = (num_records - velems->format_size[i]) /
1065 vb->stride + 1;
1066 }
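/* For example: width0 = 100, offset = 0, stride = 16 and format_size = 4
 * give (100 - 4) / 16 + 1 = 7 records; the last vertex starts at byte 96
 * and its 4 bytes still fit within the buffer.
 */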
1067 assert(num_records >= 0 && num_records <= UINT_MAX);
1068
1069 desc[0] = va;
1070 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1071 S_008F04_STRIDE(vb->stride);
1072 desc[2] = num_records;
1073 desc[3] = velems->rsrc_word3[i];
1074
1075 if (first_vb_use_mask & (1 << i)) {
1076 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1077 (struct r600_resource*)vb->buffer.resource,
1078 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1079 }
1080 }
1081
1082 /* Don't flush the const cache. It would have a very negative effect
1083 * on performance (confirmed by testing). New descriptors are always
1084 * uploaded to a fresh new buffer, so I don't think flushing the const
1085 * cache is needed. */
1086 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1087 sctx->vertex_buffers_dirty = false;
1088 sctx->vertex_buffer_pointer_dirty = true;
1089 sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
1090 return true;
1091 }
1092
1093
1094 /* CONSTANT BUFFERS */
1095
1096 static unsigned
1097 si_const_and_shader_buffer_descriptors_idx(unsigned shader)
1098 {
1099 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1100 SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
1101 }
1102
1103 static struct si_descriptors *
1104 si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
1105 {
1106 return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
1107 }
1108
1109 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
1110 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1111 {
1112 void *tmp;
1113
1114 u_upload_alloc(sctx->b.b.const_uploader, 0, size,
1115 si_optimal_tcc_alignment(sctx, size),
1116 const_offset,
1117 (struct pipe_resource**)rbuffer, &tmp);
1118 if (*rbuffer)
1119 util_memcpy_cpu_to_le32(tmp, ptr, size);
1120 }
1121
1122 static void si_set_constant_buffer(struct si_context *sctx,
1123 struct si_buffer_resources *buffers,
1124 unsigned descriptors_idx,
1125 uint slot, const struct pipe_constant_buffer *input)
1126 {
1127 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1128 assert(slot < descs->num_elements);
1129 pipe_resource_reference(&buffers->buffers[slot], NULL);
1130
1131 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1132 * with a NULL buffer). We need to use a dummy buffer instead. */
1133 if (sctx->b.chip_class == CIK &&
1134 (!input || (!input->buffer && !input->user_buffer)))
1135 input = &sctx->null_const_buf;
1136
1137 if (input && (input->buffer || input->user_buffer)) {
1138 struct pipe_resource *buffer = NULL;
1139 uint64_t va;
1140
1141 /* Upload the user buffer if needed. */
1142 if (input->user_buffer) {
1143 unsigned buffer_offset;
1144
1145 si_upload_const_buffer(sctx,
1146 (struct r600_resource**)&buffer, input->user_buffer,
1147 input->buffer_size, &buffer_offset);
1148 if (!buffer) {
1149 /* Just unbind on failure. */
1150 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
1151 return;
1152 }
1153 va = r600_resource(buffer)->gpu_address + buffer_offset;
1154 } else {
1155 pipe_resource_reference(&buffer, input->buffer);
1156 va = r600_resource(buffer)->gpu_address + input->buffer_offset;
1157 /* Only track usage for non-user buffers. */
1158 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
1159 }
1160
1161 /* Set the descriptor. */
1162 uint32_t *desc = descs->list + slot*4;
1163 desc[0] = va;
1164 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1165 S_008F04_STRIDE(0);
1166 desc[2] = input->buffer_size;
1167 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1168 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1169 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1170 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1171 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1172 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1173
1174 buffers->buffers[slot] = buffer;
1175 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1176 (struct r600_resource*)buffer,
1177 buffers->shader_usage_constbuf,
1178 buffers->priority_constbuf, true);
1179 buffers->enabled_mask |= 1u << slot;
1180 } else {
1181 /* Clear the descriptor. */
1182 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1183 buffers->enabled_mask &= ~(1u << slot);
1184 }
1185
1186 sctx->descriptors_dirty |= 1u << descriptors_idx;
1187 }
1188
1189 void si_set_rw_buffer(struct si_context *sctx,
1190 uint slot, const struct pipe_constant_buffer *input)
1191 {
1192 si_set_constant_buffer(sctx, &sctx->rw_buffers,
1193 SI_DESCS_RW_BUFFERS, slot, input);
1194 }
1195
1196 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
1197 enum pipe_shader_type shader, uint slot,
1198 const struct pipe_constant_buffer *input)
1199 {
1200 struct si_context *sctx = (struct si_context *)ctx;
1201
1202 if (shader >= SI_NUM_SHADERS)
1203 return;
1204
1205 slot = si_get_constbuf_slot(slot);
1206 si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
1207 si_const_and_shader_buffer_descriptors_idx(shader),
1208 slot, input);
1209 }
1210
1211 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
1212 uint slot, struct pipe_constant_buffer *cbuf)
1213 {
1214 cbuf->user_buffer = NULL;
1215 si_get_buffer_from_descriptors(
1216 &sctx->const_and_shader_buffers[shader],
1217 si_const_and_shader_buffer_descriptors(sctx, shader),
1218 si_get_constbuf_slot(slot),
1219 &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1220 }
1221
1222 /* SHADER BUFFERS */
1223
1224 static void si_set_shader_buffers(struct pipe_context *ctx,
1225 enum pipe_shader_type shader,
1226 unsigned start_slot, unsigned count,
1227 const struct pipe_shader_buffer *sbuffers)
1228 {
1229 struct si_context *sctx = (struct si_context *)ctx;
1230 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1231 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1232 unsigned i;
1233
1234 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1235
1236 for (i = 0; i < count; ++i) {
1237 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1238 struct r600_resource *buf;
1239 unsigned slot = si_get_shaderbuf_slot(start_slot + i);
1240 uint32_t *desc = descs->list + slot * 4;
1241 uint64_t va;
1242
1243 if (!sbuffer || !sbuffer->buffer) {
1244 pipe_resource_reference(&buffers->buffers[slot], NULL);
1245 memset(desc, 0, sizeof(uint32_t) * 4);
1246 buffers->enabled_mask &= ~(1u << slot);
1247 sctx->descriptors_dirty |=
1248 1u << si_const_and_shader_buffer_descriptors_idx(shader);
1249 continue;
1250 }
1251
1252 buf = (struct r600_resource *)sbuffer->buffer;
1253 va = buf->gpu_address + sbuffer->buffer_offset;
1254
1255 desc[0] = va;
1256 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1257 S_008F04_STRIDE(0);
1258 desc[2] = sbuffer->buffer_size;
1259 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1260 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1261 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1262 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1263 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1264 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1265
1266 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1267 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1268 buffers->shader_usage,
1269 buffers->priority, true);
1270 buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
1271
1272 buffers->enabled_mask |= 1u << slot;
1273 sctx->descriptors_dirty |=
1274 1u << si_const_and_shader_buffer_descriptors_idx(shader);
1275
1276 util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
1277 sbuffer->buffer_offset + sbuffer->buffer_size);
1278 }
1279 }
1280
1281 void si_get_shader_buffers(struct si_context *sctx,
1282 enum pipe_shader_type shader,
1283 uint start_slot, uint count,
1284 struct pipe_shader_buffer *sbuf)
1285 {
1286 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1287 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1288
1289 for (unsigned i = 0; i < count; ++i) {
1290 si_get_buffer_from_descriptors(
1291 buffers, descs,
1292 si_get_shaderbuf_slot(start_slot + i),
1293 &sbuf[i].buffer, &sbuf[i].buffer_offset,
1294 &sbuf[i].buffer_size);
1295 }
1296 }
1297
1298 /* RING BUFFERS */
1299
1300 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
1301 struct pipe_resource *buffer,
1302 unsigned stride, unsigned num_records,
1303 bool add_tid, bool swizzle,
1304 unsigned element_size, unsigned index_stride, uint64_t offset)
1305 {
1306 struct si_context *sctx = (struct si_context *)ctx;
1307 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1308 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1309
1310 /* The stride field in the resource descriptor has 14 bits */
1311 assert(stride < (1 << 14));
1312
1313 assert(slot < descs->num_elements);
1314 pipe_resource_reference(&buffers->buffers[slot], NULL);
1315
1316 if (buffer) {
1317 uint64_t va;
1318
1319 va = r600_resource(buffer)->gpu_address + offset;
1320
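/* Translate element_size and index_stride from bytes to the 2-bit hardware
 * encodings below: element sizes 2/4/8/16 map to 0-3, and index strides
 * 8/16/32/64 map to 0-3.
 */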
1321 switch (element_size) {
1322 default:
1323 assert(!"Unsupported ring buffer element size");
1324 case 0:
1325 case 2:
1326 element_size = 0;
1327 break;
1328 case 4:
1329 element_size = 1;
1330 break;
1331 case 8:
1332 element_size = 2;
1333 break;
1334 case 16:
1335 element_size = 3;
1336 break;
1337 }
1338
1339 switch (index_stride) {
1340 default:
1341 assert(!"Unsupported ring buffer index stride");
1342 case 0:
1343 case 8:
1344 index_stride = 0;
1345 break;
1346 case 16:
1347 index_stride = 1;
1348 break;
1349 case 32:
1350 index_stride = 2;
1351 break;
1352 case 64:
1353 index_stride = 3;
1354 break;
1355 }
1356
1357 if (sctx->b.chip_class >= VI && stride)
1358 num_records *= stride;
1359
1360 /* Set the descriptor. */
1361 uint32_t *desc = descs->list + slot*4;
1362 desc[0] = va;
1363 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1364 S_008F04_STRIDE(stride) |
1365 S_008F04_SWIZZLE_ENABLE(swizzle);
1366 desc[2] = num_records;
1367 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1368 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1369 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1370 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1371 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1372 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1373 S_008F0C_INDEX_STRIDE(index_stride) |
1374 S_008F0C_ADD_TID_ENABLE(add_tid);
1375
1376 if (sctx->b.chip_class >= GFX9)
1377 assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
1378 else
1379 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
1380
1381 pipe_resource_reference(&buffers->buffers[slot], buffer);
1382 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1383 (struct r600_resource*)buffer,
1384 buffers->shader_usage, buffers->priority);
1385 buffers->enabled_mask |= 1u << slot;
1386 } else {
1387 /* Clear the descriptor. */
1388 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1389 buffers->enabled_mask &= ~(1u << slot);
1390 }
1391
1392 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1393 }
1394
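/* Re-point a buffer descriptor at a reallocated buffer, preserving the
 * offset of the old descriptor address within the old buffer.
 */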
1395 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
1396 uint32_t *desc, uint64_t old_buf_va,
1397 struct pipe_resource *new_buf)
1398 {
1399 /* Retrieve the buffer offset from the descriptor. */
1400 uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
1401
1402 assert(old_buf_va <= old_desc_va);
1403 uint64_t offset_within_buffer = old_desc_va - old_buf_va;
1404
1405 /* Update the descriptor. */
1406 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer,
1407 desc);
1408 }
1409
1410 /* INTERNAL CONST BUFFERS */
1411
1412 static void si_set_polygon_stipple(struct pipe_context *ctx,
1413 const struct pipe_poly_stipple *state)
1414 {
1415 struct si_context *sctx = (struct si_context *)ctx;
1416 struct pipe_constant_buffer cb = {};
1417 unsigned stipple[32];
1418 int i;
1419
1420 for (i = 0; i < 32; i++)
1421 stipple[i] = util_bitreverse(state->stipple[i]);
1422
1423 cb.user_buffer = stipple;
1424 cb.buffer_size = sizeof(stipple);
1425
1426 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1427 }
1428
1429 /* TEXTURE METADATA ENABLE/DISABLE */
1430
1431 static void
1432 si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
1433 {
1434 util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
1435 util_dynarray_clear(&sctx->resident_img_needs_color_decompress);
1436
1437 util_dynarray_foreach(&sctx->resident_tex_handles,
1438 struct si_texture_handle *, tex_handle) {
1439 struct pipe_resource *res = (*tex_handle)->view->texture;
1440 struct r600_texture *rtex;
1441
1442 if (!res || res->target == PIPE_BUFFER)
1443 continue;
1444
1445 rtex = (struct r600_texture *)res;
1446 if (!color_needs_decompression(rtex))
1447 continue;
1448
1449 util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
1450 struct si_texture_handle *, *tex_handle);
1451 }
1452
1453 util_dynarray_foreach(&sctx->resident_img_handles,
1454 struct si_image_handle *, img_handle) {
1455 struct pipe_image_view *view = &(*img_handle)->view;
1456 struct pipe_resource *res = view->resource;
1457 struct r600_texture *rtex;
1458
1459 if (!res || res->target == PIPE_BUFFER)
1460 continue;
1461
1462 rtex = (struct r600_texture *)res;
1463 if (!color_needs_decompression(rtex))
1464 continue;
1465
1466 util_dynarray_append(&sctx->resident_img_needs_color_decompress,
1467 struct si_image_handle *, *img_handle);
1468 }
1469 }
1470
1471 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1472 * while the texture is bound, possibly by a different context. In that case,
1473 * call this function to update needs_*_decompress_masks.
1474 */
1475 void si_update_needs_color_decompress_masks(struct si_context *sctx)
1476 {
1477 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1478 si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
1479 si_images_update_needs_color_decompress_mask(&sctx->images[i]);
1480 si_update_shader_needs_decompress_mask(sctx, i);
1481 }
1482
1483 si_resident_handles_update_needs_color_decompress(sctx);
1484 }
1485
1486 /* BUFFER DISCARD/INVALIDATION */
1487
1488 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
1489 static void si_reset_buffer_resources(struct si_context *sctx,
1490 struct si_buffer_resources *buffers,
1491 unsigned descriptors_idx,
1492 unsigned slot_mask,
1493 struct pipe_resource *buf,
1494 uint64_t old_va,
1495 enum radeon_bo_usage usage,
1496 enum radeon_bo_priority priority)
1497 {
1498 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1499 unsigned mask = buffers->enabled_mask & slot_mask;
1500
1501 while (mask) {
1502 unsigned i = u_bit_scan(&mask);
1503 if (buffers->buffers[i] == buf) {
1504 si_desc_reset_buffer_offset(&sctx->b.b,
1505 descs->list + i*4,
1506 old_va, buf);
1507 sctx->descriptors_dirty |= 1u << descriptors_idx;
1508
1509 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1510 (struct r600_resource *)buf,
1511 usage, priority, true);
1512 }
1513 }
1514 }
1515
1516 static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
1517 uint64_t old_va)
1518 {
1519 struct si_context *sctx = (struct si_context*)ctx;
1520 struct r600_resource *rbuffer = r600_resource(buf);
1521 unsigned i, shader;
1522 unsigned num_elems = sctx->vertex_elements ?
1523 sctx->vertex_elements->count : 0;
1524
1525 /* We changed the buffer, now we need to bind it where the old one
1526 * was bound. This consists of 2 things:
1527 * 1) Updating the resource descriptor and dirtying it.
1528 * 2) Adding a relocation to the CS, so that it's usable.
1529 */
1530
1531 /* Vertex buffers. */
1532 if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
1533 for (i = 0; i < num_elems; i++) {
1534 int vb = sctx->vertex_elements->vertex_buffer_index[i];
1535
1536 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1537 continue;
1538 if (!sctx->vertex_buffer[vb].buffer.resource)
1539 continue;
1540
1541 if (sctx->vertex_buffer[vb].buffer.resource == buf) {
1542 sctx->vertex_buffers_dirty = true;
1543 break;
1544 }
1545 }
1546 }
1547
1548 /* Streamout buffers. (other internal buffers can't be invalidated) */
1549 if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
1550 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1551 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1552 struct si_descriptors *descs =
1553 &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1554
1555 if (buffers->buffers[i] != buf)
1556 continue;
1557
1558 si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1559 old_va, buf);
1560 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1561
1562 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1563 rbuffer, buffers->shader_usage,
1564 RADEON_PRIO_SHADER_RW_BUFFER,
1565 true);
1566
1567 /* Update the streamout state. */
1568 if (sctx->streamout.begin_emitted)
1569 si_emit_streamout_end(sctx);
1570 sctx->streamout.append_bitmask =
1571 sctx->streamout.enabled_mask;
1572 si_streamout_buffers_dirty(sctx);
1573 }
1574 }
1575
1576 /* Constant and shader buffers. */
1577 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
1578 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1579 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1580 si_const_and_shader_buffer_descriptors_idx(shader),
1581 u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
1582 buf, old_va,
1583 sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
1584 sctx->const_and_shader_buffers[shader].priority_constbuf);
1585 }
1586
1587 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
1588 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1589 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1590 si_const_and_shader_buffer_descriptors_idx(shader),
1591 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
1592 buf, old_va,
1593 sctx->const_and_shader_buffers[shader].shader_usage,
1594 sctx->const_and_shader_buffers[shader].priority);
1595 }
1596
1597 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
1598 /* Texture buffers - update bindings. */
1599 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1600 struct si_samplers *samplers = &sctx->samplers[shader];
1601 struct si_descriptors *descs =
1602 si_sampler_and_image_descriptors(sctx, shader);
1603 unsigned mask = samplers->enabled_mask;
1604
1605 while (mask) {
1606 unsigned i = u_bit_scan(&mask);
1607 if (samplers->views[i]->texture == buf) {
1608 unsigned desc_slot = si_get_sampler_slot(i);
1609
1610 si_desc_reset_buffer_offset(ctx,
1611 descs->list +
1612 desc_slot * 16 + 4,
1613 old_va, buf);
1614 sctx->descriptors_dirty |=
1615 1u << si_sampler_and_image_descriptors_idx(shader);
1616
1617 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1618 rbuffer, RADEON_USAGE_READ,
1619 RADEON_PRIO_SAMPLER_BUFFER,
1620 true);
1621 }
1622 }
1623 }
1624 }
1625
1626 /* Shader images */
1627 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
1628 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1629 struct si_images *images = &sctx->images[shader];
1630 struct si_descriptors *descs =
1631 si_sampler_and_image_descriptors(sctx, shader);
1632 unsigned mask = images->enabled_mask;
1633
1634 while (mask) {
1635 unsigned i = u_bit_scan(&mask);
1636
1637 if (images->views[i].resource == buf) {
1638 unsigned desc_slot = si_get_image_slot(i);
1639
1640 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1641 si_mark_image_range_valid(&images->views[i]);
1642
1643 si_desc_reset_buffer_offset(
1644 ctx, descs->list + desc_slot * 8 + 4,
1645 old_va, buf);
1646 sctx->descriptors_dirty |=
1647 1u << si_sampler_and_image_descriptors_idx(shader);
1648
1649 radeon_add_to_buffer_list_check_mem(
1650 &sctx->b, &sctx->b.gfx, rbuffer,
1651 RADEON_USAGE_READWRITE,
1652 RADEON_PRIO_SAMPLER_BUFFER, true);
1653 }
1654 }
1655 }
1656 }
1657
1658 /* Bindless texture handles */
1659 if (rbuffer->texture_handle_allocated) {
1660 struct si_descriptors *descs = &sctx->bindless_descriptors;
1661
1662 util_dynarray_foreach(&sctx->resident_tex_handles,
1663 struct si_texture_handle *, tex_handle) {
1664 struct pipe_sampler_view *view = (*tex_handle)->view;
1665 unsigned desc_slot = (*tex_handle)->desc_slot;
1666
1667 if (view->texture == buf) {
1668 si_set_buf_desc_address(rbuffer,
1669 view->u.buf.offset,
1670 descs->list +
1671 desc_slot * 16 + 4);
1672
1673 (*tex_handle)->desc_dirty = true;
1674 sctx->bindless_descriptors_dirty = true;
1675
1676 radeon_add_to_buffer_list_check_mem(
1677 &sctx->b, &sctx->b.gfx, rbuffer,
1678 RADEON_USAGE_READ,
1679 RADEON_PRIO_SAMPLER_BUFFER, true);
1680 }
1681 }
1682 }
1683
1684 /* Bindless image handles */
1685 if (rbuffer->image_handle_allocated) {
1686 struct si_descriptors *descs = &sctx->bindless_descriptors;
1687
1688 util_dynarray_foreach(&sctx->resident_img_handles,
1689 struct si_image_handle *, img_handle) {
1690 struct pipe_image_view *view = &(*img_handle)->view;
1691 unsigned desc_slot = (*img_handle)->desc_slot;
1692
1693 if (view->resource == buf) {
1694 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1695 si_mark_image_range_valid(view);
1696
1697 si_set_buf_desc_address(rbuffer,
1698 view->u.buf.offset,
1699 descs->list +
1700 desc_slot * 16 + 4);
1701
1702 (*img_handle)->desc_dirty = true;
1703 sctx->bindless_descriptors_dirty = true;
1704
1705 radeon_add_to_buffer_list_check_mem(
1706 &sctx->b, &sctx->b.gfx, rbuffer,
1707 RADEON_USAGE_READWRITE,
1708 RADEON_PRIO_SAMPLER_BUFFER, true);
1709 }
1710 }
1711 }
1712 }
1713
 1714 /* Reallocate a buffer and update all resource bindings where the buffer is
1715 * bound.
1716 *
1717 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1718 * idle by discarding its contents. Apps usually tell us when to do this using
1719 * map_buffer flags, for example.
1720 */
1721 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1722 {
1723 struct si_context *sctx = (struct si_context*)ctx;
1724 struct r600_resource *rbuffer = r600_resource(buf);
1725 uint64_t old_va = rbuffer->gpu_address;
1726
1727 /* Reallocate the buffer in the same pipe_resource. */
1728 si_alloc_resource(&sctx->screen->b, rbuffer);
1729
1730 si_rebind_buffer(ctx, buf, old_va);
1731 }
1732
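/* Write an updated bindless descriptor directly into the resident descriptor
 * buffer using a WRITE_DATA packet that targets TC L2.
 */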
1733 static void si_upload_bindless_descriptor(struct si_context *sctx,
1734 unsigned desc_slot,
1735 unsigned num_dwords)
1736 {
1737 struct si_descriptors *desc = &sctx->bindless_descriptors;
1738 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1739 unsigned desc_slot_offset = desc_slot * 16;
1740 uint32_t *data;
1741 uint64_t va;
1742
1743 data = desc->list + desc_slot_offset;
1744 va = desc->gpu_address + desc_slot_offset * 4;
1745
1746 radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
1747 radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
1748 S_370_WR_CONFIRM(1) |
1749 S_370_ENGINE_SEL(V_370_ME));
1750 radeon_emit(cs, va);
1751 radeon_emit(cs, va >> 32);
1752 radeon_emit_array(cs, data, num_dwords);
1753 }
1754
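/* Update all dirty resident descriptors in memory while the GPU is idle. */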
1755 static void si_upload_bindless_descriptors(struct si_context *sctx)
1756 {
1757 if (!sctx->bindless_descriptors_dirty)
1758 return;
1759
1760 /* Wait for graphics/compute to be idle before updating the resident
1761 * descriptors directly in memory, in case the GPU is using them.
1762 */
1763 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1764 SI_CONTEXT_CS_PARTIAL_FLUSH;
1765 si_emit_cache_flush(sctx);
1766
1767 util_dynarray_foreach(&sctx->resident_tex_handles,
1768 struct si_texture_handle *, tex_handle) {
1769 unsigned desc_slot = (*tex_handle)->desc_slot;
1770
1771 if (!(*tex_handle)->desc_dirty)
1772 continue;
1773
1774 si_upload_bindless_descriptor(sctx, desc_slot, 16);
1775 (*tex_handle)->desc_dirty = false;
1776 }
1777
1778 util_dynarray_foreach(&sctx->resident_img_handles,
1779 struct si_image_handle *, img_handle) {
1780 unsigned desc_slot = (*img_handle)->desc_slot;
1781
1782 if (!(*img_handle)->desc_dirty)
1783 continue;
1784
1785 si_upload_bindless_descriptor(sctx, desc_slot, 8);
1786 (*img_handle)->desc_dirty = false;
1787 }
1788
1789 /* Invalidate L1 because it doesn't know that L2 changed. */
1790 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1;
1791 si_emit_cache_flush(sctx);
1792
1793 sctx->bindless_descriptors_dirty = false;
1794 }
1795
1796 /* Update the mutable image descriptor fields of a resident texture. */
1797 static void si_update_bindless_texture_descriptor(struct si_context *sctx,
1798 struct si_texture_handle *tex_handle)
1799 {
1800 struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
1801 struct si_descriptors *desc = &sctx->bindless_descriptors;
1802 unsigned desc_slot_offset = tex_handle->desc_slot * 16;
1803 uint32_t desc_list[16];
1804
1805 if (sview->base.texture->target == PIPE_BUFFER)
1806 return;
1807
1808 memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
1809 si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate,
1810 desc->list + desc_slot_offset);
1811
1812 if (memcmp(desc_list, desc->list + desc_slot_offset,
1813 sizeof(desc_list))) {
1814 tex_handle->desc_dirty = true;
1815 sctx->bindless_descriptors_dirty = true;
1816 }
1817 }
1818
1819 static void si_update_bindless_image_descriptor(struct si_context *sctx,
1820 struct si_image_handle *img_handle)
1821 {
1822 struct si_descriptors *desc = &sctx->bindless_descriptors;
1823 unsigned desc_slot_offset = img_handle->desc_slot * 16;
1824 struct pipe_image_view *view = &img_handle->view;
1825 uint32_t desc_list[8];
1826
1827 if (view->resource->target == PIPE_BUFFER)
1828 return;
1829
1830 memcpy(desc_list, desc->list + desc_slot_offset,
1831 sizeof(desc_list));
1832 si_set_shader_image_desc(sctx, view, true,
1833 desc->list + desc_slot_offset);
1834
1835 if (memcmp(desc_list, desc->list + desc_slot_offset,
1836 sizeof(desc_list))) {
1837 img_handle->desc_dirty = true;
1838 sctx->bindless_descriptors_dirty = true;
1839 }
1840 }
1841
1842 static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
1843 {
1844 util_dynarray_foreach(&sctx->resident_tex_handles,
1845 struct si_texture_handle *, tex_handle) {
1846 si_update_bindless_texture_descriptor(sctx, *tex_handle);
1847 }
1848
1849 util_dynarray_foreach(&sctx->resident_img_handles,
1850 struct si_image_handle *, img_handle) {
1851 si_update_bindless_image_descriptor(sctx, *img_handle);
1852 }
1853
1854 si_upload_bindless_descriptors(sctx);
1855 }
1856
1857 /* Update mutable image descriptor fields of all bound textures. */
1858 void si_update_all_texture_descriptors(struct si_context *sctx)
1859 {
1860 unsigned shader;
1861
1862 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1863 struct si_samplers *samplers = &sctx->samplers[shader];
1864 struct si_images *images = &sctx->images[shader];
1865 unsigned mask;
1866
1867 /* Images. */
1868 mask = images->enabled_mask;
1869 while (mask) {
1870 unsigned i = u_bit_scan(&mask);
1871 struct pipe_image_view *view = &images->views[i];
1872
1873 if (!view->resource ||
1874 view->resource->target == PIPE_BUFFER)
1875 continue;
1876
1877 si_set_shader_image(sctx, shader, i, view, true);
1878 }
1879
1880 /* Sampler views. */
1881 mask = samplers->enabled_mask;
1882 while (mask) {
1883 unsigned i = u_bit_scan(&mask);
1884 struct pipe_sampler_view *view = samplers->views[i];
1885
1886 if (!view ||
1887 !view->texture ||
1888 view->texture->target == PIPE_BUFFER)
1889 continue;
1890
1891 si_set_sampler_view(sctx, shader, i,
1892 samplers->views[i], true);
1893 }
1894
1895 si_update_shader_needs_decompress_mask(sctx, shader);
1896 }
1897
1898 si_update_all_resident_texture_descriptors(sctx);
1899 }
1900
1901 /* SHADER USER DATA */
1902
1903 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
1904 unsigned shader)
1905 {
1906 sctx->shader_pointers_dirty |=
1907 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
1908 SI_NUM_SHADER_DESCS);
1909
1910 if (shader == PIPE_SHADER_VERTEX)
1911 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1912
1913 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1914 }
1915
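/* Mark all shader pointers as dirty, so that they are re-emitted in the new
 * command stream.
 */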
1916 static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
1917 {
1918 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
1919 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1920 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1921 sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1922 sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1923 }
1924
1925 /* Set a base register address for user data constants in the given shader.
1926 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
1927 */
1928 static void si_set_user_data_base(struct si_context *sctx,
1929 unsigned shader, uint32_t new_base)
1930 {
1931 uint32_t *base = &sctx->shader_pointers.sh_base[shader];
1932
1933 if (*base != new_base) {
1934 *base = new_base;
1935
1936 if (new_base) {
1937 si_mark_shader_pointers_dirty(sctx, shader);
1938
1939 if (shader == PIPE_SHADER_VERTEX)
1940 sctx->last_vs_state = ~0;
1941 }
1942 }
1943 }
1944
1945 /* This must be called when these shaders are changed from non-NULL to NULL
1946 * and vice versa:
1947 * - geometry shader
1948 * - tessellation control shader
1949 * - tessellation evaluation shader
1950 */
1951 void si_shader_change_notify(struct si_context *sctx)
1952 {
1953 /* VS can be bound as VS, ES, or LS. */
1954 if (sctx->tes_shader.cso) {
1955 if (sctx->b.chip_class >= GFX9) {
1956 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1957 R_00B430_SPI_SHADER_USER_DATA_LS_0);
1958 } else {
1959 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1960 R_00B530_SPI_SHADER_USER_DATA_LS_0);
1961 }
1962 } else if (sctx->gs_shader.cso) {
1963 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1964 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1965 } else {
1966 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1967 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1968 }
1969
1970 /* TES can be bound as ES, VS, or not bound. */
1971 if (sctx->tes_shader.cso) {
1972 if (sctx->gs_shader.cso)
1973 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1974 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1975 else
1976 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1977 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1978 } else {
1979 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
1980 }
1981 }
1982
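/* Emit the SET_SH_REG header for "pointer_count" consecutive 64-bit descriptor
 * pointers, starting at the user data SGPRs of the given descriptor list.
 */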
1983 static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
1984 struct si_descriptors *desc,
1985 unsigned sh_base,
1986 unsigned pointer_count)
1987 {
1988 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
1989 radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
1990 }
1991
1992 static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
1993 struct si_descriptors *desc)
1994 {
1995 uint64_t va = desc->gpu_address;
1996
1997 radeon_emit(cs, va);
1998 radeon_emit(cs, va >> 32);
1999 }
2000
2001 static void si_emit_shader_pointer(struct si_context *sctx,
2002 struct si_descriptors *desc,
2003 unsigned sh_base)
2004 {
2005 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2006
2007 si_emit_shader_pointer_head(cs, desc, sh_base, 1);
2008 si_emit_shader_pointer_body(cs, desc);
2009 }
2010
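/* Emit the dirty descriptor pointers selected by "pointer_mask", packing
 * consecutive dirty ones into a single SET_SH_REG packet.
 */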
2011 static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
2012 unsigned pointer_mask,
2013 unsigned sh_base)
2014 {
2015 if (!sh_base)
2016 return;
2017
2018 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2019 unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
2020
2021 while (mask) {
2022 int start, count;
2023 u_bit_scan_consecutive_range(&mask, &start, &count);
2024
2025 struct si_descriptors *descs = &sctx->descriptors[start];
2026
2027 si_emit_shader_pointer_head(cs, descs, sh_base, count);
2028 for (int i = 0; i < count; i++)
2029 si_emit_shader_pointer_body(cs, descs + i);
2030 }
2031 }
2032
2033 static void si_emit_global_shader_pointers(struct si_context *sctx,
2034 struct si_descriptors *descs)
2035 {
2036 if (sctx->b.chip_class == GFX9) {
2037 /* Broadcast it to all shader stages. */
2038 si_emit_shader_pointer(sctx, descs,
2039 R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
2040 return;
2041 }
2042
2043 si_emit_shader_pointer(sctx, descs,
2044 R_00B030_SPI_SHADER_USER_DATA_PS_0);
2045 si_emit_shader_pointer(sctx, descs,
2046 R_00B130_SPI_SHADER_USER_DATA_VS_0);
2047 si_emit_shader_pointer(sctx, descs,
2048 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2049 si_emit_shader_pointer(sctx, descs,
2050 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2051 si_emit_shader_pointer(sctx, descs,
2052 R_00B430_SPI_SHADER_USER_DATA_HS_0);
2053 si_emit_shader_pointer(sctx, descs,
2054 R_00B530_SPI_SHADER_USER_DATA_LS_0);
2055 }
2056
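/* Emit all dirty graphics descriptor pointers (called via the shader_pointers
 * atom).
 */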
2057 void si_emit_graphics_shader_pointers(struct si_context *sctx,
2058 struct r600_atom *atom)
2059 {
2060 uint32_t *sh_base = sctx->shader_pointers.sh_base;
2061
2062 if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
2063 si_emit_global_shader_pointers(sctx,
2064 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2065 }
2066
2067 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
2068 sh_base[PIPE_SHADER_VERTEX]);
2069 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
2070 sh_base[PIPE_SHADER_TESS_CTRL]);
2071 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
2072 sh_base[PIPE_SHADER_TESS_EVAL]);
2073 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
2074 sh_base[PIPE_SHADER_GEOMETRY]);
2075 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
2076 sh_base[PIPE_SHADER_FRAGMENT]);
2077
2078 sctx->shader_pointers_dirty &=
2079 ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
2080
2081 if (sctx->vertex_buffer_pointer_dirty) {
2082 si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
2083 sh_base[PIPE_SHADER_VERTEX]);
2084 sctx->vertex_buffer_pointer_dirty = false;
2085 }
2086
2087 if (sctx->graphics_bindless_pointer_dirty) {
2088 si_emit_global_shader_pointers(sctx,
2089 &sctx->bindless_descriptors);
2090 sctx->graphics_bindless_pointer_dirty = false;
2091 }
2092 }
2093
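/* Emit all dirty compute descriptor pointers. */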
2094 void si_emit_compute_shader_pointers(struct si_context *sctx)
2095 {
2096 unsigned base = R_00B900_COMPUTE_USER_DATA_0;
2097
2098 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
2099 base);
2100 sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
2101
2102 if (sctx->compute_bindless_pointer_dirty) {
2103 si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
2104 sctx->compute_bindless_pointer_dirty = false;
2105 }
2106 }
2107
2108 /* BINDLESS */
2109
2110 static void si_init_bindless_descriptors(struct si_context *sctx,
2111 struct si_descriptors *desc,
2112 unsigned shader_userdata_index,
2113 unsigned num_elements)
2114 {
2115 MAYBE_UNUSED unsigned desc_slot;
2116
2117 si_init_descriptors(desc, shader_userdata_index, 16, num_elements);
2118 sctx->bindless_descriptors.num_active_slots = num_elements;
2119
2120 /* The first bindless descriptor is stored at slot 1, because 0 is not
2121 * considered to be a valid handle.
2122 */
2123 sctx->num_bindless_descriptors = 1;
2124
2125 /* Track which bindless slots are used (or not). */
2126 util_idalloc_init(&sctx->bindless_used_slots);
2127 util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
2128
2129 /* Reserve slot 0 because it's an invalid handle for bindless. */
2130 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2131 assert(desc_slot == 0);
2132 }
2133
2134 static void si_release_bindless_descriptors(struct si_context *sctx)
2135 {
2136 si_release_descriptors(&sctx->bindless_descriptors);
2137 util_idalloc_fini(&sctx->bindless_used_slots);
2138 }
2139
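/* Return the first free bindless descriptor slot, growing the CPU copy of the
 * descriptor list if it's full.
 */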
2140 static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
2141 {
2142 struct si_descriptors *desc = &sctx->bindless_descriptors;
2143 unsigned desc_slot;
2144
2145 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2146 if (desc_slot >= desc->num_elements) {
2147 /* The array of bindless descriptors is full; resize it. */
2148 unsigned slot_size = desc->element_dw_size * 4;
2149 unsigned new_num_elements = desc->num_elements * 2;
2150
2151 desc->list = REALLOC(desc->list, desc->num_elements * slot_size,
2152 new_num_elements * slot_size);
2153 desc->num_elements = new_num_elements;
2154 desc->num_active_slots = new_num_elements;
2155 }
2156
2157 assert(desc_slot);
2158 return desc_slot;
2159 }
2160
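/* Allocate a bindless slot, copy the descriptor into it and re-upload the
 * whole descriptor array. The returned slot index is the bindless handle
 * (0 means failure).
 */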
2161 static unsigned
2162 si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
2163 unsigned size)
2164 {
2165 struct si_descriptors *desc = &sctx->bindless_descriptors;
2166 unsigned desc_slot, desc_slot_offset;
2167
2168 /* Find a free slot. */
2169 desc_slot = si_get_first_free_bindless_slot(sctx);
2170
2171 /* For simplicity, sampler and image bindless descriptors use fixed
2172 * 16-dword slots for now. Image descriptors only need 8 dwords, but this
2173 * doesn't really matter because no real apps use image handles.
2174 */
2175 desc_slot_offset = desc_slot * 16;
2176
2177 /* Copy the descriptor into the array. */
2178 memcpy(desc->list + desc_slot_offset, desc_list, size);
2179
2180 /* Re-upload the whole array of bindless descriptors into a new buffer.
2181 */
2182 if (!si_upload_descriptors(sctx, desc))
2183 return 0;
2184
2185 /* Make sure to re-emit the shader pointers for all stages. */
2186 sctx->graphics_bindless_pointer_dirty = true;
2187 sctx->compute_bindless_pointer_dirty = true;
2188
2189 return desc_slot;
2190 }
2191
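/* Rewrite the buffer address stored in a bindless descriptor if the buffer
 * has been reallocated while its handle wasn't resident.
 */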
2192 static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
2193 unsigned desc_slot,
2194 struct pipe_resource *resource,
2195 uint64_t offset,
2196 bool *desc_dirty)
2197 {
2198 struct si_descriptors *desc = &sctx->bindless_descriptors;
2199 struct r600_resource *buf = r600_resource(resource);
2200 unsigned desc_slot_offset = desc_slot * 16;
2201 uint32_t *desc_list = desc->list + desc_slot_offset + 4;
2202 uint64_t old_desc_va;
2203
2204 assert(resource->target == PIPE_BUFFER);
2205
2206 /* Retrieve the old buffer addr from the descriptor. */
2207 old_desc_va = si_desc_extract_buffer_address(desc_list);
2208
2209 if (old_desc_va != buf->gpu_address + offset) {
2210 /* The buffer has been invalidated while the handle wasn't
2211 * resident, so update the descriptor and the dirty flag.
2212 */
2213 si_set_buf_desc_address(buf, offset, &desc_list[0]);
2214
2215 *desc_dirty = true;
2216 }
2217 }
2218
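/* Create a bindless texture handle (ARB_bindless_texture). The handle is the
 * descriptor slot index, which is why slot 0 is reserved as the invalid handle.
 */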
2219 static uint64_t si_create_texture_handle(struct pipe_context *ctx,
2220 struct pipe_sampler_view *view,
2221 const struct pipe_sampler_state *state)
2222 {
2223 struct si_sampler_view *sview = (struct si_sampler_view *)view;
2224 struct si_context *sctx = (struct si_context *)ctx;
2225 struct si_texture_handle *tex_handle;
2226 struct si_sampler_state *sstate;
2227 uint32_t desc_list[16];
2228 uint64_t handle;
2229
2230 tex_handle = CALLOC_STRUCT(si_texture_handle);
2231 if (!tex_handle)
2232 return 0;
2233
2234 memset(desc_list, 0, sizeof(desc_list));
2235 si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);
2236
2237 sstate = ctx->create_sampler_state(ctx, state);
2238 if (!sstate) {
2239 FREE(tex_handle);
2240 return 0;
2241 }
2242
2243 si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
2244 memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
2245 ctx->delete_sampler_state(ctx, sstate);
2246
2247 tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2248 sizeof(desc_list));
2249 if (!tex_handle->desc_slot) {
2250 FREE(tex_handle);
2251 return 0;
2252 }
2253
2254 handle = tex_handle->desc_slot;
2255
2256 if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
2257 tex_handle)) {
2258 FREE(tex_handle);
2259 return 0;
2260 }
2261
2262 pipe_sampler_view_reference(&tex_handle->view, view);
2263
2264 r600_resource(sview->base.texture)->texture_handle_allocated = true;
2265
2266 return handle;
2267 }
2268
2269 static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
2270 {
2271 struct si_context *sctx = (struct si_context *)ctx;
2272 struct si_texture_handle *tex_handle;
2273 struct hash_entry *entry;
2274
2275 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2276 if (!entry)
2277 return;
2278
2279 tex_handle = (struct si_texture_handle *)entry->data;
2280
2281 /* Allow this descriptor slot to be re-used. */
2282 util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);
2283
2284 pipe_sampler_view_reference(&tex_handle->view, NULL);
2285 _mesa_hash_table_remove(sctx->tex_handles, entry);
2286 FREE(tex_handle);
2287 }
2288
2289 static void si_make_texture_handle_resident(struct pipe_context *ctx,
2290 uint64_t handle, bool resident)
2291 {
2292 struct si_context *sctx = (struct si_context *)ctx;
2293 struct si_texture_handle *tex_handle;
2294 struct si_sampler_view *sview;
2295 struct hash_entry *entry;
2296
2297 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2298 if (!entry)
2299 return;
2300
2301 tex_handle = (struct si_texture_handle *)entry->data;
2302 sview = (struct si_sampler_view *)tex_handle->view;
2303
2304 if (resident) {
2305 if (sview->base.texture->target != PIPE_BUFFER) {
2306 struct r600_texture *rtex =
2307 (struct r600_texture *)sview->base.texture;
2308
2309 if (depth_needs_decompression(rtex)) {
2310 util_dynarray_append(
2311 &sctx->resident_tex_needs_depth_decompress,
2312 struct si_texture_handle *,
2313 tex_handle);
2314 }
2315
2316 if (color_needs_decompression(rtex)) {
2317 util_dynarray_append(
2318 &sctx->resident_tex_needs_color_decompress,
2319 struct si_texture_handle *,
2320 tex_handle);
2321 }
2322
2323 if (rtex->dcc_offset &&
2324 p_atomic_read(&rtex->framebuffers_bound))
2325 sctx->need_check_render_feedback = true;
2326
2327 si_update_bindless_texture_descriptor(sctx, tex_handle);
2328 } else {
2329 si_update_bindless_buffer_descriptor(sctx,
2330 tex_handle->desc_slot,
2331 sview->base.texture,
2332 sview->base.u.buf.offset,
2333 &tex_handle->desc_dirty);
2334 }
2335
2336 /* Re-upload the descriptor if it has been updated while it
2337 * wasn't resident.
2338 */
2339 if (tex_handle->desc_dirty)
2340 sctx->bindless_descriptors_dirty = true;
2341
2342 /* Add the texture handle to the per-context list. */
2343 util_dynarray_append(&sctx->resident_tex_handles,
2344 struct si_texture_handle *, tex_handle);
2345
2346 /* Add the buffers to the current CS in case si_begin_new_cs()
2347 * is not going to be called.
2348 */
2349 si_sampler_view_add_buffer(sctx, sview->base.texture,
2350 RADEON_USAGE_READ,
2351 sview->is_stencil_sampler, false);
2352 } else {
2353 /* Remove the texture handle from the per-context list. */
2354 util_dynarray_delete_unordered(&sctx->resident_tex_handles,
2355 struct si_texture_handle *,
2356 tex_handle);
2357
2358 if (sview->base.texture->target != PIPE_BUFFER) {
2359 util_dynarray_delete_unordered(
2360 &sctx->resident_tex_needs_depth_decompress,
2361 struct si_texture_handle *, tex_handle);
2362
2363 util_dynarray_delete_unordered(
2364 &sctx->resident_tex_needs_color_decompress,
2365 struct si_texture_handle *, tex_handle);
2366 }
2367 }
2368 }
2369
2370 static uint64_t si_create_image_handle(struct pipe_context *ctx,
2371 const struct pipe_image_view *view)
2372 {
2373 struct si_context *sctx = (struct si_context *)ctx;
2374 struct si_image_handle *img_handle;
2375 uint32_t desc_list[8];
2376 uint64_t handle;
2377
2378 if (!view || !view->resource)
2379 return 0;
2380
2381 img_handle = CALLOC_STRUCT(si_image_handle);
2382 if (!img_handle)
2383 return 0;
2384
2385 memset(desc_list, 0, sizeof(desc_list));
2386 si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
2387
2388 si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
2389
2390 img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2391 sizeof(desc_list));
2392 if (!img_handle->desc_slot) {
2393 FREE(img_handle);
2394 return 0;
2395 }
2396
2397 handle = img_handle->desc_slot;
2398
2399 if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle,
2400 img_handle)) {
2401 FREE(img_handle);
2402 return 0;
2403 }
2404
2405 util_copy_image_view(&img_handle->view, view);
2406
2407 r600_resource(view->resource)->image_handle_allocated = true;
2408
2409 return handle;
2410 }
2411
2412 static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
2413 {
2414 struct si_context *sctx = (struct si_context *)ctx;
2415 struct si_image_handle *img_handle;
2416 struct hash_entry *entry;
2417
2418 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2419 if (!entry)
2420 return;
2421
2422 img_handle = (struct si_image_handle *)entry->data;
2423
/* Allow this descriptor slot to be re-used, like the texture path does. */
util_idalloc_free(&sctx->bindless_used_slots, img_handle->desc_slot);

2424 util_copy_image_view(&img_handle->view, NULL);
2425 _mesa_hash_table_remove(sctx->img_handles, entry);
2426 FREE(img_handle);
2427 }
2428
2429 static void si_make_image_handle_resident(struct pipe_context *ctx,
2430 uint64_t handle, unsigned access,
2431 bool resident)
2432 {
2433 struct si_context *sctx = (struct si_context *)ctx;
2434 struct si_image_handle *img_handle;
2435 struct pipe_image_view *view;
2436 struct r600_resource *res;
2437 struct hash_entry *entry;
2438
2439 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2440 if (!entry)
2441 return;
2442
2443 img_handle = (struct si_image_handle *)entry->data;
2444 view = &img_handle->view;
2445 res = (struct r600_resource *)view->resource;
2446
2447 if (resident) {
2448 if (res->b.b.target != PIPE_BUFFER) {
2449 struct r600_texture *rtex = (struct r600_texture *)res;
2450 unsigned level = view->u.tex.level;
2451
2452 if (color_needs_decompression(rtex)) {
2453 util_dynarray_append(
2454 &sctx->resident_img_needs_color_decompress,
2455 struct si_image_handle *,
2456 img_handle);
2457 }
2458
2459 if (vi_dcc_enabled(rtex, level) &&
2460 p_atomic_read(&rtex->framebuffers_bound))
2461 sctx->need_check_render_feedback = true;
2462
2463 si_update_bindless_image_descriptor(sctx, img_handle);
2464 } else {
2465 si_update_bindless_buffer_descriptor(sctx,
2466 img_handle->desc_slot,
2467 view->resource,
2468 view->u.buf.offset,
2469 &img_handle->desc_dirty);
2470 }
2471
2472 /* Re-upload the descriptor if it has been updated while it
2473 * wasn't resident.
2474 */
2475 if (img_handle->desc_dirty)
2476 sctx->bindless_descriptors_dirty = true;
2477
2478 /* Add the image handle to the per-context list. */
2479 util_dynarray_append(&sctx->resident_img_handles,
2480 struct si_image_handle *, img_handle);
2481
2482 /* Add the buffers to the current CS in case si_begin_new_cs()
2483 * is not going to be called.
2484 */
2485 si_sampler_view_add_buffer(sctx, view->resource,
2486 (access & PIPE_IMAGE_ACCESS_WRITE) ?
2487 RADEON_USAGE_READWRITE :
2488 RADEON_USAGE_READ, false, false);
2489 } else {
2490 /* Remove the image handle from the per-context list. */
2491 util_dynarray_delete_unordered(&sctx->resident_img_handles,
2492 struct si_image_handle *,
2493 img_handle);
2494
2495 if (res->b.b.target != PIPE_BUFFER) {
2496 util_dynarray_delete_unordered(
2497 &sctx->resident_img_needs_color_decompress,
2498 struct si_image_handle *,
2499 img_handle);
2500 }
2501 }
2502 }
2503
2504
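/* Add the buffers of all resident bindless handles to the new command stream. */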
2505 void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
2506 {
2507 unsigned num_resident_tex_handles, num_resident_img_handles;
2508
2509 num_resident_tex_handles = sctx->resident_tex_handles.size /
2510 sizeof(struct si_texture_handle *);
2511 num_resident_img_handles = sctx->resident_img_handles.size /
2512 sizeof(struct si_image_handle *);
2513
2514 /* Add all resident texture handles. */
2515 util_dynarray_foreach(&sctx->resident_tex_handles,
2516 struct si_texture_handle *, tex_handle) {
2517 struct si_sampler_view *sview =
2518 (struct si_sampler_view *)(*tex_handle)->view;
2519
2520 si_sampler_view_add_buffer(sctx, sview->base.texture,
2521 RADEON_USAGE_READ,
2522 sview->is_stencil_sampler, false);
2523 }
2524
2525 /* Add all resident image handles. */
2526 util_dynarray_foreach(&sctx->resident_img_handles,
2527 struct si_image_handle *, img_handle) {
2528 struct pipe_image_view *view = &(*img_handle)->view;
2529
2530 si_sampler_view_add_buffer(sctx, view->resource,
2531 RADEON_USAGE_READWRITE,
2532 false, false);
2533 }
2534
2535 sctx->b.num_resident_handles += num_resident_tex_handles +
2536 num_resident_img_handles;
2537 }
2538
2539 /* INIT/DEINIT/UPLOAD */
2540
2541 void si_init_all_descriptors(struct si_context *sctx)
2542 {
2543 int i;
2544
2545 STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2546 STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2547
2548 for (i = 0; i < SI_NUM_SHADERS; i++) {
2549 bool gfx9_tcs = false;
2550 bool gfx9_gs = false;
2551 unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
2552 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
2553 struct si_descriptors *desc;
2554
2555 if (sctx->b.chip_class >= GFX9) {
2556 gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
2557 gfx9_gs = i == PIPE_SHADER_GEOMETRY;
2558 }
2559
2560 desc = si_const_and_shader_buffer_descriptors(sctx, i);
2561 si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
2562 num_buffer_slots,
2563 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
2564 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
2565 SI_SGPR_CONST_AND_SHADER_BUFFERS,
2566 RADEON_USAGE_READWRITE,
2567 RADEON_USAGE_READ,
2568 RADEON_PRIO_SHADER_RW_BUFFER,
2569 RADEON_PRIO_CONST_BUFFER);
2570 desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
2571
2572 desc = si_sampler_and_image_descriptors(sctx, i);
2573 si_init_descriptors(desc,
2574 gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
2575 gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
2576 SI_SGPR_SAMPLERS_AND_IMAGES,
2577 16, num_sampler_slots);
2578
2579 int j;
2580 for (j = 0; j < SI_NUM_IMAGES; j++)
2581 memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
2582 for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
2583 memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
2584 }
2585
2586 si_init_buffer_resources(&sctx->rw_buffers,
2587 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
2588 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
2589 /* The second set of usage/priority is used by
2590 * const buffers in RW buffer slots. */
2591 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
2592 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
2593 sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
2594
2595 si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
2596 4, SI_NUM_VERTEX_BUFFERS);
2597 FREE(sctx->vertex_buffers.list); /* not used */
2598 sctx->vertex_buffers.list = NULL;
2599
2600 /* Initialize an array of 1024 bindless descriptors. When the limit is
2601 * reached, just make it larger and re-upload the whole array.
2602 */
2603 si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
2604 SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
2605 1024);
2606
2607 sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
2608
2609 /* Set pipe_context functions. */
2610 sctx->b.b.bind_sampler_states = si_bind_sampler_states;
2611 sctx->b.b.set_shader_images = si_set_shader_images;
2612 sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
2613 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
2614 sctx->b.b.set_shader_buffers = si_set_shader_buffers;
2615 sctx->b.b.set_sampler_views = si_set_sampler_views;
2616 sctx->b.b.create_texture_handle = si_create_texture_handle;
2617 sctx->b.b.delete_texture_handle = si_delete_texture_handle;
2618 sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
2619 sctx->b.b.create_image_handle = si_create_image_handle;
2620 sctx->b.b.delete_image_handle = si_delete_image_handle;
2621 sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
2622 sctx->b.invalidate_buffer = si_invalidate_buffer;
2623 sctx->b.rebind_buffer = si_rebind_buffer;
2624
2625 /* Shader user data. */
2626 si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
2627 si_emit_graphics_shader_pointers);
2628
2629 /* Set default and immutable mappings. */
2630 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2631
2632 if (sctx->b.chip_class >= GFX9) {
2633 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2634 R_00B430_SPI_SHADER_USER_DATA_LS_0);
2635 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2636 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2637 } else {
2638 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2639 R_00B430_SPI_SHADER_USER_DATA_HS_0);
2640 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2641 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2642 }
2643 si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2644 }
2645
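/* Upload all dirty descriptor lists selected by "mask" and mark the
 * corresponding shader pointers as dirty.
 */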
2646 static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
2647 {
2648 unsigned dirty = sctx->descriptors_dirty & mask;
2649
2650 /* Assume nothing will go wrong: */
2651 sctx->shader_pointers_dirty |= dirty;
2652
2653 while (dirty) {
2654 unsigned i = u_bit_scan(&dirty);
2655
2656 if (!si_upload_descriptors(sctx, &sctx->descriptors[i]))
2657 return false;
2658 }
2659
2660 sctx->descriptors_dirty &= ~mask;
2661
2662 si_upload_bindless_descriptors(sctx);
2663
2664 return true;
2665 }
2666
2667 bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
2668 {
2669 const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
2670 return si_upload_shader_descriptors(sctx, mask);
2671 }
2672
2673 bool si_upload_compute_shader_descriptors(struct si_context *sctx)
2674 {
2675 /* Does not update rw_buffers as that is not needed for compute shaders
2676 * and the input buffer uses the same SGPRs anyway.
2677 */
2678 const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
2679 SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
2680 return si_upload_shader_descriptors(sctx, mask);
2681 }
2682
2683 void si_release_all_descriptors(struct si_context *sctx)
2684 {
2685 int i;
2686
2687 for (i = 0; i < SI_NUM_SHADERS; i++) {
2688 si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
2689 si_const_and_shader_buffer_descriptors(sctx, i));
2690 si_release_sampler_views(&sctx->samplers[i]);
2691 si_release_image_views(&sctx->images[i]);
2692 }
2693 si_release_buffer_resources(&sctx->rw_buffers,
2694 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2695 for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
2696 pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);
2697
2698 for (i = 0; i < SI_NUM_DESCS; ++i)
2699 si_release_descriptors(&sctx->descriptors[i]);
2700
2701 sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */
2702 si_release_descriptors(&sctx->vertex_buffers);
2703 si_release_bindless_descriptors(sctx);
2704 }
2705
2706 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
2707 {
2708 int i;
2709
2710 for (i = 0; i < SI_NUM_SHADERS; i++) {
2711 si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
2712 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
2713 si_image_views_begin_new_cs(sctx, &sctx->images[i]);
2714 }
2715 si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
2716 si_vertex_buffers_begin_new_cs(sctx);
2717
2718 for (i = 0; i < SI_NUM_DESCS; ++i)
2719 si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
2720 si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors);
2721
2722 si_shader_pointers_begin_new_cs(sctx);
2723 }
2724
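/* Set the range of active descriptor slots, i.e. the only slots that get
 * uploaded. Enabling slots outside the current range marks the descriptor
 * list dirty.
 */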
2725 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
2726 uint64_t new_active_mask)
2727 {
2728 struct si_descriptors *desc = &sctx->descriptors[desc_idx];
2729
2730 /* Ignore no-op updates and updates that disable all slots. */
2731 if (!new_active_mask ||
2732 new_active_mask == u_bit_consecutive64(desc->first_active_slot,
2733 desc->num_active_slots))
2734 return;
2735
2736 int first, count;
2737 u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
2738 assert(new_active_mask == 0);
2739
2740 /* Upload/dump descriptors if slots are being enabled. */
2741 if (first < desc->first_active_slot ||
2742 first + count > desc->first_active_slot + desc->num_active_slots)
2743 sctx->descriptors_dirty |= 1u << desc_idx;
2744
2745 desc->first_active_slot = first;
2746 desc->num_active_slots = count;
2747 }
2748
2749 void si_set_active_descriptors_for_shader(struct si_context *sctx,
2750 struct si_shader_selector *sel)
2751 {
2752 if (!sel)
2753 return;
2754
2755 si_set_active_descriptors(sctx,
2756 si_const_and_shader_buffer_descriptors_idx(sel->type),
2757 sel->active_const_and_shader_buffers);
2758 si_set_active_descriptors(sctx,
2759 si_sampler_and_image_descriptors_idx(sel->type),
2760 sel->active_samplers_and_images);
2761 }