radeonsi: remove 'Authors:' comments
[mesa.git] src/gallium/drivers/radeonsi/si_descriptors.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 /* Resource binding slots and sampler states (each described with 8 or
25 * 4 dwords) are stored in lists in memory which is accessed by shaders
26 * using scalar load instructions.
27 *
28 * This file is responsible for managing such lists. It keeps a copy of all
29 * descriptors in CPU memory and re-uploads a whole list if some slots have
30 * been changed.
31 *
32 * This code is also responsible for updating shader pointers to those lists.
33 *
34 * Note that CP DMA can't be used for updating the lists, because a GPU hang
35 * could leave the list in a mid-IB state and the next IB would get wrong
36 * descriptors and the whole context would be unusable at that point.
37 * (Note: Register shadowing can't be used for the same reason.)
38 *
39 * Also, uploading descriptors to newly allocated memory doesn't require
40 * a KCACHE flush.
41 *
42 *
43 * Possible scenarios for one 16 dword image+sampler slot:
44 *
45 * | Image | w/ FMASK | Buffer | NULL
46 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
47 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
48 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
49 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
50 *
51 * FMASK implies MSAA, therefore no sampler state.
52 * Sampler states are never unbound except when FMASK is bound.
53 */
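/* Illustrative indexing sketch (not driver code; "image_desc" etc. are
 * placeholder names): with the layout above, combined texture/sampler slot N
 * starts at dword N * 16 of the CPU-side list, so without FMASK a slot is
 * filled roughly like this:
 *
 *   uint32_t *slot = descs->list + N * 16;
 *   memcpy(slot +  0, image_desc,      8 * 4); // image (or buffer in [4:7])
 *   memcpy(slot +  8, null_descriptor, 4 * 4); // no FMASK
 *   memcpy(slot + 12, sampler_state,   4 * 4); // sampler state
 *
 * The real packing, including the FMASK case, is done by
 * si_set_sampler_view_desc() below.
 */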
54
55 #include "radeon/r600_cs.h"
56 #include "si_pipe.h"
57 #include "sid.h"
58 #include "gfx9d.h"
59
60 #include "util/hash_table.h"
61 #include "util/u_idalloc.h"
62 #include "util/u_format.h"
63 #include "util/u_memory.h"
64 #include "util/u_upload_mgr.h"
65
66
67 /* NULL image and buffer descriptor for textures (alpha = 1) and images
68 * (alpha = 0).
69 *
70 * For images, all fields must be zero except for the swizzle, which
71 * supports arbitrary combinations of 0s and 1s. The texture type must be
72 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
73 *
74 * For buffers, all fields must be zero. If they are not, the hw hangs.
75 *
76 * This is the only reason why the buffer descriptor must be in words [4:7].
77 */
78 static uint32_t null_texture_descriptor[8] = {
79 0,
80 0,
81 0,
82 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
83 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
84 /* the rest must contain zeros, which is also used by the buffer
85 * descriptor */
86 };
87
88 static uint32_t null_image_descriptor[8] = {
89 0,
90 0,
91 0,
92 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
93 /* the rest must contain zeros, which is also used by the buffer
94 * descriptor */
95 };
96
97 static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
98 {
99 return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
100 }
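/* Worked example (illustrative address only): the buffer address is split
 * between desc[0] (low 32 bits) and the BASE_ADDRESS_HI field of desc[1]
 * (upper bits). For va = 0x123456000:
 *
 *   desc[0] == 0x23456000
 *   G_008F04_BASE_ADDRESS_HI(desc[1]) == 0x1
 *   si_desc_extract_buffer_address(desc) == 0x123456000
 *
 * si_set_buf_desc_address() below performs the inverse packing.
 */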
101
102 static void si_init_descriptor_list(uint32_t *desc_list,
103 unsigned element_dw_size,
104 unsigned num_elements,
105 const uint32_t *null_descriptor)
106 {
107 int i;
108
109 /* Initialize the array to NULL descriptors if the element size is 8. */
110 if (null_descriptor) {
111 assert(element_dw_size % 8 == 0);
112 for (i = 0; i < num_elements * element_dw_size / 8; i++)
113 memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
114 }
115 }
116
117 static void si_init_descriptors(struct si_descriptors *desc,
118 unsigned shader_userdata_index,
119 unsigned element_dw_size,
120 unsigned num_elements)
121 {
122 desc->list = CALLOC(num_elements, element_dw_size * 4);
123 desc->element_dw_size = element_dw_size;
124 desc->num_elements = num_elements;
125 desc->shader_userdata_offset = shader_userdata_index * 4;
126 desc->slot_index_to_bind_directly = -1;
127 }
128
129 static void si_release_descriptors(struct si_descriptors *desc)
130 {
131 r600_resource_reference(&desc->buffer, NULL);
132 FREE(desc->list);
133 }
134
135 static bool si_upload_descriptors(struct si_context *sctx,
136 struct si_descriptors *desc)
137 {
138 unsigned slot_size = desc->element_dw_size * 4;
139 unsigned first_slot_offset = desc->first_active_slot * slot_size;
140 unsigned upload_size = desc->num_active_slots * slot_size;
141
142 /* Skip the upload if no shader is using the descriptors. dirty_mask
143 * will stay dirty and the descriptors will be uploaded when there is
144 * a shader using them.
145 */
146 if (!upload_size)
147 return true;
148
149 /* If there is just one active descriptor, bind it directly. */
150 if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
151 desc->num_active_slots == 1) {
152 uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
153 desc->element_dw_size];
154
155 /* The buffer is already in the buffer list. */
156 r600_resource_reference(&desc->buffer, NULL);
157 desc->gpu_list = NULL;
158 desc->gpu_address = si_desc_extract_buffer_address(descriptor);
159 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
160 return true;
161 }
162
163 uint32_t *ptr;
164 int buffer_offset;
165 u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
166 si_optimal_tcc_alignment(sctx, upload_size),
167 (unsigned*)&buffer_offset,
168 (struct pipe_resource**)&desc->buffer,
169 (void**)&ptr);
170 if (!desc->buffer) {
171 desc->gpu_address = 0;
172 return false; /* skip the draw call */
173 }
174
175 util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
176 upload_size);
177 desc->gpu_list = ptr - first_slot_offset / 4;
178
179 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
180 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
181
182 /* The shader pointer should point to slot 0. */
183 buffer_offset -= first_slot_offset;
184 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
185
186 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
187 return true;
188 }
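/* Typical per-draw flow (sketch; "desc_index" is a placeholder, the real
 * callers iterate over sctx->descriptors_dirty):
 *
 *   sctx->descriptors_dirty |= 1u << desc_index;            // a slot changed
 *   ...
 *   if (!si_upload_descriptors(sctx, &sctx->descriptors[desc_index]))
 *       return false;                                       // skip the draw
 *
 * The shader-pointer atom marked dirty above then re-emits the user SGPR
 * pointing at desc->gpu_address before the next draw.
 */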
189
190 static void
191 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
192 {
193 if (!desc->buffer)
194 return;
195
196 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
197 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
198 }
199
200 /* SAMPLER VIEWS */
201
202 static unsigned
203 si_sampler_and_image_descriptors_idx(unsigned shader)
204 {
205 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
206 SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
207 }
208
209 static struct si_descriptors *
210 si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
211 {
212 return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
213 }
214
215 static void si_release_sampler_views(struct si_samplers *samplers)
216 {
217 int i;
218
219 for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
220 pipe_sampler_view_reference(&samplers->views[i], NULL);
221 }
222 }
223
224 static void si_sampler_view_add_buffer(struct si_context *sctx,
225 struct pipe_resource *resource,
226 enum radeon_bo_usage usage,
227 bool is_stencil_sampler,
228 bool check_mem)
229 {
230 struct r600_resource *rres;
231 struct r600_texture *rtex;
232 enum radeon_bo_priority priority;
233
234 if (!resource)
235 return;
236
237 if (resource->target != PIPE_BUFFER) {
238 struct r600_texture *tex = (struct r600_texture*)resource;
239
240 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler))
241 resource = &tex->flushed_depth_texture->resource.b.b;
242 }
243
244 rres = (struct r600_resource*)resource;
245 priority = r600_get_sampler_view_priority(rres);
246
247 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
248 rres, usage, priority,
249 check_mem);
250
251 if (resource->target == PIPE_BUFFER)
252 return;
253
254 /* Now add separate DCC or HTILE. */
255 rtex = (struct r600_texture*)resource;
256 if (rtex->dcc_separate_buffer) {
257 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
258 rtex->dcc_separate_buffer, usage,
259 RADEON_PRIO_DCC, check_mem);
260 }
261 }
262
263 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
264 struct si_samplers *samplers)
265 {
266 unsigned mask = samplers->enabled_mask;
267
268 /* Add buffers to the CS. */
269 while (mask) {
270 int i = u_bit_scan(&mask);
271 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
272
273 si_sampler_view_add_buffer(sctx, sview->base.texture,
274 RADEON_USAGE_READ,
275 sview->is_stencil_sampler, false);
276 }
277 }
278
279 /* Set buffer descriptor fields that can be changed by reallocations. */
280 static void si_set_buf_desc_address(struct r600_resource *buf,
281 uint64_t offset, uint32_t *state)
282 {
283 uint64_t va = buf->gpu_address + offset;
284
285 state[0] = va;
286 state[1] &= C_008F04_BASE_ADDRESS_HI;
287 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
288 }
289
290 /* Set texture descriptor fields that can be changed by reallocations.
291 *
292 * \param tex texture
293 * \param base_level_info information of the level of BASE_ADDRESS
294 * \param base_level the level of BASE_ADDRESS
295 * \param first_level pipe_sampler_view.u.tex.first_level
296 * \param block_width util_format_get_blockwidth()
297 * \param is_stencil select between separate Z & Stencil
298 * \param state descriptor to update
299 */
300 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
301 struct r600_texture *tex,
302 const struct legacy_surf_level *base_level_info,
303 unsigned base_level, unsigned first_level,
304 unsigned block_width, bool is_stencil,
305 uint32_t *state)
306 {
307 uint64_t va, meta_va = 0;
308
309 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
310 tex = tex->flushed_depth_texture;
311 is_stencil = false;
312 }
313
314 va = tex->resource.gpu_address;
315
316 if (sscreen->b.chip_class >= GFX9) {
317 /* Only stencil_offset needs to be added here. */
318 if (is_stencil)
319 va += tex->surface.u.gfx9.stencil_offset;
320 else
321 va += tex->surface.u.gfx9.surf_offset;
322 } else {
323 va += base_level_info->offset;
324 }
325
326 state[0] = va >> 8;
327 state[1] &= C_008F14_BASE_ADDRESS_HI;
328 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
329
330 /* Only macrotiled modes can set tile swizzle.
331 * GFX9 doesn't use (legacy) base_level_info.
332 */
333 if (sscreen->b.chip_class >= GFX9 ||
334 base_level_info->mode == RADEON_SURF_MODE_2D)
335 state[0] |= tex->surface.tile_swizzle;
336
337 if (sscreen->b.chip_class >= VI) {
338 state[6] &= C_008F28_COMPRESSION_EN;
339 state[7] = 0;
340
341 if (vi_dcc_enabled(tex, first_level)) {
342 meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
343 tex->dcc_offset;
344
345 if (sscreen->b.chip_class == VI) {
346 meta_va += base_level_info->dcc_offset;
347 assert(base_level_info->mode == RADEON_SURF_MODE_2D);
348 }
349
350 meta_va |= (uint32_t)tex->surface.tile_swizzle << 8;
351 } else if (vi_tc_compat_htile_enabled(tex, first_level)) {
352 meta_va = tex->resource.gpu_address + tex->htile_offset;
353 }
354
355 if (meta_va) {
356 state[6] |= S_008F28_COMPRESSION_EN(1);
357 state[7] = meta_va >> 8;
358 }
359 }
360
361 if (sscreen->b.chip_class >= GFX9) {
362 state[3] &= C_008F1C_SW_MODE;
363 state[4] &= C_008F20_PITCH_GFX9;
364
365 if (is_stencil) {
366 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
367 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch);
368 } else {
369 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
370 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch);
371 }
372
373 state[5] &= C_008F24_META_DATA_ADDRESS &
374 C_008F24_META_PIPE_ALIGNED &
375 C_008F24_META_RB_ALIGNED;
376 if (meta_va) {
377 struct gfx9_surf_meta_flags meta;
378
379 if (tex->dcc_offset)
380 meta = tex->surface.u.gfx9.dcc;
381 else
382 meta = tex->surface.u.gfx9.htile;
383
384 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
385 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
386 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
387 }
388 } else {
389 /* SI-CI-VI */
390 unsigned pitch = base_level_info->nblk_x * block_width;
391 unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
392
393 state[3] &= C_008F1C_TILING_INDEX;
394 state[3] |= S_008F1C_TILING_INDEX(index);
395 state[4] &= C_008F20_PITCH_GFX6;
396 state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
397 }
398 }
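/* Arithmetic example for the SI-CI-VI branch above (illustrative values):
 * for a BC1 level where base_level_info->nblk_x (the aligned width in
 * blocks) is 64 and block_width is 4, pitch = 64 * 4 = 256 and the
 * PITCH_GFX6 field is programmed with 256 - 1 = 255.
 */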
399
400 static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
401 struct si_sampler_view *sview,
402 struct r600_texture *tex,
403 uint32_t *desc)
404 {
405 if (sview && sview->is_integer)
406 memcpy(desc, sstate->integer_val, 4*4);
407 else if (tex && tex->upgraded_depth &&
408 (!sview || !sview->is_stencil_sampler))
409 memcpy(desc, sstate->upgraded_depth_val, 4*4);
410 else
411 memcpy(desc, sstate->val, 4*4);
412 }
413
414 static void si_set_sampler_view_desc(struct si_context *sctx,
415 struct si_sampler_view *sview,
416 struct si_sampler_state *sstate,
417 uint32_t *desc)
418 {
419 struct pipe_sampler_view *view = &sview->base;
420 struct r600_texture *rtex = (struct r600_texture *)view->texture;
421 bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
422
423 if (unlikely(!is_buffer && sview->dcc_incompatible)) {
424 if (vi_dcc_enabled(rtex, view->u.tex.first_level))
425 if (!si_texture_disable_dcc(&sctx->b, rtex))
426 sctx->b.decompress_dcc(&sctx->b.b, rtex);
427
428 sview->dcc_incompatible = false;
429 }
430
431 assert(rtex); /* views with texture == NULL aren't supported */
432 memcpy(desc, sview->state, 8*4);
433
434 if (is_buffer) {
435 si_set_buf_desc_address(&rtex->resource,
436 sview->base.u.buf.offset,
437 desc + 4);
438 } else {
439 bool is_separate_stencil = rtex->db_compatible &&
440 sview->is_stencil_sampler;
441
442 si_set_mutable_tex_desc_fields(sctx->screen, rtex,
443 sview->base_level_info,
444 sview->base_level,
445 sview->base.u.tex.first_level,
446 sview->block_width,
447 is_separate_stencil,
448 desc);
449 }
450
451 if (!is_buffer && rtex->fmask.size) {
452 memcpy(desc + 8, sview->fmask_state, 8*4);
453 } else {
454 /* Disable FMASK and bind sampler state in [12:15]. */
455 memcpy(desc + 8, null_texture_descriptor, 4*4);
456
457 if (sstate)
458 si_set_sampler_state_desc(sstate, sview,
459 is_buffer ? NULL : rtex,
460 desc + 12);
461 }
462 }
463
464 static bool color_needs_decompression(struct r600_texture *rtex)
465 {
466 return rtex->fmask.size ||
467 (rtex->dirty_level_mask &&
468 (rtex->cmask.size || rtex->dcc_offset));
469 }
470
471 static bool depth_needs_decompression(struct r600_texture *rtex)
472 {
473 /* If the depth/stencil texture is TC-compatible, no decompression
474 * will be done. The decompression function will only flush DB caches
475 * to make it coherent with shaders. That's necessary because the driver
476 * doesn't flush DB caches in any other case.
477 */
478 return rtex->db_compatible;
479 }
480
481 static void si_set_sampler_view(struct si_context *sctx,
482 unsigned shader,
483 unsigned slot, struct pipe_sampler_view *view,
484 bool disallow_early_out)
485 {
486 struct si_samplers *samplers = &sctx->samplers[shader];
487 struct si_sampler_view *rview = (struct si_sampler_view*)view;
488 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
489 unsigned desc_slot = si_get_sampler_slot(slot);
490 uint32_t *desc = descs->list + desc_slot * 16;
491
492 if (samplers->views[slot] == view && !disallow_early_out)
493 return;
494
495 if (view) {
496 struct r600_texture *rtex = (struct r600_texture *)view->texture;
497
498 si_set_sampler_view_desc(sctx, rview,
499 samplers->sampler_states[slot], desc);
500
501 if (rtex->resource.b.b.target == PIPE_BUFFER) {
502 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
503 samplers->needs_depth_decompress_mask &= ~(1u << slot);
504 samplers->needs_color_decompress_mask &= ~(1u << slot);
505 } else {
506 if (depth_needs_decompression(rtex)) {
507 samplers->needs_depth_decompress_mask |= 1u << slot;
508 } else {
509 samplers->needs_depth_decompress_mask &= ~(1u << slot);
510 }
511 if (color_needs_decompression(rtex)) {
512 samplers->needs_color_decompress_mask |= 1u << slot;
513 } else {
514 samplers->needs_color_decompress_mask &= ~(1u << slot);
515 }
516
517 if (rtex->dcc_offset &&
518 p_atomic_read(&rtex->framebuffers_bound))
519 sctx->need_check_render_feedback = true;
520 }
521
522 pipe_sampler_view_reference(&samplers->views[slot], view);
523 samplers->enabled_mask |= 1u << slot;
524
525 /* Since this can flush, it must be done after enabled_mask is
526 * updated. */
527 si_sampler_view_add_buffer(sctx, view->texture,
528 RADEON_USAGE_READ,
529 rview->is_stencil_sampler, true);
530 } else {
531 pipe_sampler_view_reference(&samplers->views[slot], NULL);
532 memcpy(desc, null_texture_descriptor, 8*4);
533 /* Only clear the lower dwords of FMASK. */
534 memcpy(desc + 8, null_texture_descriptor, 4*4);
535 /* Re-set the sampler state if we are transitioning from FMASK. */
536 if (samplers->sampler_states[slot])
537 si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL,
538 desc + 12);
539
540 samplers->enabled_mask &= ~(1u << slot);
541 samplers->needs_depth_decompress_mask &= ~(1u << slot);
542 samplers->needs_color_decompress_mask &= ~(1u << slot);
543 }
544
545 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
546 }
547
548 static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
549 unsigned shader)
550 {
551 struct si_samplers *samplers = &sctx->samplers[shader];
552 unsigned shader_bit = 1 << shader;
553
554 if (samplers->needs_depth_decompress_mask ||
555 samplers->needs_color_decompress_mask ||
556 sctx->images[shader].needs_color_decompress_mask)
557 sctx->shader_needs_decompress_mask |= shader_bit;
558 else
559 sctx->shader_needs_decompress_mask &= ~shader_bit;
560 }
561
562 static void si_set_sampler_views(struct pipe_context *ctx,
563 enum pipe_shader_type shader, unsigned start,
564 unsigned count,
565 struct pipe_sampler_view **views)
566 {
567 struct si_context *sctx = (struct si_context *)ctx;
568 int i;
569
570 if (!count || shader >= SI_NUM_SHADERS)
571 return;
572
573 if (views) {
574 for (i = 0; i < count; i++)
575 si_set_sampler_view(sctx, shader, start + i, views[i], false);
576 } else {
577 for (i = 0; i < count; i++)
578 si_set_sampler_view(sctx, shader, start + i, NULL, false);
579 }
580
581 si_update_shader_needs_decompress_mask(sctx, shader);
582 }
583
584 static void
585 si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
586 {
587 unsigned mask = samplers->enabled_mask;
588
589 while (mask) {
590 int i = u_bit_scan(&mask);
591 struct pipe_resource *res = samplers->views[i]->texture;
592
593 if (res && res->target != PIPE_BUFFER) {
594 struct r600_texture *rtex = (struct r600_texture *)res;
595
596 if (color_needs_decompression(rtex)) {
597 samplers->needs_color_decompress_mask |= 1u << i;
598 } else {
599 samplers->needs_color_decompress_mask &= ~(1u << i);
600 }
601 }
602 }
603 }
604
605 /* IMAGE VIEWS */
606
607 static void
608 si_release_image_views(struct si_images *images)
609 {
610 unsigned i;
611
612 for (i = 0; i < SI_NUM_IMAGES; ++i) {
613 struct pipe_image_view *view = &images->views[i];
614
615 pipe_resource_reference(&view->resource, NULL);
616 }
617 }
618
619 static void
620 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
621 {
622 uint mask = images->enabled_mask;
623
624 /* Add buffers to the CS. */
625 while (mask) {
626 int i = u_bit_scan(&mask);
627 struct pipe_image_view *view = &images->views[i];
628
629 assert(view->resource);
630
631 si_sampler_view_add_buffer(sctx, view->resource,
632 RADEON_USAGE_READWRITE, false, false);
633 }
634 }
635
636 static void
637 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
638 {
639 struct si_images *images = &ctx->images[shader];
640
641 if (images->enabled_mask & (1u << slot)) {
642 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
643 unsigned desc_slot = si_get_image_slot(slot);
644
645 pipe_resource_reference(&images->views[slot].resource, NULL);
646 images->needs_color_decompress_mask &= ~(1 << slot);
647
648 memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
649 images->enabled_mask &= ~(1u << slot);
650 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
651 }
652 }
653
654 static void
655 si_mark_image_range_valid(const struct pipe_image_view *view)
656 {
657 struct r600_resource *res = (struct r600_resource *)view->resource;
658
659 assert(res && res->b.b.target == PIPE_BUFFER);
660
661 util_range_add(&res->valid_buffer_range,
662 view->u.buf.offset,
663 view->u.buf.offset + view->u.buf.size);
664 }
665
666 static void si_set_shader_image_desc(struct si_context *ctx,
667 const struct pipe_image_view *view,
668 bool skip_decompress,
669 uint32_t *desc)
670 {
671 struct si_screen *screen = ctx->screen;
672 struct r600_resource *res;
673
674 res = (struct r600_resource *)view->resource;
675
676 if (res->b.b.target == PIPE_BUFFER) {
677 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
678 si_mark_image_range_valid(view);
679
680 si_make_buffer_descriptor(screen, res,
681 view->format,
682 view->u.buf.offset,
683 view->u.buf.size, desc);
684 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
685 } else {
686 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
687 struct r600_texture *tex = (struct r600_texture *)res;
688 unsigned level = view->u.tex.level;
689 unsigned width, height, depth, hw_level;
690 bool uses_dcc = vi_dcc_enabled(tex, level);
691
692 assert(!tex->is_depth);
693 assert(tex->fmask.size == 0);
694
695 if (uses_dcc && !skip_decompress &&
696 (view->access & PIPE_IMAGE_ACCESS_WRITE ||
697 !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
698 /* If DCC can't be disabled, at least decompress it.
699 * The decompression is relatively cheap if the surface
700 * has been decompressed already.
701 */
702 if (!si_texture_disable_dcc(&ctx->b, tex))
703 ctx->b.decompress_dcc(&ctx->b.b, tex);
704 }
705
706 if (ctx->b.chip_class >= GFX9) {
707 /* Always set the base address. The swizzle modes don't
708 * allow setting mipmap level offsets as the base.
709 */
710 width = res->b.b.width0;
711 height = res->b.b.height0;
712 depth = res->b.b.depth0;
713 hw_level = level;
714 } else {
715 /* Always force the base level to the selected level.
716 *
717 * This is required for 3D textures, where otherwise
718 * selecting a single slice for non-layered bindings
719 * fails. It doesn't hurt the other targets.
720 */
721 width = u_minify(res->b.b.width0, level);
722 height = u_minify(res->b.b.height0, level);
723 depth = u_minify(res->b.b.depth0, level);
724 hw_level = 0;
725 }
726
727 si_make_texture_descriptor(screen, tex,
728 false, res->b.b.target,
729 view->format, swizzle,
730 hw_level, hw_level,
731 view->u.tex.first_layer,
732 view->u.tex.last_layer,
733 width, height, depth,
734 desc, NULL);
735 si_set_mutable_tex_desc_fields(screen, tex,
736 &tex->surface.u.legacy.level[level],
737 level, level,
738 util_format_get_blockwidth(view->format),
739 false, desc);
740 }
741 }
742
743 static void si_set_shader_image(struct si_context *ctx,
744 unsigned shader,
745 unsigned slot, const struct pipe_image_view *view,
746 bool skip_decompress)
747 {
748 struct si_images *images = &ctx->images[shader];
749 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
750 struct r600_resource *res;
751 unsigned desc_slot = si_get_image_slot(slot);
752 uint32_t *desc = descs->list + desc_slot * 8;
753
754 if (!view || !view->resource) {
755 si_disable_shader_image(ctx, shader, slot);
756 return;
757 }
758
759 res = (struct r600_resource *)view->resource;
760
761 if (&images->views[slot] != view)
762 util_copy_image_view(&images->views[slot], view);
763
764 si_set_shader_image_desc(ctx, view, skip_decompress, desc);
765
766 if (res->b.b.target == PIPE_BUFFER) {
767 images->needs_color_decompress_mask &= ~(1 << slot);
768 res->bind_history |= PIPE_BIND_SHADER_IMAGE;
769 } else {
770 struct r600_texture *tex = (struct r600_texture *)res;
771 unsigned level = view->u.tex.level;
772
773 if (color_needs_decompression(tex)) {
774 images->needs_color_decompress_mask |= 1 << slot;
775 } else {
776 images->needs_color_decompress_mask &= ~(1 << slot);
777 }
778
779 if (vi_dcc_enabled(tex, level) &&
780 p_atomic_read(&tex->framebuffers_bound))
781 ctx->need_check_render_feedback = true;
782 }
783
784 images->enabled_mask |= 1u << slot;
785 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
786
787 /* Since this can flush, it must be done after enabled_mask is updated. */
788 si_sampler_view_add_buffer(ctx, &res->b.b,
789 (view->access & PIPE_IMAGE_ACCESS_WRITE) ?
790 RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
791 false, true);
792 }
793
794 static void
795 si_set_shader_images(struct pipe_context *pipe,
796 enum pipe_shader_type shader,
797 unsigned start_slot, unsigned count,
798 const struct pipe_image_view *views)
799 {
800 struct si_context *ctx = (struct si_context *)pipe;
801 unsigned i, slot;
802
803 assert(shader < SI_NUM_SHADERS);
804
805 if (!count)
806 return;
807
808 assert(start_slot + count <= SI_NUM_IMAGES);
809
810 if (views) {
811 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
812 si_set_shader_image(ctx, shader, slot, &views[i], false);
813 } else {
814 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
815 si_set_shader_image(ctx, shader, slot, NULL, false);
816 }
817
818 si_update_shader_needs_decompress_mask(ctx, shader);
819 }
820
821 static void
822 si_images_update_needs_color_decompress_mask(struct si_images *images)
823 {
824 unsigned mask = images->enabled_mask;
825
826 while (mask) {
827 int i = u_bit_scan(&mask);
828 struct pipe_resource *res = images->views[i].resource;
829
830 if (res && res->target != PIPE_BUFFER) {
831 struct r600_texture *rtex = (struct r600_texture *)res;
832
833 if (color_needs_decompression(rtex)) {
834 images->needs_color_decompress_mask |= 1 << i;
835 } else {
836 images->needs_color_decompress_mask &= ~(1 << i);
837 }
838 }
839 }
840 }
841
842 /* SAMPLER STATES */
843
844 static void si_bind_sampler_states(struct pipe_context *ctx,
845 enum pipe_shader_type shader,
846 unsigned start, unsigned count, void **states)
847 {
848 struct si_context *sctx = (struct si_context *)ctx;
849 struct si_samplers *samplers = &sctx->samplers[shader];
850 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
851 struct si_sampler_state **sstates = (struct si_sampler_state**)states;
852 int i;
853
854 if (!count || shader >= SI_NUM_SHADERS)
855 return;
856
857 for (i = 0; i < count; i++) {
858 unsigned slot = start + i;
859 unsigned desc_slot = si_get_sampler_slot(slot);
860
861 if (!sstates[i] ||
862 sstates[i] == samplers->sampler_states[slot])
863 continue;
864
865 #ifdef DEBUG
866 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
867 #endif
868 samplers->sampler_states[slot] = sstates[i];
869
870 /* If FMASK is bound, don't overwrite it.
871 * The sampler state will be set after FMASK is unbound.
872 */
873 struct si_sampler_view *sview =
874 (struct si_sampler_view *)samplers->views[slot];
875
876 struct r600_texture *tex = NULL;
877
878 if (sview && sview->base.texture &&
879 sview->base.texture->target != PIPE_BUFFER)
880 tex = (struct r600_texture *)sview->base.texture;
881
882 if (tex && tex->fmask.size)
883 continue;
884
885 si_set_sampler_state_desc(sstates[i], sview, tex,
886 desc->list + desc_slot * 16 + 12);
887
888 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
889 }
890 }
891
892 /* BUFFER RESOURCES */
893
894 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
895 struct si_descriptors *descs,
896 unsigned num_buffers,
897 unsigned shader_userdata_index,
898 enum radeon_bo_usage shader_usage,
899 enum radeon_bo_usage shader_usage_constbuf,
900 enum radeon_bo_priority priority,
901 enum radeon_bo_priority priority_constbuf)
902 {
903 buffers->shader_usage = shader_usage;
904 buffers->shader_usage_constbuf = shader_usage_constbuf;
905 buffers->priority = priority;
906 buffers->priority_constbuf = priority_constbuf;
907 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
908
909 si_init_descriptors(descs, shader_userdata_index, 4, num_buffers);
910 }
911
912 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
913 struct si_descriptors *descs)
914 {
915 int i;
916
917 for (i = 0; i < descs->num_elements; i++) {
918 pipe_resource_reference(&buffers->buffers[i], NULL);
919 }
920
921 FREE(buffers->buffers);
922 }
923
924 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
925 struct si_buffer_resources *buffers)
926 {
927 unsigned mask = buffers->enabled_mask;
928
929 /* Add buffers to the CS. */
930 while (mask) {
931 int i = u_bit_scan(&mask);
932
933 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
934 r600_resource(buffers->buffers[i]),
935 i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
936 buffers->shader_usage_constbuf,
937 i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
938 buffers->priority_constbuf);
939 }
940 }
941
942 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
943 struct si_descriptors *descs,
944 unsigned idx, struct pipe_resource **buf,
945 unsigned *offset, unsigned *size)
946 {
947 pipe_resource_reference(buf, buffers->buffers[idx]);
948 if (*buf) {
949 struct r600_resource *res = r600_resource(*buf);
950 const uint32_t *desc = descs->list + idx * 4;
951 uint64_t va;
952
953 *size = desc[2];
954
955 assert(G_008F04_STRIDE(desc[1]) == 0);
956 va = ((uint64_t)desc[1] << 32) | desc[0];
957
958 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
959 *offset = va - res->gpu_address;
960 }
961 }
962
963 /* VERTEX BUFFERS */
964
965 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
966 {
967 struct si_descriptors *desc = &sctx->vertex_buffers;
968 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
969 int i;
970
971 for (i = 0; i < count; i++) {
972 int vb = sctx->vertex_elements->vertex_buffer_index[i];
973
974 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
975 continue;
976 if (!sctx->vertex_buffer[vb].buffer.resource)
977 continue;
978
979 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
980 (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
981 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
982 }
983
984 if (!desc->buffer)
985 return;
986 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
987 desc->buffer, RADEON_USAGE_READ,
988 RADEON_PRIO_DESCRIPTORS);
989 }
990
991 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
992 {
993 struct si_vertex_elements *velems = sctx->vertex_elements;
994 struct si_descriptors *desc = &sctx->vertex_buffers;
995 unsigned i, count;
996 unsigned desc_list_byte_size;
997 unsigned first_vb_use_mask;
998 uint64_t va;
999 uint32_t *ptr;
1000
1001 if (!sctx->vertex_buffers_dirty || !velems)
1002 return true;
1003
1004 count = velems->count;
1005
1006 if (!count)
1007 return true;
1008
1009 desc_list_byte_size = velems->desc_list_byte_size;
1010 first_vb_use_mask = velems->first_vb_use_mask;
1011
1012 /* Vertex buffer descriptors are the only ones which are uploaded
1013 * directly through a staging buffer and don't go through
1014 * the fine-grained upload path.
1015 */
1016 unsigned buffer_offset = 0;
1017 u_upload_alloc(sctx->b.b.const_uploader, 0,
1018 desc_list_byte_size,
1019 si_optimal_tcc_alignment(sctx, desc_list_byte_size),
1020 &buffer_offset,
1021 (struct pipe_resource**)&desc->buffer, (void**)&ptr);
1022 if (!desc->buffer) {
1023 desc->gpu_address = 0;
1024 return false;
1025 }
1026
1027 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
1028 desc->list = ptr;
1029 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1030 desc->buffer, RADEON_USAGE_READ,
1031 RADEON_PRIO_DESCRIPTORS);
1032
1033 assert(count <= SI_MAX_ATTRIBS);
1034
1035 for (i = 0; i < count; i++) {
1036 struct pipe_vertex_buffer *vb;
1037 struct r600_resource *rbuffer;
1038 unsigned offset;
1039 unsigned vbo_index = velems->vertex_buffer_index[i];
1040 uint32_t *desc = &ptr[i*4];
1041
1042 vb = &sctx->vertex_buffer[vbo_index];
1043 rbuffer = (struct r600_resource*)vb->buffer.resource;
1044 if (!rbuffer) {
1045 memset(desc, 0, 16);
1046 continue;
1047 }
1048
1049 offset = vb->buffer_offset + velems->src_offset[i];
1050 va = rbuffer->gpu_address + offset;
1051
1052 /* Fill in T# buffer resource description */
1053 desc[0] = va;
1054 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1055 S_008F04_STRIDE(vb->stride);
1056
1057 if (sctx->b.chip_class != VI && vb->stride) {
1058 /* Round up by rounding down and adding 1 */
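			/* e.g. with illustrative values width0 = 100,
			 * offset = 0, format_size = 4, stride = 16:
			 * (100 - 0 - 4) / 16 + 1 = 7 fetchable vertices,
			 * the last one starting at byte offset 96. */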
1059 desc[2] = (vb->buffer.resource->width0 - offset -
1060 velems->format_size[i]) /
1061 vb->stride + 1;
1062 } else {
1063 desc[2] = vb->buffer.resource->width0 - offset;
1064 }
1065
1066 desc[3] = velems->rsrc_word3[i];
1067
1068 if (first_vb_use_mask & (1 << i)) {
1069 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1070 (struct r600_resource*)vb->buffer.resource,
1071 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1072 }
1073 }
1074
1075 /* Don't flush the const cache. It would have a very negative effect
1076 * on performance (confirmed by testing). New descriptors are always
1077 * uploaded to a fresh new buffer, so I don't think flushing the const
1078 * cache is needed. */
1079 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1080 sctx->vertex_buffers_dirty = false;
1081 sctx->vertex_buffer_pointer_dirty = true;
1082 sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
1083 return true;
1084 }
1085
1086
1087 /* CONSTANT BUFFERS */
1088
1089 static unsigned
1090 si_const_and_shader_buffer_descriptors_idx(unsigned shader)
1091 {
1092 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1093 SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
1094 }
1095
1096 static struct si_descriptors *
1097 si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
1098 {
1099 return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
1100 }
1101
1102 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
1103 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1104 {
1105 void *tmp;
1106
1107 u_upload_alloc(sctx->b.b.const_uploader, 0, size,
1108 si_optimal_tcc_alignment(sctx, size),
1109 const_offset,
1110 (struct pipe_resource**)rbuffer, &tmp);
1111 if (*rbuffer)
1112 util_memcpy_cpu_to_le32(tmp, ptr, size);
1113 }
1114
1115 static void si_set_constant_buffer(struct si_context *sctx,
1116 struct si_buffer_resources *buffers,
1117 unsigned descriptors_idx,
1118 uint slot, const struct pipe_constant_buffer *input)
1119 {
1120 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1121 assert(slot < descs->num_elements);
1122 pipe_resource_reference(&buffers->buffers[slot], NULL);
1123
1124 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1125 * with a NULL buffer). We need to use a dummy buffer instead. */
1126 if (sctx->b.chip_class == CIK &&
1127 (!input || (!input->buffer && !input->user_buffer)))
1128 input = &sctx->null_const_buf;
1129
1130 if (input && (input->buffer || input->user_buffer)) {
1131 struct pipe_resource *buffer = NULL;
1132 uint64_t va;
1133
1134 /* Upload the user buffer if needed. */
1135 if (input->user_buffer) {
1136 unsigned buffer_offset;
1137
1138 si_upload_const_buffer(sctx,
1139 (struct r600_resource**)&buffer, input->user_buffer,
1140 input->buffer_size, &buffer_offset);
1141 if (!buffer) {
1142 /* Just unbind on failure. */
1143 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
1144 return;
1145 }
1146 va = r600_resource(buffer)->gpu_address + buffer_offset;
1147 } else {
1148 pipe_resource_reference(&buffer, input->buffer);
1149 va = r600_resource(buffer)->gpu_address + input->buffer_offset;
1150 /* Only track usage for non-user buffers. */
1151 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
1152 }
1153
1154 /* Set the descriptor. */
1155 uint32_t *desc = descs->list + slot*4;
1156 desc[0] = va;
1157 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1158 S_008F04_STRIDE(0);
1159 desc[2] = input->buffer_size;
1160 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1161 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1162 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1163 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1164 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1165 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1166
1167 buffers->buffers[slot] = buffer;
1168 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1169 (struct r600_resource*)buffer,
1170 buffers->shader_usage_constbuf,
1171 buffers->priority_constbuf, true);
1172 buffers->enabled_mask |= 1u << slot;
1173 } else {
1174 /* Clear the descriptor. */
1175 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1176 buffers->enabled_mask &= ~(1u << slot);
1177 }
1178
1179 sctx->descriptors_dirty |= 1u << descriptors_idx;
1180 }
1181
1182 void si_set_rw_buffer(struct si_context *sctx,
1183 uint slot, const struct pipe_constant_buffer *input)
1184 {
1185 si_set_constant_buffer(sctx, &sctx->rw_buffers,
1186 SI_DESCS_RW_BUFFERS, slot, input);
1187 }
1188
1189 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
1190 enum pipe_shader_type shader, uint slot,
1191 const struct pipe_constant_buffer *input)
1192 {
1193 struct si_context *sctx = (struct si_context *)ctx;
1194
1195 if (shader >= SI_NUM_SHADERS)
1196 return;
1197
1198 slot = si_get_constbuf_slot(slot);
1199 si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
1200 si_const_and_shader_buffer_descriptors_idx(shader),
1201 slot, input);
1202 }
1203
1204 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
1205 uint slot, struct pipe_constant_buffer *cbuf)
1206 {
1207 cbuf->user_buffer = NULL;
1208 si_get_buffer_from_descriptors(
1209 &sctx->const_and_shader_buffers[shader],
1210 si_const_and_shader_buffer_descriptors(sctx, shader),
1211 si_get_constbuf_slot(slot),
1212 &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1213 }
1214
1215 /* SHADER BUFFERS */
1216
1217 static void si_set_shader_buffers(struct pipe_context *ctx,
1218 enum pipe_shader_type shader,
1219 unsigned start_slot, unsigned count,
1220 const struct pipe_shader_buffer *sbuffers)
1221 {
1222 struct si_context *sctx = (struct si_context *)ctx;
1223 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1224 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1225 unsigned i;
1226
1227 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1228
1229 for (i = 0; i < count; ++i) {
1230 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1231 struct r600_resource *buf;
1232 unsigned slot = si_get_shaderbuf_slot(start_slot + i);
1233 uint32_t *desc = descs->list + slot * 4;
1234 uint64_t va;
1235
1236 if (!sbuffer || !sbuffer->buffer) {
1237 pipe_resource_reference(&buffers->buffers[slot], NULL);
1238 memset(desc, 0, sizeof(uint32_t) * 4);
1239 buffers->enabled_mask &= ~(1u << slot);
1240 sctx->descriptors_dirty |=
1241 1u << si_const_and_shader_buffer_descriptors_idx(shader);
1242 continue;
1243 }
1244
1245 buf = (struct r600_resource *)sbuffer->buffer;
1246 va = buf->gpu_address + sbuffer->buffer_offset;
1247
1248 desc[0] = va;
1249 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1250 S_008F04_STRIDE(0);
1251 desc[2] = sbuffer->buffer_size;
1252 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1253 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1254 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1255 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1256 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1257 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1258
1259 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1260 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1261 buffers->shader_usage,
1262 buffers->priority, true);
1263 buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
1264
1265 buffers->enabled_mask |= 1u << slot;
1266 sctx->descriptors_dirty |=
1267 1u << si_const_and_shader_buffer_descriptors_idx(shader);
1268
1269 util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
1270 sbuffer->buffer_offset + sbuffer->buffer_size);
1271 }
1272 }
1273
1274 void si_get_shader_buffers(struct si_context *sctx,
1275 enum pipe_shader_type shader,
1276 uint start_slot, uint count,
1277 struct pipe_shader_buffer *sbuf)
1278 {
1279 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1280 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1281
1282 for (unsigned i = 0; i < count; ++i) {
1283 si_get_buffer_from_descriptors(
1284 buffers, descs,
1285 si_get_shaderbuf_slot(start_slot + i),
1286 &sbuf[i].buffer, &sbuf[i].buffer_offset,
1287 &sbuf[i].buffer_size);
1288 }
1289 }
1290
1291 /* RING BUFFERS */
1292
1293 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
1294 struct pipe_resource *buffer,
1295 unsigned stride, unsigned num_records,
1296 bool add_tid, bool swizzle,
1297 unsigned element_size, unsigned index_stride, uint64_t offset)
1298 {
1299 struct si_context *sctx = (struct si_context *)ctx;
1300 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1301 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1302
1303 /* The stride field in the resource descriptor has 14 bits */
1304 assert(stride < (1 << 14));
1305
1306 assert(slot < descs->num_elements);
1307 pipe_resource_reference(&buffers->buffers[slot], NULL);
1308
1309 if (buffer) {
1310 uint64_t va;
1311
1312 va = r600_resource(buffer)->gpu_address + offset;
1313
1314 switch (element_size) {
1315 default:
1316 assert(!"Unsupported ring buffer element size");
1317 case 0:
1318 case 2:
1319 element_size = 0;
1320 break;
1321 case 4:
1322 element_size = 1;
1323 break;
1324 case 8:
1325 element_size = 2;
1326 break;
1327 case 16:
1328 element_size = 3;
1329 break;
1330 }
1331
1332 switch (index_stride) {
1333 default:
1334 assert(!"Unsupported ring buffer index stride");
1335 case 0:
1336 case 8:
1337 index_stride = 0;
1338 break;
1339 case 16:
1340 index_stride = 1;
1341 break;
1342 case 32:
1343 index_stride = 2;
1344 break;
1345 case 64:
1346 index_stride = 3;
1347 break;
1348 }
1349
1350 if (sctx->b.chip_class >= VI && stride)
1351 num_records *= stride;
1352
1353 /* Set the descriptor. */
1354 uint32_t *desc = descs->list + slot*4;
1355 desc[0] = va;
1356 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1357 S_008F04_STRIDE(stride) |
1358 S_008F04_SWIZZLE_ENABLE(swizzle);
1359 desc[2] = num_records;
1360 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1361 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1362 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1363 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1364 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1365 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1366 S_008F0C_INDEX_STRIDE(index_stride) |
1367 S_008F0C_ADD_TID_ENABLE(add_tid);
1368
1369 if (sctx->b.chip_class >= GFX9)
1370 assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
1371 else
1372 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
1373
1374 pipe_resource_reference(&buffers->buffers[slot], buffer);
1375 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1376 (struct r600_resource*)buffer,
1377 buffers->shader_usage, buffers->priority);
1378 buffers->enabled_mask |= 1u << slot;
1379 } else {
1380 /* Clear the descriptor. */
1381 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1382 buffers->enabled_mask &= ~(1u << slot);
1383 }
1384
1385 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1386 }
1387
1388 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
1389 uint32_t *desc, uint64_t old_buf_va,
1390 struct pipe_resource *new_buf)
1391 {
1392 /* Retrieve the buffer offset from the descriptor. */
1393 uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
1394
1395 assert(old_buf_va <= old_desc_va);
1396 uint64_t offset_within_buffer = old_desc_va - old_buf_va;
1397
1398 /* Update the descriptor. */
1399 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer,
1400 desc);
1401 }
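/* Example with made-up addresses: if old_buf_va = 0x100000 and the descriptor
 * held old_desc_va = 0x100080, the binding covered offset 0x80 into the
 * buffer, so the descriptor is rewritten to point at
 * r600_resource(new_buf)->gpu_address + 0x80.
 */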
1402
1403 /* INTERNAL CONST BUFFERS */
1404
1405 static void si_set_polygon_stipple(struct pipe_context *ctx,
1406 const struct pipe_poly_stipple *state)
1407 {
1408 struct si_context *sctx = (struct si_context *)ctx;
1409 struct pipe_constant_buffer cb = {};
1410 unsigned stipple[32];
1411 int i;
1412
1413 for (i = 0; i < 32; i++)
1414 stipple[i] = util_bitreverse(state->stipple[i]);
1415
1416 cb.user_buffer = stipple;
1417 cb.buffer_size = sizeof(stipple);
1418
1419 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1420 }
1421
1422 /* TEXTURE METADATA ENABLE/DISABLE */
1423
1424 static void
1425 si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
1426 {
1427 util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
1428 util_dynarray_clear(&sctx->resident_img_needs_color_decompress);
1429
1430 util_dynarray_foreach(&sctx->resident_tex_handles,
1431 struct si_texture_handle *, tex_handle) {
1432 struct pipe_resource *res = (*tex_handle)->view->texture;
1433 struct r600_texture *rtex;
1434
1435 if (!res || res->target == PIPE_BUFFER)
1436 continue;
1437
1438 rtex = (struct r600_texture *)res;
1439 if (!color_needs_decompression(rtex))
1440 continue;
1441
1442 util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
1443 struct si_texture_handle *, *tex_handle);
1444 }
1445
1446 util_dynarray_foreach(&sctx->resident_img_handles,
1447 struct si_image_handle *, img_handle) {
1448 struct pipe_image_view *view = &(*img_handle)->view;
1449 struct pipe_resource *res = view->resource;
1450 struct r600_texture *rtex;
1451
1452 if (!res || res->target == PIPE_BUFFER)
1453 continue;
1454
1455 rtex = (struct r600_texture *)res;
1456 if (!color_needs_decompression(rtex))
1457 continue;
1458
1459 util_dynarray_append(&sctx->resident_img_needs_color_decompress,
1460 struct si_image_handle *, *img_handle);
1461 }
1462 }
1463
1464 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1465 * while the texture is bound, possibly by a different context. In that case,
1466 * call this function to update needs_*_decompress_masks.
1467 */
1468 void si_update_needs_color_decompress_masks(struct si_context *sctx)
1469 {
1470 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1471 si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
1472 si_images_update_needs_color_decompress_mask(&sctx->images[i]);
1473 si_update_shader_needs_decompress_mask(sctx, i);
1474 }
1475
1476 si_resident_handles_update_needs_color_decompress(sctx);
1477 }
1478
1479 /* BUFFER DISCARD/INVALIDATION */
1480
1481 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
1482 static void si_reset_buffer_resources(struct si_context *sctx,
1483 struct si_buffer_resources *buffers,
1484 unsigned descriptors_idx,
1485 unsigned slot_mask,
1486 struct pipe_resource *buf,
1487 uint64_t old_va,
1488 enum radeon_bo_usage usage,
1489 enum radeon_bo_priority priority)
1490 {
1491 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1492 unsigned mask = buffers->enabled_mask & slot_mask;
1493
1494 while (mask) {
1495 unsigned i = u_bit_scan(&mask);
1496 if (buffers->buffers[i] == buf) {
1497 si_desc_reset_buffer_offset(&sctx->b.b,
1498 descs->list + i*4,
1499 old_va, buf);
1500 sctx->descriptors_dirty |= 1u << descriptors_idx;
1501
1502 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1503 (struct r600_resource *)buf,
1504 usage, priority, true);
1505 }
1506 }
1507 }
1508
1509 static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
1510 uint64_t old_va)
1511 {
1512 struct si_context *sctx = (struct si_context*)ctx;
1513 struct r600_resource *rbuffer = r600_resource(buf);
1514 unsigned i, shader;
1515 unsigned num_elems = sctx->vertex_elements ?
1516 sctx->vertex_elements->count : 0;
1517
1518 /* We changed the buffer, now we need to bind it where the old one
1519 * was bound. This consists of 2 things:
1520 * 1) Updating the resource descriptor and dirtying it.
1521 * 2) Adding a relocation to the CS, so that it's usable.
1522 */
1523
1524 /* Vertex buffers. */
1525 if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
1526 for (i = 0; i < num_elems; i++) {
1527 int vb = sctx->vertex_elements->vertex_buffer_index[i];
1528
1529 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1530 continue;
1531 if (!sctx->vertex_buffer[vb].buffer.resource)
1532 continue;
1533
1534 if (sctx->vertex_buffer[vb].buffer.resource == buf) {
1535 sctx->vertex_buffers_dirty = true;
1536 break;
1537 }
1538 }
1539 }
1540
1541 /* Streamout buffers. (other internal buffers can't be invalidated) */
1542 if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
1543 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1544 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1545 struct si_descriptors *descs =
1546 &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1547
1548 if (buffers->buffers[i] != buf)
1549 continue;
1550
1551 si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1552 old_va, buf);
1553 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1554
1555 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1556 rbuffer, buffers->shader_usage,
1557 RADEON_PRIO_SHADER_RW_BUFFER,
1558 true);
1559
1560 /* Update the streamout state. */
1561 if (sctx->streamout.begin_emitted)
1562 si_emit_streamout_end(sctx);
1563 sctx->streamout.append_bitmask =
1564 sctx->streamout.enabled_mask;
1565 si_streamout_buffers_dirty(sctx);
1566 }
1567 }
1568
1569 /* Constant and shader buffers. */
1570 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
1571 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1572 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1573 si_const_and_shader_buffer_descriptors_idx(shader),
1574 u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
1575 buf, old_va,
1576 sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
1577 sctx->const_and_shader_buffers[shader].priority_constbuf);
1578 }
1579
1580 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
1581 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1582 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1583 si_const_and_shader_buffer_descriptors_idx(shader),
1584 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
1585 buf, old_va,
1586 sctx->const_and_shader_buffers[shader].shader_usage,
1587 sctx->const_and_shader_buffers[shader].priority);
1588 }
1589
1590 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
1591 /* Texture buffers - update bindings. */
1592 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1593 struct si_samplers *samplers = &sctx->samplers[shader];
1594 struct si_descriptors *descs =
1595 si_sampler_and_image_descriptors(sctx, shader);
1596 unsigned mask = samplers->enabled_mask;
1597
1598 while (mask) {
1599 unsigned i = u_bit_scan(&mask);
1600 if (samplers->views[i]->texture == buf) {
1601 unsigned desc_slot = si_get_sampler_slot(i);
1602
1603 si_desc_reset_buffer_offset(ctx,
1604 descs->list +
1605 desc_slot * 16 + 4,
1606 old_va, buf);
1607 sctx->descriptors_dirty |=
1608 1u << si_sampler_and_image_descriptors_idx(shader);
1609
1610 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1611 rbuffer, RADEON_USAGE_READ,
1612 RADEON_PRIO_SAMPLER_BUFFER,
1613 true);
1614 }
1615 }
1616 }
1617 }
1618
1619 /* Shader images */
1620 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
1621 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1622 struct si_images *images = &sctx->images[shader];
1623 struct si_descriptors *descs =
1624 si_sampler_and_image_descriptors(sctx, shader);
1625 unsigned mask = images->enabled_mask;
1626
1627 while (mask) {
1628 unsigned i = u_bit_scan(&mask);
1629
1630 if (images->views[i].resource == buf) {
1631 unsigned desc_slot = si_get_image_slot(i);
1632
1633 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1634 si_mark_image_range_valid(&images->views[i]);
1635
1636 si_desc_reset_buffer_offset(
1637 ctx, descs->list + desc_slot * 8 + 4,
1638 old_va, buf);
1639 sctx->descriptors_dirty |=
1640 1u << si_sampler_and_image_descriptors_idx(shader);
1641
1642 radeon_add_to_buffer_list_check_mem(
1643 &sctx->b, &sctx->b.gfx, rbuffer,
1644 RADEON_USAGE_READWRITE,
1645 RADEON_PRIO_SAMPLER_BUFFER, true);
1646 }
1647 }
1648 }
1649 }
1650
1651 /* Bindless texture handles */
1652 if (rbuffer->texture_handle_allocated) {
1653 struct si_descriptors *descs = &sctx->bindless_descriptors;
1654
1655 util_dynarray_foreach(&sctx->resident_tex_handles,
1656 struct si_texture_handle *, tex_handle) {
1657 struct pipe_sampler_view *view = (*tex_handle)->view;
1658 unsigned desc_slot = (*tex_handle)->desc_slot;
1659
1660 if (view->texture == buf) {
1661 si_set_buf_desc_address(rbuffer,
1662 view->u.buf.offset,
1663 descs->list +
1664 desc_slot * 16 + 4);
1665
1666 (*tex_handle)->desc_dirty = true;
1667 sctx->bindless_descriptors_dirty = true;
1668
1669 radeon_add_to_buffer_list_check_mem(
1670 &sctx->b, &sctx->b.gfx, rbuffer,
1671 RADEON_USAGE_READ,
1672 RADEON_PRIO_SAMPLER_BUFFER, true);
1673 }
1674 }
1675 }
1676
1677 /* Bindless image handles */
1678 if (rbuffer->image_handle_allocated) {
1679 struct si_descriptors *descs = &sctx->bindless_descriptors;
1680
1681 util_dynarray_foreach(&sctx->resident_img_handles,
1682 struct si_image_handle *, img_handle) {
1683 struct pipe_image_view *view = &(*img_handle)->view;
1684 unsigned desc_slot = (*img_handle)->desc_slot;
1685
1686 if (view->resource == buf) {
1687 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1688 si_mark_image_range_valid(view);
1689
1690 si_set_buf_desc_address(rbuffer,
1691 view->u.buf.offset,
1692 descs->list +
1693 desc_slot * 16 + 4);
1694
1695 (*img_handle)->desc_dirty = true;
1696 sctx->bindless_descriptors_dirty = true;
1697
1698 radeon_add_to_buffer_list_check_mem(
1699 &sctx->b, &sctx->b.gfx, rbuffer,
1700 RADEON_USAGE_READWRITE,
1701 RADEON_PRIO_SAMPLER_BUFFER, true);
1702 }
1703 }
1704 }
1705 }
1706
1707 /* Reallocate a buffer and update all resource bindings where the buffer is
1708 * bound.
1709 *
1710 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1711 * idle by discarding its contents. Apps usually tell us when to do this using
1712 * map_buffer flags, for example.
1713 */
1714 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1715 {
1716 struct si_context *sctx = (struct si_context*)ctx;
1717 struct r600_resource *rbuffer = r600_resource(buf);
1718 uint64_t old_va = rbuffer->gpu_address;
1719
1720 /* Reallocate the buffer in the same pipe_resource. */
1721 si_alloc_resource(&sctx->screen->b, rbuffer);
1722
1723 si_rebind_buffer(ctx, buf, old_va);
1724 }
1725
1726 static void si_upload_bindless_descriptor(struct si_context *sctx,
1727 unsigned desc_slot,
1728 unsigned num_dwords)
1729 {
1730 struct si_descriptors *desc = &sctx->bindless_descriptors;
1731 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1732 unsigned desc_slot_offset = desc_slot * 16;
1733 uint32_t *data;
1734 uint64_t va;
1735
1736 data = desc->list + desc_slot_offset;
1737 va = desc->gpu_address + desc_slot_offset * 4;
1738
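	/* Write the new dwords straight into the GPU copy of the descriptor
	 * list with a CP WRITE_DATA packet targeting TC L2; bindless
	 * descriptors are patched in place rather than re-uploaded. */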
1739 radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
1740 radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
1741 S_370_WR_CONFIRM(1) |
1742 S_370_ENGINE_SEL(V_370_ME));
1743 radeon_emit(cs, va);
1744 radeon_emit(cs, va >> 32);
1745 radeon_emit_array(cs, data, num_dwords);
1746 }
1747
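/* Upload all dirty resident texture/image descriptors. The GPU is idled
 * first because the slots are written in place while they may be in use.
 */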
1748 static void si_upload_bindless_descriptors(struct si_context *sctx)
1749 {
1750 if (!sctx->bindless_descriptors_dirty)
1751 return;
1752
1753 /* Wait for graphics/compute to be idle before updating the resident
1754 * descriptors directly in memory, in case the GPU is using them.
1755 */
1756 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1757 SI_CONTEXT_CS_PARTIAL_FLUSH;
1758 si_emit_cache_flush(sctx);
1759
1760 util_dynarray_foreach(&sctx->resident_tex_handles,
1761 struct si_texture_handle *, tex_handle) {
1762 unsigned desc_slot = (*tex_handle)->desc_slot;
1763
1764 if (!(*tex_handle)->desc_dirty)
1765 continue;
1766
1767 si_upload_bindless_descriptor(sctx, desc_slot, 16);
1768 (*tex_handle)->desc_dirty = false;
1769 }
1770
1771 util_dynarray_foreach(&sctx->resident_img_handles,
1772 struct si_image_handle *, img_handle) {
1773 unsigned desc_slot = (*img_handle)->desc_slot;
1774
1775 if (!(*img_handle)->desc_dirty)
1776 continue;
1777
1778 si_upload_bindless_descriptor(sctx, desc_slot, 8);
1779 (*img_handle)->desc_dirty = false;
1780 }
1781
1782 /* Invalidate L1 because it doesn't know that L2 changed. */
1783 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1;
1784 si_emit_cache_flush(sctx);
1785
1786 sctx->bindless_descriptors_dirty = false;
1787 }
1788
1789 /* Update mutable image descriptor fields of a resident texture. */
1790 static void si_update_bindless_texture_descriptor(struct si_context *sctx,
1791 struct si_texture_handle *tex_handle)
1792 {
1793 struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
1794 struct si_descriptors *desc = &sctx->bindless_descriptors;
1795 unsigned desc_slot_offset = tex_handle->desc_slot * 16;
1796 uint32_t desc_list[16];
1797
1798 if (sview->base.texture->target == PIPE_BUFFER)
1799 return;
1800
1801 memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
1802 si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate,
1803 desc->list + desc_slot_offset);
1804
1805 if (memcmp(desc_list, desc->list + desc_slot_offset,
1806 sizeof(desc_list))) {
1807 tex_handle->desc_dirty = true;
1808 sctx->bindless_descriptors_dirty = true;
1809 }
1810 }
1811
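/* Same as above, but for a resident image handle (8-dword descriptor). */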
1812 static void si_update_bindless_image_descriptor(struct si_context *sctx,
1813 struct si_image_handle *img_handle)
1814 {
1815 struct si_descriptors *desc = &sctx->bindless_descriptors;
1816 unsigned desc_slot_offset = img_handle->desc_slot * 16;
1817 struct pipe_image_view *view = &img_handle->view;
1818 uint32_t desc_list[8];
1819
1820 if (view->resource->target == PIPE_BUFFER)
1821 return;
1822
1823 memcpy(desc_list, desc->list + desc_slot_offset,
1824 sizeof(desc_list));
1825 si_set_shader_image_desc(sctx, view, true,
1826 desc->list + desc_slot_offset);
1827
1828 if (memcmp(desc_list, desc->list + desc_slot_offset,
1829 sizeof(desc_list))) {
1830 img_handle->desc_dirty = true;
1831 sctx->bindless_descriptors_dirty = true;
1832 }
1833 }
1834
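/* Update the descriptors of all resident texture and image handles and
 * upload any that changed.
 */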
1835 static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
1836 {
1837 util_dynarray_foreach(&sctx->resident_tex_handles,
1838 struct si_texture_handle *, tex_handle) {
1839 si_update_bindless_texture_descriptor(sctx, *tex_handle);
1840 }
1841
1842 util_dynarray_foreach(&sctx->resident_img_handles,
1843 struct si_image_handle *, img_handle) {
1844 si_update_bindless_image_descriptor(sctx, *img_handle);
1845 }
1846
1847 si_upload_bindless_descriptors(sctx);
1848 }
1849
1850 /* Update mutable image descriptor fields of all bound textures. */
1851 void si_update_all_texture_descriptors(struct si_context *sctx)
1852 {
1853 unsigned shader;
1854
1855 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1856 struct si_samplers *samplers = &sctx->samplers[shader];
1857 struct si_images *images = &sctx->images[shader];
1858 unsigned mask;
1859
1860 /* Images. */
1861 mask = images->enabled_mask;
1862 while (mask) {
1863 unsigned i = u_bit_scan(&mask);
1864 struct pipe_image_view *view = &images->views[i];
1865
1866 if (!view->resource ||
1867 view->resource->target == PIPE_BUFFER)
1868 continue;
1869
1870 si_set_shader_image(sctx, shader, i, view, true);
1871 }
1872
1873 /* Sampler views. */
1874 mask = samplers->enabled_mask;
1875 while (mask) {
1876 unsigned i = u_bit_scan(&mask);
1877 struct pipe_sampler_view *view = samplers->views[i];
1878
1879 if (!view ||
1880 !view->texture ||
1881 view->texture->target == PIPE_BUFFER)
1882 continue;
1883
1884 si_set_sampler_view(sctx, shader, i,
1885 samplers->views[i], true);
1886 }
1887
1888 si_update_shader_needs_decompress_mask(sctx, shader);
1889 }
1890
1891 si_update_all_resident_texture_descriptors(sctx);
1892 }
1893
1894 /* SHADER USER DATA */
1895
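/* Mark all descriptor pointers of one shader stage as dirty, so that they
 * are re-emitted by the shader_pointers atom.
 */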
1896 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
1897 unsigned shader)
1898 {
1899 sctx->shader_pointers_dirty |=
1900 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
1901 SI_NUM_SHADER_DESCS);
1902
1903 if (shader == PIPE_SHADER_VERTEX)
1904 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1905
1906 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1907 }
1908
1909 static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
1910 {
1911 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
1912 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1913 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1914 sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1915 sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1916 }
1917
1918 /* Set a base register address for user data constants in the given shader.
1919 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
1920 */
1921 static void si_set_user_data_base(struct si_context *sctx,
1922 unsigned shader, uint32_t new_base)
1923 {
1924 uint32_t *base = &sctx->shader_pointers.sh_base[shader];
1925
1926 if (*base != new_base) {
1927 *base = new_base;
1928
1929 if (new_base) {
1930 si_mark_shader_pointers_dirty(sctx, shader);
1931
1932 if (shader == PIPE_SHADER_VERTEX)
1933 sctx->last_vs_state = ~0;
1934 }
1935 }
1936 }
1937
1938 /* This must be called when these shaders are changed from non-NULL to NULL
1939 * and vice versa:
1940 * - geometry shader
1941 * - tessellation control shader
1942 * - tessellation evaluation shader
1943 */
1944 void si_shader_change_notify(struct si_context *sctx)
1945 {
1946 /* VS can be bound as VS, ES, or LS. */
1947 if (sctx->tes_shader.cso) {
1948 if (sctx->b.chip_class >= GFX9) {
1949 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1950 R_00B430_SPI_SHADER_USER_DATA_LS_0);
1951 } else {
1952 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1953 R_00B530_SPI_SHADER_USER_DATA_LS_0);
1954 }
1955 } else if (sctx->gs_shader.cso) {
1956 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1957 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1958 } else {
1959 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1960 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1961 }
1962
1963 /* TES can be bound as ES, VS, or not bound. */
1964 if (sctx->tes_shader.cso) {
1965 if (sctx->gs_shader.cso)
1966 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1967 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1968 else
1969 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1970 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1971 } else {
1972 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
1973 }
1974 }
1975
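/* Emit the SET_SH_REG header for 'pointer_count' consecutive 64-bit
 * descriptor pointers, starting at the user-data SGPR of 'desc' relative
 * to 'sh_base'.
 */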
1976 static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
1977 struct si_descriptors *desc,
1978 unsigned sh_base,
1979 unsigned pointer_count)
1980 {
1981 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
1982 radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
1983 }
1984
1985 static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
1986 struct si_descriptors *desc)
1987 {
1988 uint64_t va = desc->gpu_address;
1989
1990 radeon_emit(cs, va);
1991 radeon_emit(cs, va >> 32);
1992 }
1993
1994 static void si_emit_shader_pointer(struct si_context *sctx,
1995 struct si_descriptors *desc,
1996 unsigned sh_base)
1997 {
1998 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1999
2000 si_emit_shader_pointer_head(cs, desc, sh_base, 1);
2001 si_emit_shader_pointer_body(cs, desc);
2002 }
2003
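/* Emit one SET_SH_REG packet per consecutive range of dirty descriptor
 * pointers selected by 'pointer_mask' for the stage whose user-data base
 * is 'sh_base'.
 */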
2004 static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
2005 unsigned pointer_mask,
2006 unsigned sh_base)
2007 {
2008 if (!sh_base)
2009 return;
2010
2011 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2012 unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
2013
2014 while (mask) {
2015 int start, count;
2016 u_bit_scan_consecutive_range(&mask, &start, &count);
2017
2018 struct si_descriptors *descs = &sctx->descriptors[start];
2019
2020 si_emit_shader_pointer_head(cs, descs, sh_base, count);
2021 for (int i = 0; i < count; i++)
2022 si_emit_shader_pointer_body(cs, descs + i);
2023 }
2024 }
2025
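/* Emit a descriptor pointer for every graphics stage. On GFX9, a single
 * write to the common user-data registers is broadcast to all stages.
 */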
2026 static void si_emit_global_shader_pointers(struct si_context *sctx,
2027 struct si_descriptors *descs)
2028 {
2029 if (sctx->b.chip_class == GFX9) {
2030 /* Broadcast it to all shader stages. */
2031 si_emit_shader_pointer(sctx, descs,
2032 R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
2033 return;
2034 }
2035
2036 si_emit_shader_pointer(sctx, descs,
2037 R_00B030_SPI_SHADER_USER_DATA_PS_0);
2038 si_emit_shader_pointer(sctx, descs,
2039 R_00B130_SPI_SHADER_USER_DATA_VS_0);
2040 si_emit_shader_pointer(sctx, descs,
2041 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2042 si_emit_shader_pointer(sctx, descs,
2043 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2044 si_emit_shader_pointer(sctx, descs,
2045 R_00B430_SPI_SHADER_USER_DATA_HS_0);
2046 si_emit_shader_pointer(sctx, descs,
2047 R_00B530_SPI_SHADER_USER_DATA_LS_0);
2048 }
2049
2050 void si_emit_graphics_shader_pointers(struct si_context *sctx,
2051 struct r600_atom *atom)
2052 {
2053 uint32_t *sh_base = sctx->shader_pointers.sh_base;
2054
2055 if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
2056 si_emit_global_shader_pointers(sctx,
2057 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2058 }
2059
2060 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
2061 sh_base[PIPE_SHADER_VERTEX]);
2062 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
2063 sh_base[PIPE_SHADER_TESS_CTRL]);
2064 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
2065 sh_base[PIPE_SHADER_TESS_EVAL]);
2066 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
2067 sh_base[PIPE_SHADER_GEOMETRY]);
2068 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
2069 sh_base[PIPE_SHADER_FRAGMENT]);
2070
2071 sctx->shader_pointers_dirty &=
2072 ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
2073
2074 if (sctx->vertex_buffer_pointer_dirty) {
2075 si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
2076 sh_base[PIPE_SHADER_VERTEX]);
2077 sctx->vertex_buffer_pointer_dirty = false;
2078 }
2079
2080 if (sctx->graphics_bindless_pointer_dirty) {
2081 si_emit_global_shader_pointers(sctx,
2082 &sctx->bindless_descriptors);
2083 sctx->graphics_bindless_pointer_dirty = false;
2084 }
2085 }
2086
2087 void si_emit_compute_shader_pointers(struct si_context *sctx)
2088 {
2089 unsigned base = R_00B900_COMPUTE_USER_DATA_0;
2090
2091 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
2092 base);
2093 sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
2094
2095 if (sctx->compute_bindless_pointer_dirty) {
2096 si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
2097 sctx->compute_bindless_pointer_dirty = false;
2098 }
2099 }
2100
2101 /* BINDLESS */
2102
2103 static void si_init_bindless_descriptors(struct si_context *sctx,
2104 struct si_descriptors *desc,
2105 unsigned shader_userdata_index,
2106 unsigned num_elements)
2107 {
2108 MAYBE_UNUSED unsigned desc_slot;
2109
2110 si_init_descriptors(desc, shader_userdata_index, 16, num_elements);
2111 sctx->bindless_descriptors.num_active_slots = num_elements;
2112
2113 /* The first bindless descriptor is stored at slot 1, because 0 is not
2114 * considered to be a valid handle.
2115 */
2116 sctx->num_bindless_descriptors = 1;
2117
2118 /* Track which bindless slots are used (or not). */
2119 util_idalloc_init(&sctx->bindless_used_slots);
2120 util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
2121
2122 /* Reserve slot 0 because it's an invalid handle for bindless. */
2123 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2124 assert(desc_slot == 0);
2125 }
2126
2127 static void si_release_bindless_descriptors(struct si_context *sctx)
2128 {
2129 si_release_descriptors(&sctx->bindless_descriptors);
2130 util_idalloc_fini(&sctx->bindless_used_slots);
2131 }
2132
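/* Return the first free bindless descriptor slot, doubling the size of the
 * descriptor array when it is full. Slot 0 is reserved and never returned.
 */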
2133 static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
2134 {
2135 struct si_descriptors *desc = &sctx->bindless_descriptors;
2136 unsigned desc_slot;
2137
2138 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2139 if (desc_slot >= desc->num_elements) {
2140 /* The array of bindless descriptors is full; resize it. */
2141 unsigned slot_size = desc->element_dw_size * 4;
2142 unsigned new_num_elements = desc->num_elements * 2;
2143
2144 desc->list = REALLOC(desc->list, desc->num_elements * slot_size,
2145 new_num_elements * slot_size);
2146 desc->num_elements = new_num_elements;
2147 desc->num_active_slots = new_num_elements;
2148 }
2149
2150 assert(desc_slot);
2151 return desc_slot;
2152 }
2153
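/* Copy a new descriptor into a free bindless slot and re-upload the whole
 * descriptor array. Returns the slot index, which is also used as the
 * bindless handle, or 0 on failure.
 */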
2154 static unsigned
2155 si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
2156 unsigned size)
2157 {
2158 struct si_descriptors *desc = &sctx->bindless_descriptors;
2159 unsigned desc_slot, desc_slot_offset;
2160
2161 /* Find a free slot. */
2162 desc_slot = si_get_first_free_bindless_slot(sctx);
2163
2164 /* For simplicity, sampler and image bindless descriptors use fixed
2165 * 16-dword slots for now. Image descriptors only need 8 dwords, but this
2166 * doesn't really matter because no real apps use image handles.
2167 */
2168 desc_slot_offset = desc_slot * 16;
2169
2170 /* Copy the descriptor into the array. */
2171 memcpy(desc->list + desc_slot_offset, desc_list, size);
2172
2173 /* Re-upload the whole array of bindless descriptors into a new buffer.
2174 */
2175 if (!si_upload_descriptors(sctx, desc))
2176 return 0;
2177
2178 /* Make sure to re-emit the shader pointers for all stages. */
2179 sctx->graphics_bindless_pointer_dirty = true;
2180 sctx->compute_bindless_pointer_dirty = true;
2181
2182 return desc_slot;
2183 }
2184
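/* Refresh the address stored in a bindless buffer descriptor if the
 * underlying buffer has been reallocated while the handle wasn't resident.
 */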
2185 static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
2186 unsigned desc_slot,
2187 struct pipe_resource *resource,
2188 uint64_t offset,
2189 bool *desc_dirty)
2190 {
2191 struct si_descriptors *desc = &sctx->bindless_descriptors;
2192 struct r600_resource *buf = r600_resource(resource);
2193 unsigned desc_slot_offset = desc_slot * 16;
2194 uint32_t *desc_list = desc->list + desc_slot_offset + 4;
2195 uint64_t old_desc_va;
2196
2197 assert(resource->target == PIPE_BUFFER);
2198
2199 /* Retrieve the old buffer addr from the descriptor. */
2200 old_desc_va = si_desc_extract_buffer_address(desc_list);
2201
2202 if (old_desc_va != buf->gpu_address + offset) {
2203 /* The buffer has been invalidated while the handle wasn't
2204 * resident; update the descriptor and set the dirty flag.
2205 */
2206 si_set_buf_desc_address(buf, offset, &desc_list[0]);
2207
2208 *desc_dirty = true;
2209 }
2210 }
2211
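/* Create a bindless texture handle: build a combined image+sampler
 * descriptor, store it in a free bindless slot and return the slot index
 * as the handle.
 */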
2212 static uint64_t si_create_texture_handle(struct pipe_context *ctx,
2213 struct pipe_sampler_view *view,
2214 const struct pipe_sampler_state *state)
2215 {
2216 struct si_sampler_view *sview = (struct si_sampler_view *)view;
2217 struct si_context *sctx = (struct si_context *)ctx;
2218 struct si_texture_handle *tex_handle;
2219 struct si_sampler_state *sstate;
2220 uint32_t desc_list[16];
2221 uint64_t handle;
2222
2223 tex_handle = CALLOC_STRUCT(si_texture_handle);
2224 if (!tex_handle)
2225 return 0;
2226
2227 memset(desc_list, 0, sizeof(desc_list));
2228 si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);
2229
2230 sstate = ctx->create_sampler_state(ctx, state);
2231 if (!sstate) {
2232 FREE(tex_handle);
2233 return 0;
2234 }
2235
2236 si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
2237 memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
2238 ctx->delete_sampler_state(ctx, sstate);
2239
2240 tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2241 sizeof(desc_list));
2242 if (!tex_handle->desc_slot) {
2243 FREE(tex_handle);
2244 return 0;
2245 }
2246
2247 handle = tex_handle->desc_slot;
2248
2249 if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
2250 tex_handle)) {
2251 FREE(tex_handle);
2252 return 0;
2253 }
2254
2255 pipe_sampler_view_reference(&tex_handle->view, view);
2256
2257 r600_resource(sview->base.texture)->texture_handle_allocated = true;
2258
2259 return handle;
2260 }
2261
2262 static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
2263 {
2264 struct si_context *sctx = (struct si_context *)ctx;
2265 struct si_texture_handle *tex_handle;
2266 struct hash_entry *entry;
2267
2268 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2269 if (!entry)
2270 return;
2271
2272 tex_handle = (struct si_texture_handle *)entry->data;
2273
2274 /* Allow this descriptor slot to be re-used. */
2275 util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);
2276
2277 pipe_sampler_view_reference(&tex_handle->view, NULL);
2278 _mesa_hash_table_remove(sctx->tex_handles, entry);
2279 FREE(tex_handle);
2280 }
2281
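/* Make a texture handle (non-)resident: update its descriptor, add it to or
 * remove it from the per-context resident and decompression lists, and add
 * its buffers to the current CS when it becomes resident.
 */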
2282 static void si_make_texture_handle_resident(struct pipe_context *ctx,
2283 uint64_t handle, bool resident)
2284 {
2285 struct si_context *sctx = (struct si_context *)ctx;
2286 struct si_texture_handle *tex_handle;
2287 struct si_sampler_view *sview;
2288 struct hash_entry *entry;
2289
2290 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2291 if (!entry)
2292 return;
2293
2294 tex_handle = (struct si_texture_handle *)entry->data;
2295 sview = (struct si_sampler_view *)tex_handle->view;
2296
2297 if (resident) {
2298 if (sview->base.texture->target != PIPE_BUFFER) {
2299 struct r600_texture *rtex =
2300 (struct r600_texture *)sview->base.texture;
2301
2302 if (depth_needs_decompression(rtex)) {
2303 util_dynarray_append(
2304 &sctx->resident_tex_needs_depth_decompress,
2305 struct si_texture_handle *,
2306 tex_handle);
2307 }
2308
2309 if (color_needs_decompression(rtex)) {
2310 util_dynarray_append(
2311 &sctx->resident_tex_needs_color_decompress,
2312 struct si_texture_handle *,
2313 tex_handle);
2314 }
2315
2316 if (rtex->dcc_offset &&
2317 p_atomic_read(&rtex->framebuffers_bound))
2318 sctx->need_check_render_feedback = true;
2319
2320 si_update_bindless_texture_descriptor(sctx, tex_handle);
2321 } else {
2322 si_update_bindless_buffer_descriptor(sctx,
2323 tex_handle->desc_slot,
2324 sview->base.texture,
2325 sview->base.u.buf.offset,
2326 &tex_handle->desc_dirty);
2327 }
2328
2329 /* Re-upload the descriptor if it has been updated while it
2330 * wasn't resident.
2331 */
2332 if (tex_handle->desc_dirty)
2333 sctx->bindless_descriptors_dirty = true;
2334
2335 /* Add the texture handle to the per-context list. */
2336 util_dynarray_append(&sctx->resident_tex_handles,
2337 struct si_texture_handle *, tex_handle);
2338
2339 /* Add the buffers to the current CS in case si_begin_new_cs()
2340 * is not going to be called.
2341 */
2342 si_sampler_view_add_buffer(sctx, sview->base.texture,
2343 RADEON_USAGE_READ,
2344 sview->is_stencil_sampler, false);
2345 } else {
2346 /* Remove the texture handle from the per-context list. */
2347 util_dynarray_delete_unordered(&sctx->resident_tex_handles,
2348 struct si_texture_handle *,
2349 tex_handle);
2350
2351 if (sview->base.texture->target != PIPE_BUFFER) {
2352 util_dynarray_delete_unordered(
2353 &sctx->resident_tex_needs_depth_decompress,
2354 struct si_texture_handle *, tex_handle);
2355
2356 util_dynarray_delete_unordered(
2357 &sctx->resident_tex_needs_color_decompress,
2358 struct si_texture_handle *, tex_handle);
2359 }
2360 }
2361 }
2362
2363 static uint64_t si_create_image_handle(struct pipe_context *ctx,
2364 const struct pipe_image_view *view)
2365 {
2366 struct si_context *sctx = (struct si_context *)ctx;
2367 struct si_image_handle *img_handle;
2368 uint32_t desc_list[8];
2369 uint64_t handle;
2370
2371 if (!view || !view->resource)
2372 return 0;
2373
2374 img_handle = CALLOC_STRUCT(si_image_handle);
2375 if (!img_handle)
2376 return 0;
2377
2378 memset(desc_list, 0, sizeof(desc_list));
2379 si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
2380
2381 si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
2382
2383 img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2384 sizeof(desc_list));
2385 if (!img_handle->desc_slot) {
2386 FREE(img_handle);
2387 return 0;
2388 }
2389
2390 handle = img_handle->desc_slot;
2391
2392 if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle,
2393 img_handle)) {
2394 FREE(img_handle);
2395 return 0;
2396 }
2397
2398 util_copy_image_view(&img_handle->view, view);
2399
2400 r600_resource(view->resource)->image_handle_allocated = true;
2401
2402 return handle;
2403 }
2404
2405 static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
2406 {
2407 struct si_context *sctx = (struct si_context *)ctx;
2408 struct si_image_handle *img_handle;
2409 struct hash_entry *entry;
2410
2411 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2412 if (!entry)
2413 return;
2414
2415 img_handle = (struct si_image_handle *)entry->data;
2416
2417 util_copy_image_view(&img_handle->view, NULL);
2418 _mesa_hash_table_remove(sctx->img_handles, entry);
2419 FREE(img_handle);
2420 }
2421
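/* Image counterpart of si_make_texture_handle_resident(). */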
2422 static void si_make_image_handle_resident(struct pipe_context *ctx,
2423 uint64_t handle, unsigned access,
2424 bool resident)
2425 {
2426 struct si_context *sctx = (struct si_context *)ctx;
2427 struct si_image_handle *img_handle;
2428 struct pipe_image_view *view;
2429 struct r600_resource *res;
2430 struct hash_entry *entry;
2431
2432 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2433 if (!entry)
2434 return;
2435
2436 img_handle = (struct si_image_handle *)entry->data;
2437 view = &img_handle->view;
2438 res = (struct r600_resource *)view->resource;
2439
2440 if (resident) {
2441 if (res->b.b.target != PIPE_BUFFER) {
2442 struct r600_texture *rtex = (struct r600_texture *)res;
2443 unsigned level = view->u.tex.level;
2444
2445 if (color_needs_decompression(rtex)) {
2446 util_dynarray_append(
2447 &sctx->resident_img_needs_color_decompress,
2448 struct si_image_handle *,
2449 img_handle);
2450 }
2451
2452 if (vi_dcc_enabled(rtex, level) &&
2453 p_atomic_read(&rtex->framebuffers_bound))
2454 sctx->need_check_render_feedback = true;
2455
2456 si_update_bindless_image_descriptor(sctx, img_handle);
2457 } else {
2458 si_update_bindless_buffer_descriptor(sctx,
2459 img_handle->desc_slot,
2460 view->resource,
2461 view->u.buf.offset,
2462 &img_handle->desc_dirty);
2463 }
2464
2465 /* Re-upload the descriptor if it has been updated while it
2466 * wasn't resident.
2467 */
2468 if (img_handle->desc_dirty)
2469 sctx->bindless_descriptors_dirty = true;
2470
2471 /* Add the image handle to the per-context list. */
2472 util_dynarray_append(&sctx->resident_img_handles,
2473 struct si_image_handle *, img_handle);
2474
2475 /* Add the buffers to the current CS in case si_begin_new_cs()
2476 * is not going to be called.
2477 */
2478 si_sampler_view_add_buffer(sctx, view->resource,
2479 (access & PIPE_IMAGE_ACCESS_WRITE) ?
2480 RADEON_USAGE_READWRITE :
2481 RADEON_USAGE_READ, false, false);
2482 } else {
2483 /* Remove the image handle from the per-context list. */
2484 util_dynarray_delete_unordered(&sctx->resident_img_handles,
2485 struct si_image_handle *,
2486 img_handle);
2487
2488 if (res->b.b.target != PIPE_BUFFER) {
2489 util_dynarray_delete_unordered(
2490 &sctx->resident_img_needs_color_decompress,
2491 struct si_image_handle *,
2492 img_handle);
2493 }
2494 }
2495 }
2496
2497
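/* Re-add the buffers of all resident texture and image handles to the new
 * CS and account for them in the resident handle counter.
 */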
2498 void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
2499 {
2500 unsigned num_resident_tex_handles, num_resident_img_handles;
2501
2502 num_resident_tex_handles = sctx->resident_tex_handles.size /
2503 sizeof(struct si_texture_handle *);
2504 num_resident_img_handles = sctx->resident_img_handles.size /
2505 sizeof(struct si_image_handle *);
2506
2507 /* Add all resident texture handles. */
2508 util_dynarray_foreach(&sctx->resident_tex_handles,
2509 struct si_texture_handle *, tex_handle) {
2510 struct si_sampler_view *sview =
2511 (struct si_sampler_view *)(*tex_handle)->view;
2512
2513 si_sampler_view_add_buffer(sctx, sview->base.texture,
2514 RADEON_USAGE_READ,
2515 sview->is_stencil_sampler, false);
2516 }
2517
2518 /* Add all resident image handles. */
2519 util_dynarray_foreach(&sctx->resident_img_handles,
2520 struct si_image_handle *, img_handle) {
2521 struct pipe_image_view *view = &(*img_handle)->view;
2522
2523 si_sampler_view_add_buffer(sctx, view->resource,
2524 RADEON_USAGE_READWRITE,
2525 false, false);
2526 }
2527
2528 sctx->b.num_resident_handles += num_resident_tex_handles +
2529 num_resident_img_handles;
2530 }
2531
2532 /* INIT/DEINIT/UPLOAD */
2533
2534 void si_init_all_descriptors(struct si_context *sctx)
2535 {
2536 int i;
2537
2538 STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2539 STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2540
2541 for (i = 0; i < SI_NUM_SHADERS; i++) {
2542 bool gfx9_tcs = false;
2543 bool gfx9_gs = false;
2544 unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
2545 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
2546 struct si_descriptors *desc;
2547
2548 if (sctx->b.chip_class >= GFX9) {
2549 gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
2550 gfx9_gs = i == PIPE_SHADER_GEOMETRY;
2551 }
2552
2553 desc = si_const_and_shader_buffer_descriptors(sctx, i);
2554 si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
2555 num_buffer_slots,
2556 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
2557 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
2558 SI_SGPR_CONST_AND_SHADER_BUFFERS,
2559 RADEON_USAGE_READWRITE,
2560 RADEON_USAGE_READ,
2561 RADEON_PRIO_SHADER_RW_BUFFER,
2562 RADEON_PRIO_CONST_BUFFER);
2563 desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
2564
2565 desc = si_sampler_and_image_descriptors(sctx, i);
2566 si_init_descriptors(desc,
2567 gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
2568 gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
2569 SI_SGPR_SAMPLERS_AND_IMAGES,
2570 16, num_sampler_slots);
2571
2572 int j;
2573 for (j = 0; j < SI_NUM_IMAGES; j++)
2574 memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
2575 for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
2576 memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
2577 }
2578
2579 si_init_buffer_resources(&sctx->rw_buffers,
2580 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
2581 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
2582 /* The second set of usage/priority is used by
2583 * const buffers in RW buffer slots. */
2584 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
2585 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
2586 sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
2587
2588 si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
2589 4, SI_NUM_VERTEX_BUFFERS);
2590 FREE(sctx->vertex_buffers.list); /* not used */
2591 sctx->vertex_buffers.list = NULL;
2592
2593 /* Initialize an array of 1024 bindless descriptors; when the limit is
2594 * reached, just make it larger and re-upload the whole array.
2595 */
2596 si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
2597 SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
2598 1024);
2599
2600 sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
2601
2602 /* Set pipe_context functions. */
2603 sctx->b.b.bind_sampler_states = si_bind_sampler_states;
2604 sctx->b.b.set_shader_images = si_set_shader_images;
2605 sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
2606 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
2607 sctx->b.b.set_shader_buffers = si_set_shader_buffers;
2608 sctx->b.b.set_sampler_views = si_set_sampler_views;
2609 sctx->b.b.create_texture_handle = si_create_texture_handle;
2610 sctx->b.b.delete_texture_handle = si_delete_texture_handle;
2611 sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
2612 sctx->b.b.create_image_handle = si_create_image_handle;
2613 sctx->b.b.delete_image_handle = si_delete_image_handle;
2614 sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
2615 sctx->b.invalidate_buffer = si_invalidate_buffer;
2616 sctx->b.rebind_buffer = si_rebind_buffer;
2617
2618 /* Shader user data. */
2619 si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
2620 si_emit_graphics_shader_pointers);
2621
2622 /* Set default and immutable mappings. */
2623 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2624
2625 if (sctx->b.chip_class >= GFX9) {
2626 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2627 R_00B430_SPI_SHADER_USER_DATA_LS_0);
2628 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2629 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2630 } else {
2631 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2632 R_00B430_SPI_SHADER_USER_DATA_HS_0);
2633 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2634 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2635 }
2636 si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2637 }
2638
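/* Upload all dirty descriptor lists selected by 'mask' and mark the
 * corresponding shader pointers for re-emission.
 */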
2639 static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
2640 {
2641 unsigned dirty = sctx->descriptors_dirty & mask;
2642
2643 /* Assume nothing will go wrong: */
2644 sctx->shader_pointers_dirty |= dirty;
2645
2646 while (dirty) {
2647 unsigned i = u_bit_scan(&dirty);
2648
2649 if (!si_upload_descriptors(sctx, &sctx->descriptors[i]))
2650 return false;
2651 }
2652
2653 sctx->descriptors_dirty &= ~mask;
2654
2655 si_upload_bindless_descriptors(sctx);
2656
2657 return true;
2658 }
2659
2660 bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
2661 {
2662 const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
2663 return si_upload_shader_descriptors(sctx, mask);
2664 }
2665
2666 bool si_upload_compute_shader_descriptors(struct si_context *sctx)
2667 {
2668 /* This does not update rw_buffers, because they are not needed by compute
2669 * shaders and the input buffer uses the same SGPRs anyway.
2670 */
2671 const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
2672 SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
2673 return si_upload_shader_descriptors(sctx, mask);
2674 }
2675
2676 void si_release_all_descriptors(struct si_context *sctx)
2677 {
2678 int i;
2679
2680 for (i = 0; i < SI_NUM_SHADERS; i++) {
2681 si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
2682 si_const_and_shader_buffer_descriptors(sctx, i));
2683 si_release_sampler_views(&sctx->samplers[i]);
2684 si_release_image_views(&sctx->images[i]);
2685 }
2686 si_release_buffer_resources(&sctx->rw_buffers,
2687 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2688 for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
2689 pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);
2690
2691 for (i = 0; i < SI_NUM_DESCS; ++i)
2692 si_release_descriptors(&sctx->descriptors[i]);
2693
2694 sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */
2695 si_release_descriptors(&sctx->vertex_buffers);
2696 si_release_bindless_descriptors(sctx);
2697 }
2698
2699 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
2700 {
2701 int i;
2702
2703 for (i = 0; i < SI_NUM_SHADERS; i++) {
2704 si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
2705 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
2706 si_image_views_begin_new_cs(sctx, &sctx->images[i]);
2707 }
2708 si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
2709 si_vertex_buffers_begin_new_cs(sctx);
2710
2711 for (i = 0; i < SI_NUM_DESCS; ++i)
2712 si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
2713 si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors);
2714
2715 si_shader_pointers_begin_new_cs(sctx);
2716 }
2717
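/* Set the range of active slots of one descriptor list from a contiguous
 * mask of used slots. Descriptors are marked dirty if slots outside the
 * previously active range are being enabled.
 */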
2718 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
2719 uint64_t new_active_mask)
2720 {
2721 struct si_descriptors *desc = &sctx->descriptors[desc_idx];
2722
2723 /* Ignore no-op updates and updates that disable all slots. */
2724 if (!new_active_mask ||
2725 new_active_mask == u_bit_consecutive64(desc->first_active_slot,
2726 desc->num_active_slots))
2727 return;
2728
2729 int first, count;
2730 u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
2731 assert(new_active_mask == 0);
2732
2733 /* Upload/dump descriptors if slots are being enabled. */
2734 if (first < desc->first_active_slot ||
2735 first + count > desc->first_active_slot + desc->num_active_slots)
2736 sctx->descriptors_dirty |= 1u << desc_idx;
2737
2738 desc->first_active_slot = first;
2739 desc->num_active_slots = count;
2740 }
2741
2742 void si_set_active_descriptors_for_shader(struct si_context *sctx,
2743 struct si_shader_selector *sel)
2744 {
2745 if (!sel)
2746 return;
2747
2748 si_set_active_descriptors(sctx,
2749 si_const_and_shader_buffer_descriptors_idx(sel->type),
2750 sel->active_const_and_shader_buffers);
2751 si_set_active_descriptors(sctx,
2752 si_sampler_and_image_descriptors_idx(sel->type),
2753 sel->active_samplers_and_images);
2754 }