radeonsi: drop support for NULL sampler views
[mesa.git] / src / gallium / drivers / radeonsi / si_descriptors.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <marek.olsak@amd.com>
25 */
26
27 /* Resource binding slots and sampler states (each described with 8 or
28 * 4 dwords) are stored in lists in memory which is accessed by shaders
29 * using scalar load instructions.
30 *
31 * This file is responsible for managing such lists. It keeps a copy of all
32 * descriptors in CPU memory and re-uploads a whole list if some slots have
33 * been changed.
34 *
35 * This code is also responsible for updating shader pointers to those lists.
36 *
37 * Note that CP DMA can't be used for updating the lists, because a GPU hang
38 * could leave the list in a mid-IB state and the next IB would get wrong
39 * descriptors and the whole context would be unusable at that point.
40 * (Note: register shadowing can't be used for the same reason.)
41 *
42 * Also, uploading descriptors to newly allocated memory doesn't require
43 * a KCACHE flush.
44 *
45 *
46 * Possible scenarios for one 16 dword image+sampler slot:
47 *
48 * | Image | w/ FMASK | Buffer | NULL
49 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
50 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
51 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
52 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
53 *
54 * FMASK implies MSAA, therefore no sampler state.
55 * Sampler states are never unbound except when FMASK is bound.
56 */
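/* Illustrative sketch (editorial note, not driver code): with the 16-dword
 * image+sampler layout above, slot i of a sampler list is addressed as
 *
 *   uint32_t *slot_desc    = desc->list + i * 16;  // image or buffer view
 *   uint32_t *fmask_desc   = slot_desc + 8;        // FMASK or NULL
 *   uint32_t *sampler_desc = slot_desc + 12;       // sampler state
 *
 * which matches how si_set_sampler_view() and si_bind_sampler_states()
 * index the list below.
 */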
57
58 #include "radeon/r600_cs.h"
59 #include "si_pipe.h"
60 #include "si_shader.h"
61 #include "sid.h"
62
63 #include "util/u_format.h"
64 #include "util/u_math.h"
65 #include "util/u_memory.h"
66 #include "util/u_suballoc.h"
67 #include "util/u_upload_mgr.h"
68
69
70 /* NULL image and buffer descriptor for textures (alpha = 1) and images
71 * (alpha = 0).
72 *
73 * For images, all fields must be zero except for the swizzle, which
74 * supports arbitrary combinations of 0s and 1s. The texture type must be
75 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
76 *
77 * For buffers, all fields must be zero. If they are not, the hw hangs.
78 *
79 * This is the only reason why the buffer descriptor must be in words [4:7].
80 */
81 static uint32_t null_texture_descriptor[8] = {
82 0,
83 0,
84 0,
85 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
86 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
87 /* the rest must contain zeros, which also serves as the null buffer
88 * descriptor */
89 };
90
91 static uint32_t null_image_descriptor[8] = {
92 0,
93 0,
94 0,
95 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
96 /* the rest must contain zeros, which also serves as the null buffer
97 * descriptor */
98 };
99
100 static void si_init_descriptors(struct si_descriptors *desc,
101 unsigned shader_userdata_index,
102 unsigned element_dw_size,
103 unsigned num_elements,
104 const uint32_t *null_descriptor,
105 unsigned *ce_offset)
106 {
107 int i;
108
109 assert(num_elements <= sizeof(desc->dirty_mask)*8);
110
111 desc->list = CALLOC(num_elements, element_dw_size * 4);
112 desc->element_dw_size = element_dw_size;
113 desc->num_elements = num_elements;
114 desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
115 desc->shader_userdata_offset = shader_userdata_index * 4;
116
117 if (ce_offset) {
118 desc->ce_offset = *ce_offset;
119
120 /* make sure that ce_offset stays 32 byte aligned */
121 *ce_offset += align(element_dw_size * num_elements * 4, 32);
122 }
123
124 /* Initialize the list to NULL descriptors (the element size must be a multiple of 8 dwords). */
125 if (null_descriptor) {
126 assert(element_dw_size % 8 == 0);
127 for (i = 0; i < num_elements * element_dw_size / 8; i++)
128 memcpy(desc->list + i * 8, null_descriptor,
129 8 * 4);
130 }
131 }
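/* Usage sketch (taken from si_init_all_descriptors() further below): the
 * per-shader sampler list uses 16 dwords per element and is pre-filled with
 * NULL texture descriptors:
 *
 *   si_init_descriptors(si_sampler_descriptors(sctx, i),
 *                       SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
 *                       null_texture_descriptor, &ce_offset);
 */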
132
133 static void si_release_descriptors(struct si_descriptors *desc)
134 {
135 r600_resource_reference(&desc->buffer, NULL);
136 FREE(desc->list);
137 }
138
139 static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
140 unsigned *out_offset, struct r600_resource **out_buf) {
141 uint64_t va;
142
143 u_suballocator_alloc(sctx->ce_suballocator, size, 64, out_offset,
144 (struct pipe_resource**)out_buf);
145 if (!*out_buf)
146 return false;
147
148 va = (*out_buf)->gpu_address + *out_offset;
149
150 radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
151 radeon_emit(sctx->ce_ib, ce_offset);
152 radeon_emit(sctx->ce_ib, size / 4);
153 radeon_emit(sctx->ce_ib, va);
154 radeon_emit(sctx->ce_ib, va >> 32);
155
156 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf,
157 RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
158
159 sctx->ce_need_synchronization = true;
160 return true;
161 }
162
163 static void si_ce_reinitialize_descriptors(struct si_context *sctx,
164 struct si_descriptors *desc)
165 {
166 if (desc->buffer) {
167 struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
168 unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
169 uint64_t va = buffer->gpu_address + desc->buffer_offset;
170 struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
171
172 if (!ib)
173 ib = sctx->ce_ib;
174
175 list_size = align(list_size, 32);
176
177 radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
178 radeon_emit(ib, va);
179 radeon_emit(ib, va >> 32);
180 radeon_emit(ib, list_size / 4);
181 radeon_emit(ib, desc->ce_offset);
182
183 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
184 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
185 }
186 desc->ce_ram_dirty = false;
187 }
188
189 void si_ce_reinitialize_all_descriptors(struct si_context *sctx)
190 {
191 int i;
192
193 for (i = 0; i < SI_NUM_DESCS; ++i)
194 si_ce_reinitialize_descriptors(sctx, &sctx->descriptors[i]);
195 }
196
197 void si_ce_enable_loads(struct radeon_winsys_cs *ib)
198 {
199 radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
200 radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) |
201 CONTEXT_CONTROL_LOAD_CE_RAM(1));
202 radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1));
203 }
204
205 static bool si_upload_descriptors(struct si_context *sctx,
206 struct si_descriptors *desc,
207 struct r600_atom * atom)
208 {
209 unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
210
211 if (!desc->dirty_mask)
212 return true;
213
214 if (sctx->ce_ib) {
215 uint32_t const* list = (uint32_t const*)desc->list;
216
217 if (desc->ce_ram_dirty)
218 si_ce_reinitialize_descriptors(sctx, desc);
219
220 while(desc->dirty_mask) {
221 int begin, count;
222 u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
223 &count);
224
225 begin *= desc->element_dw_size;
226 count *= desc->element_dw_size;
227
228 radeon_emit(sctx->ce_ib,
229 PKT3(PKT3_WRITE_CONST_RAM, count, 0));
230 radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
231 radeon_emit_array(sctx->ce_ib, list + begin, count);
232 }
233
234 if (!si_ce_upload(sctx, desc->ce_offset, list_size,
235 &desc->buffer_offset, &desc->buffer))
236 return false;
237 } else {
238 void *ptr;
239
240 u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
241 &desc->buffer_offset,
242 (struct pipe_resource**)&desc->buffer, &ptr);
243 if (!desc->buffer)
244 return false; /* skip the draw call */
245
246 util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
247
248 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
249 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
250 }
251 desc->pointer_dirty = true;
252 desc->dirty_mask = 0;
253
254 if (atom)
255 si_mark_atom_dirty(sctx, atom);
256
257 return true;
258 }
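/* Worked example (hypothetical slot numbers): if only sampler slots 2 and 3
 * of a 16-dword-per-element list are dirty, u_bit_scan_consecutive_range()
 * returns begin = 2, count = 2, which the CE path above scales to
 * begin = 32 and count = 32 dwords. A single WRITE_CONST_RAM packet then
 * updates CE RAM bytes [ce_offset + 128, ce_offset + 256) before
 * si_ce_upload() dumps the whole list into the newly allocated buffer.
 */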
259
260 static void
261 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
262 {
263 desc->ce_ram_dirty = true;
264
265 if (!desc->buffer)
266 return;
267
268 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
269 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
270 }
271
272 /* SAMPLER VIEWS */
273
274 static unsigned
275 si_sampler_descriptors_idx(unsigned shader)
276 {
277 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
278 SI_SHADER_DESCS_SAMPLERS;
279 }
280
281 static struct si_descriptors *
282 si_sampler_descriptors(struct si_context *sctx, unsigned shader)
283 {
284 return &sctx->descriptors[si_sampler_descriptors_idx(shader)];
285 }
286
287 static void si_release_sampler_views(struct si_sampler_views *views)
288 {
289 int i;
290
291 for (i = 0; i < ARRAY_SIZE(views->views); i++) {
292 pipe_sampler_view_reference(&views->views[i], NULL);
293 }
294 }
295
296 static void si_sampler_view_add_buffer(struct si_context *sctx,
297 struct pipe_resource *resource,
298 enum radeon_bo_usage usage,
299 bool is_stencil_sampler,
300 bool check_mem)
301 {
302 struct r600_resource *rres;
303 struct r600_texture *rtex;
304 enum radeon_bo_priority priority;
305
306 if (!resource)
307 return;
308
309 if (resource->target != PIPE_BUFFER) {
310 struct r600_texture *tex = (struct r600_texture*)resource;
311
312 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler))
313 resource = &tex->flushed_depth_texture->resource.b.b;
314 }
315
316 rres = (struct r600_resource*)resource;
317 priority = r600_get_sampler_view_priority(rres);
318
319 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
320 rres, usage, priority,
321 check_mem);
322
323 if (resource->target == PIPE_BUFFER)
324 return;
325
326 /* Now add separate DCC if it's present. */
327 rtex = (struct r600_texture*)resource;
328 if (!rtex->dcc_separate_buffer)
329 return;
330
331 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
332 rtex->dcc_separate_buffer, usage,
333 RADEON_PRIO_DCC, check_mem);
334 }
335
336 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
337 struct si_sampler_views *views)
338 {
339 unsigned mask = views->enabled_mask;
340
341 /* Add buffers to the CS. */
342 while (mask) {
343 int i = u_bit_scan(&mask);
344 struct si_sampler_view *sview = (struct si_sampler_view *)views->views[i];
345
346 si_sampler_view_add_buffer(sctx, sview->base.texture,
347 RADEON_USAGE_READ,
348 sview->is_stencil_sampler, false);
349 }
350 }
351
352 /* Set texture descriptor fields that can be changed by reallocations.
353 *
354 * \param tex texture
355 * \param base_level_info information of the level of BASE_ADDRESS
356 * \param base_level the level of BASE_ADDRESS
357 * \param first_level pipe_sampler_view.u.tex.first_level
358 * \param block_width util_format_get_blockwidth()
359 * \param is_stencil select between separate Z & Stencil
360 * \param state descriptor to update
361 */
362 void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
363 const struct radeon_surf_level *base_level_info,
364 unsigned base_level, unsigned first_level,
365 unsigned block_width, bool is_stencil,
366 uint32_t *state)
367 {
368 uint64_t va;
369 unsigned pitch = base_level_info->nblk_x * block_width;
370
371 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
372 tex = tex->flushed_depth_texture;
373 is_stencil = false;
374 }
375
376 va = tex->resource.gpu_address + base_level_info->offset;
377
378 state[1] &= C_008F14_BASE_ADDRESS_HI;
379 state[3] &= C_008F1C_TILING_INDEX;
380 state[4] &= C_008F20_PITCH;
381 state[6] &= C_008F28_COMPRESSION_EN;
382
383 state[0] = va >> 8;
384 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
385 state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level,
386 is_stencil));
387 state[4] |= S_008F20_PITCH(pitch - 1);
388
389 if (tex->dcc_offset && tex->surface.level[first_level].dcc_enabled) {
390 state[6] |= S_008F28_COMPRESSION_EN(1);
391 state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
392 tex->dcc_offset +
393 base_level_info->dcc_offset) >> 8;
394 }
395 }
396
397 static void si_set_sampler_view(struct si_context *sctx,
398 unsigned shader,
399 unsigned slot, struct pipe_sampler_view *view,
400 bool disallow_early_out)
401 {
402 struct si_sampler_views *views = &sctx->samplers[shader].views;
403 struct si_sampler_view *rview = (struct si_sampler_view*)view;
404 struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
405
406 if (views->views[slot] == view && !disallow_early_out)
407 return;
408
409 if (view) {
410 struct r600_texture *rtex = (struct r600_texture *)view->texture;
411 uint32_t *desc = descs->list + slot * 16;
412
413 assert(rtex); /* views with texture == NULL aren't supported */
414 pipe_sampler_view_reference(&views->views[slot], view);
415 memcpy(desc, rview->state, 8*4);
416
417 if (rtex->resource.b.b.target != PIPE_BUFFER) {
418 bool is_separate_stencil =
419 rtex->db_compatible &&
420 rview->is_stencil_sampler;
421
422 si_set_mutable_tex_desc_fields(rtex,
423 rview->base_level_info,
424 rview->base_level,
425 rview->base.u.tex.first_level,
426 rview->block_width,
427 is_separate_stencil,
428 desc);
429 }
430
431 if (rtex->resource.b.b.target != PIPE_BUFFER &&
432 rtex->fmask.size) {
433 memcpy(desc + 8,
434 rview->fmask_state, 8*4);
435 } else {
436 /* Disable FMASK and bind sampler state in [12:15]. */
437 memcpy(desc + 8,
438 null_texture_descriptor, 4*4);
439
440 if (views->sampler_states[slot])
441 memcpy(desc + 12,
442 views->sampler_states[slot], 4*4);
443 }
444
445 views->enabled_mask |= 1u << slot;
446
447 /* Since this can flush, it must be done after enabled_mask is
448 * updated. */
449 si_sampler_view_add_buffer(sctx, view->texture,
450 RADEON_USAGE_READ,
451 rview->is_stencil_sampler, true);
452 } else {
453 pipe_sampler_view_reference(&views->views[slot], NULL);
454 memcpy(descs->list + slot*16, null_texture_descriptor, 8*4);
455 /* Only clear the lower dwords of FMASK. */
456 memcpy(descs->list + slot*16 + 8, null_texture_descriptor, 4*4);
457 views->enabled_mask &= ~(1u << slot);
458 }
459
460 descs->dirty_mask |= 1u << slot;
461 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
462 }
463
464 static bool is_compressed_colortex(struct r600_texture *rtex)
465 {
466 return rtex->cmask.size || rtex->fmask.size ||
467 (rtex->dcc_offset && rtex->dirty_level_mask);
468 }
469
470 static void si_set_sampler_views(struct pipe_context *ctx,
471 enum pipe_shader_type shader, unsigned start,
472 unsigned count,
473 struct pipe_sampler_view **views)
474 {
475 struct si_context *sctx = (struct si_context *)ctx;
476 struct si_textures_info *samplers = &sctx->samplers[shader];
477 int i;
478
479 if (!count || shader >= SI_NUM_SHADERS)
480 return;
481
482 for (i = 0; i < count; i++) {
483 unsigned slot = start + i;
484
485 if (!views || !views[i]) {
486 samplers->depth_texture_mask &= ~(1u << slot);
487 samplers->compressed_colortex_mask &= ~(1u << slot);
488 si_set_sampler_view(sctx, shader, slot, NULL, false);
489 continue;
490 }
491
492 si_set_sampler_view(sctx, shader, slot, views[i], false);
493
494 if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
495 struct r600_texture *rtex =
496 (struct r600_texture*)views[i]->texture;
497
498 if (rtex->db_compatible) {
499 samplers->depth_texture_mask |= 1u << slot;
500 } else {
501 samplers->depth_texture_mask &= ~(1u << slot);
502 }
503 if (is_compressed_colortex(rtex)) {
504 samplers->compressed_colortex_mask |= 1u << slot;
505 } else {
506 samplers->compressed_colortex_mask &= ~(1u << slot);
507 }
508
509 if (rtex->dcc_offset &&
510 p_atomic_read(&rtex->framebuffers_bound))
511 sctx->need_check_render_feedback = true;
512 } else {
513 samplers->depth_texture_mask &= ~(1u << slot);
514 samplers->compressed_colortex_mask &= ~(1u << slot);
515 }
516 }
517 }
518
519 static void
520 si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
521 {
522 unsigned mask = samplers->views.enabled_mask;
523
524 while (mask) {
525 int i = u_bit_scan(&mask);
526 struct pipe_resource *res = samplers->views.views[i]->texture;
527
528 if (res && res->target != PIPE_BUFFER) {
529 struct r600_texture *rtex = (struct r600_texture *)res;
530
531 if (is_compressed_colortex(rtex)) {
532 samplers->compressed_colortex_mask |= 1u << i;
533 } else {
534 samplers->compressed_colortex_mask &= ~(1u << i);
535 }
536 }
537 }
538 }
539
540 /* IMAGE VIEWS */
541
542 static unsigned
543 si_image_descriptors_idx(unsigned shader)
544 {
545 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
546 SI_SHADER_DESCS_IMAGES;
547 }
548
549 static struct si_descriptors*
550 si_image_descriptors(struct si_context *sctx, unsigned shader)
551 {
552 return &sctx->descriptors[si_image_descriptors_idx(shader)];
553 }
554
555 static void
556 si_release_image_views(struct si_images_info *images)
557 {
558 unsigned i;
559
560 for (i = 0; i < SI_NUM_IMAGES; ++i) {
561 struct pipe_image_view *view = &images->views[i];
562
563 pipe_resource_reference(&view->resource, NULL);
564 }
565 }
566
567 static void
568 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images)
569 {
570 uint mask = images->enabled_mask;
571
572 /* Add buffers to the CS. */
573 while (mask) {
574 int i = u_bit_scan(&mask);
575 struct pipe_image_view *view = &images->views[i];
576
577 assert(view->resource);
578
579 si_sampler_view_add_buffer(sctx, view->resource,
580 RADEON_USAGE_READWRITE, false, false);
581 }
582 }
583
584 static void
585 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
586 {
587 struct si_images_info *images = &ctx->images[shader];
588
589 if (images->enabled_mask & (1u << slot)) {
590 struct si_descriptors *descs = si_image_descriptors(ctx, shader);
591
592 pipe_resource_reference(&images->views[slot].resource, NULL);
593 images->compressed_colortex_mask &= ~(1 << slot);
594
595 memcpy(descs->list + slot*8, null_image_descriptor, 8*4);
596 images->enabled_mask &= ~(1u << slot);
597 descs->dirty_mask |= 1u << slot;
598 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
599 }
600 }
601
602 static void
603 si_mark_image_range_valid(const struct pipe_image_view *view)
604 {
605 struct r600_resource *res = (struct r600_resource *)view->resource;
606
607 assert(res && res->b.b.target == PIPE_BUFFER);
608
609 util_range_add(&res->valid_buffer_range,
610 view->u.buf.offset,
611 view->u.buf.offset + view->u.buf.size);
612 }
613
614 static void si_set_shader_image(struct si_context *ctx,
615 unsigned shader,
616 unsigned slot, const struct pipe_image_view *view)
617 {
618 struct si_screen *screen = ctx->screen;
619 struct si_images_info *images = &ctx->images[shader];
620 struct si_descriptors *descs = si_image_descriptors(ctx, shader);
621 struct r600_resource *res;
622
623 if (!view || !view->resource) {
624 si_disable_shader_image(ctx, shader, slot);
625 return;
626 }
627
628 res = (struct r600_resource *)view->resource;
629
630 if (&images->views[slot] != view)
631 util_copy_image_view(&images->views[slot], view);
632
633 if (res->b.b.target == PIPE_BUFFER) {
634 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
635 si_mark_image_range_valid(view);
636
637 si_make_buffer_descriptor(screen, res,
638 view->format,
639 view->u.buf.offset,
640 view->u.buf.size,
641 descs->list + slot * 8);
642 images->compressed_colortex_mask &= ~(1 << slot);
643 } else {
644 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
645 struct r600_texture *tex = (struct r600_texture *)res;
646 unsigned level = view->u.tex.level;
647 unsigned width, height, depth;
648 uint32_t *desc = descs->list + slot * 8;
649 bool uses_dcc = tex->dcc_offset &&
650 tex->surface.level[level].dcc_enabled;
651
652 assert(!tex->is_depth);
653 assert(tex->fmask.size == 0);
654
655 if (uses_dcc &&
656 (view->access & PIPE_IMAGE_ACCESS_WRITE ||
657 !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
658 /* If DCC can't be disabled, at least decompress it.
659 * The decompression is relatively cheap if the surface
660 * has been decompressed already.
661 */
662 if (r600_texture_disable_dcc(&ctx->b, tex))
663 uses_dcc = false;
664 else
665 ctx->b.decompress_dcc(&ctx->b.b, tex);
666 }
667
668 if (is_compressed_colortex(tex)) {
669 images->compressed_colortex_mask |= 1 << slot;
670 } else {
671 images->compressed_colortex_mask &= ~(1 << slot);
672 }
673
674 if (uses_dcc &&
675 p_atomic_read(&tex->framebuffers_bound))
676 ctx->need_check_render_feedback = true;
677
678 /* Always force the base level to the selected level.
679 *
680 * This is required for 3D textures, where otherwise
681 * selecting a single slice for non-layered bindings
682 * fails. It doesn't hurt the other targets.
683 */
684 width = u_minify(res->b.b.width0, level);
685 height = u_minify(res->b.b.height0, level);
686 depth = u_minify(res->b.b.depth0, level);
687
688 si_make_texture_descriptor(screen, tex,
689 false, res->b.b.target,
690 view->format, swizzle,
691 0, 0,
692 view->u.tex.first_layer,
693 view->u.tex.last_layer,
694 width, height, depth,
695 desc, NULL);
696 si_set_mutable_tex_desc_fields(tex, &tex->surface.level[level],
697 level, level,
698 util_format_get_blockwidth(view->format),
699 false, desc);
700 }
701
702 images->enabled_mask |= 1u << slot;
703 descs->dirty_mask |= 1u << slot;
704 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
705
706 /* Since this can flush, it must be done after enabled_mask is updated. */
707 si_sampler_view_add_buffer(ctx, &res->b.b,
708 RADEON_USAGE_READWRITE, false, true);
709 }
710
711 static void
712 si_set_shader_images(struct pipe_context *pipe,
713 enum pipe_shader_type shader,
714 unsigned start_slot, unsigned count,
715 const struct pipe_image_view *views)
716 {
717 struct si_context *ctx = (struct si_context *)pipe;
718 unsigned i, slot;
719
720 assert(shader < SI_NUM_SHADERS);
721
722 if (!count)
723 return;
724
725 assert(start_slot + count <= SI_NUM_IMAGES);
726
727 if (views) {
728 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
729 si_set_shader_image(ctx, shader, slot, &views[i]);
730 } else {
731 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
732 si_set_shader_image(ctx, shader, slot, NULL);
733 }
734 }
735
736 static void
737 si_images_update_compressed_colortex_mask(struct si_images_info *images)
738 {
739 unsigned mask = images->enabled_mask;
740
741 while (mask) {
742 int i = u_bit_scan(&mask);
743 struct pipe_resource *res = images->views[i].resource;
744
745 if (res && res->target != PIPE_BUFFER) {
746 struct r600_texture *rtex = (struct r600_texture *)res;
747
748 if (is_compressed_colortex(rtex)) {
749 images->compressed_colortex_mask |= 1 << i;
750 } else {
751 images->compressed_colortex_mask &= ~(1 << i);
752 }
753 }
754 }
755 }
756
757 /* SAMPLER STATES */
758
759 static void si_bind_sampler_states(struct pipe_context *ctx,
760 enum pipe_shader_type shader,
761 unsigned start, unsigned count, void **states)
762 {
763 struct si_context *sctx = (struct si_context *)ctx;
764 struct si_textures_info *samplers = &sctx->samplers[shader];
765 struct si_descriptors *desc = si_sampler_descriptors(sctx, shader);
766 struct si_sampler_state **sstates = (struct si_sampler_state**)states;
767 int i;
768
769 if (!count || shader >= SI_NUM_SHADERS)
770 return;
771
772 for (i = 0; i < count; i++) {
773 unsigned slot = start + i;
774
775 if (!sstates[i] ||
776 sstates[i] == samplers->views.sampler_states[slot])
777 continue;
778
779 samplers->views.sampler_states[slot] = sstates[i];
780
781 /* If FMASK is bound, don't overwrite it.
782 * The sampler state will be set after FMASK is unbound.
783 */
784 if (samplers->views.views[slot] &&
785 samplers->views.views[slot]->texture &&
786 samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
787 ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
788 continue;
789
790 memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
791 desc->dirty_mask |= 1u << slot;
792 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
793 }
794 }
795
796 /* BUFFER RESOURCES */
797
798 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
799 struct si_descriptors *descs,
800 unsigned num_buffers,
801 unsigned shader_userdata_index,
802 enum radeon_bo_usage shader_usage,
803 enum radeon_bo_priority priority,
804 unsigned *ce_offset)
805 {
806 buffers->shader_usage = shader_usage;
807 buffers->priority = priority;
808 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
809
810 si_init_descriptors(descs, shader_userdata_index, 4,
811 num_buffers, NULL, ce_offset);
812 }
813
814 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
815 struct si_descriptors *descs)
816 {
817 int i;
818
819 for (i = 0; i < descs->num_elements; i++) {
820 pipe_resource_reference(&buffers->buffers[i], NULL);
821 }
822
823 FREE(buffers->buffers);
824 }
825
826 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
827 struct si_buffer_resources *buffers)
828 {
829 unsigned mask = buffers->enabled_mask;
830
831 /* Add buffers to the CS. */
832 while (mask) {
833 int i = u_bit_scan(&mask);
834
835 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
836 (struct r600_resource*)buffers->buffers[i],
837 buffers->shader_usage, buffers->priority);
838 }
839 }
840
841 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
842 struct si_descriptors *descs,
843 unsigned idx, struct pipe_resource **buf,
844 unsigned *offset, unsigned *size)
845 {
846 pipe_resource_reference(buf, buffers->buffers[idx]);
847 if (*buf) {
848 struct r600_resource *res = r600_resource(*buf);
849 const uint32_t *desc = descs->list + idx * 4;
850 uint64_t va;
851
852 *size = desc[2];
853
854 assert(G_008F04_STRIDE(desc[1]) == 0);
855 va = ((uint64_t)desc[1] << 32) | desc[0];
856
857 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
858 *offset = va - res->gpu_address;
859 }
860 }
861
862 /* VERTEX BUFFERS */
863
864 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
865 {
866 struct si_descriptors *desc = &sctx->vertex_buffers;
867 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
868 int i;
869
870 for (i = 0; i < count; i++) {
871 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
872
873 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
874 continue;
875 if (!sctx->vertex_buffer[vb].buffer)
876 continue;
877
878 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
879 (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
880 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
881 }
882
883 if (!desc->buffer)
884 return;
885 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
886 desc->buffer, RADEON_USAGE_READ,
887 RADEON_PRIO_DESCRIPTORS);
888 }
889
890 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
891 {
892 struct si_descriptors *desc = &sctx->vertex_buffers;
893 bool bound[SI_NUM_VERTEX_BUFFERS] = {};
894 unsigned i, count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
895 uint64_t va;
896 uint32_t *ptr;
897
898 if (!sctx->vertex_buffers_dirty)
899 return true;
900 if (!count || !sctx->vertex_elements)
901 return true;
902
903 /* Vertex buffer descriptors are the only ones which are uploaded
904 * directly through a staging buffer and don't go through
905 * the fine-grained upload path.
906 */
907 u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset,
908 (struct pipe_resource**)&desc->buffer, (void**)&ptr);
909 if (!desc->buffer)
910 return false;
911
912 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
913 desc->buffer, RADEON_USAGE_READ,
914 RADEON_PRIO_DESCRIPTORS);
915
916 assert(count <= SI_NUM_VERTEX_BUFFERS);
917
918 for (i = 0; i < count; i++) {
919 struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
920 struct pipe_vertex_buffer *vb;
921 struct r600_resource *rbuffer;
922 unsigned offset;
923 uint32_t *desc = &ptr[i*4];
924
925 if (ve->vertex_buffer_index >= ARRAY_SIZE(sctx->vertex_buffer)) {
926 memset(desc, 0, 16);
927 continue;
928 }
929
930 vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
931 rbuffer = (struct r600_resource*)vb->buffer;
932 if (!rbuffer) {
933 memset(desc, 0, 16);
934 continue;
935 }
936
937 offset = vb->buffer_offset + ve->src_offset;
938 va = rbuffer->gpu_address + offset;
939
940 /* Fill in T# buffer resource description */
941 desc[0] = va;
942 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
943 S_008F04_STRIDE(vb->stride);
944
945 if (sctx->b.chip_class <= CIK && vb->stride)
946 /* Round up by rounding down and adding 1 */
947 desc[2] = (vb->buffer->width0 - offset -
948 sctx->vertex_elements->format_size[i]) /
949 vb->stride + 1;
950 else
951 desc[2] = vb->buffer->width0 - offset;
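		/* Worked example with made-up numbers: width0 = 250, offset = 0,
		 * stride = 16, format_size[i] = 8. The <= CIK path gives
		 * (250 - 0 - 8) / 16 + 1 = 16 records, so the last record starts
		 * at byte 240 and its 8-byte fetch still fits in the buffer,
		 * while the VI path simply stores 250 bytes in desc[2].
		 */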
952
953 desc[3] = sctx->vertex_elements->rsrc_word3[i];
954
955 if (!bound[ve->vertex_buffer_index]) {
956 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
957 (struct r600_resource*)vb->buffer,
958 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
959 bound[ve->vertex_buffer_index] = true;
960 }
961 }
962
963 /* Don't flush the const cache. It would have a very negative effect
964 * on performance (confirmed by testing). New descriptors are always
965 * uploaded to a fresh new buffer, so I don't think flushing the const
966 * cache is needed. */
967 desc->pointer_dirty = true;
968 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
969 sctx->vertex_buffers_dirty = false;
970 return true;
971 }
972
973
974 /* CONSTANT BUFFERS */
975
976 static unsigned
977 si_const_buffer_descriptors_idx(unsigned shader)
978 {
979 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
980 SI_SHADER_DESCS_CONST_BUFFERS;
981 }
982
983 static struct si_descriptors *
984 si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
985 {
986 return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
987 }
988
989 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
990 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
991 {
992 void *tmp;
993
994 u_upload_alloc(sctx->b.uploader, 0, size, 256, const_offset,
995 (struct pipe_resource**)rbuffer, &tmp);
996 if (*rbuffer)
997 util_memcpy_cpu_to_le32(tmp, ptr, size);
998 }
999
1000 static void si_set_constant_buffer(struct si_context *sctx,
1001 struct si_buffer_resources *buffers,
1002 unsigned descriptors_idx,
1003 uint slot, const struct pipe_constant_buffer *input)
1004 {
1005 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1006 assert(slot < descs->num_elements);
1007 pipe_resource_reference(&buffers->buffers[slot], NULL);
1008
1009 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1010 * with a NULL buffer). We need to use a dummy buffer instead. */
1011 if (sctx->b.chip_class == CIK &&
1012 (!input || (!input->buffer && !input->user_buffer)))
1013 input = &sctx->null_const_buf;
1014
1015 if (input && (input->buffer || input->user_buffer)) {
1016 struct pipe_resource *buffer = NULL;
1017 uint64_t va;
1018
1019 /* Upload the user buffer if needed. */
1020 if (input->user_buffer) {
1021 unsigned buffer_offset;
1022
1023 si_upload_const_buffer(sctx,
1024 (struct r600_resource**)&buffer, input->user_buffer,
1025 input->buffer_size, &buffer_offset);
1026 if (!buffer) {
1027 /* Just unbind on failure. */
1028 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
1029 return;
1030 }
1031 va = r600_resource(buffer)->gpu_address + buffer_offset;
1032 } else {
1033 pipe_resource_reference(&buffer, input->buffer);
1034 va = r600_resource(buffer)->gpu_address + input->buffer_offset;
1035 }
1036
1037 /* Set the descriptor. */
1038 uint32_t *desc = descs->list + slot*4;
1039 desc[0] = va;
1040 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1041 S_008F04_STRIDE(0);
1042 desc[2] = input->buffer_size;
1043 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1044 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1045 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1046 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1047 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1048 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1049
1050 buffers->buffers[slot] = buffer;
1051 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1052 (struct r600_resource*)buffer,
1053 buffers->shader_usage,
1054 buffers->priority, true);
1055 buffers->enabled_mask |= 1u << slot;
1056 } else {
1057 /* Clear the descriptor. */
1058 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1059 buffers->enabled_mask &= ~(1u << slot);
1060 }
1061
1062 descs->dirty_mask |= 1u << slot;
1063 sctx->descriptors_dirty |= 1u << descriptors_idx;
1064 }
1065
1066 void si_set_rw_buffer(struct si_context *sctx,
1067 uint slot, const struct pipe_constant_buffer *input)
1068 {
1069 si_set_constant_buffer(sctx, &sctx->rw_buffers,
1070 SI_DESCS_RW_BUFFERS, slot, input);
1071 }
1072
1073 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
1074 uint shader, uint slot,
1075 const struct pipe_constant_buffer *input)
1076 {
1077 struct si_context *sctx = (struct si_context *)ctx;
1078
1079 if (shader >= SI_NUM_SHADERS)
1080 return;
1081
1082 si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
1083 si_const_buffer_descriptors_idx(shader),
1084 slot, input);
1085 }
1086
1087 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
1088 uint slot, struct pipe_constant_buffer *cbuf)
1089 {
1090 cbuf->user_buffer = NULL;
1091 si_get_buffer_from_descriptors(
1092 &sctx->const_buffers[shader],
1093 si_const_buffer_descriptors(sctx, shader),
1094 slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1095 }
1096
1097 /* SHADER BUFFERS */
1098
1099 static unsigned
1100 si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
1101 {
1102 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1103 SI_SHADER_DESCS_SHADER_BUFFERS;
1104 }
1105
1106 static struct si_descriptors *
1107 si_shader_buffer_descriptors(struct si_context *sctx,
1108 enum pipe_shader_type shader)
1109 {
1110 return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
1111 }
1112
1113 static void si_set_shader_buffers(struct pipe_context *ctx,
1114 enum pipe_shader_type shader,
1115 unsigned start_slot, unsigned count,
1116 const struct pipe_shader_buffer *sbuffers)
1117 {
1118 struct si_context *sctx = (struct si_context *)ctx;
1119 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
1120 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
1121 unsigned i;
1122
1123 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1124
1125 for (i = 0; i < count; ++i) {
1126 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1127 struct r600_resource *buf;
1128 unsigned slot = start_slot + i;
1129 uint32_t *desc = descs->list + slot * 4;
1130 uint64_t va;
1131
1132 if (!sbuffer || !sbuffer->buffer) {
1133 pipe_resource_reference(&buffers->buffers[slot], NULL);
1134 memset(desc, 0, sizeof(uint32_t) * 4);
1135 buffers->enabled_mask &= ~(1u << slot);
1136 descs->dirty_mask |= 1u << slot;
1137 sctx->descriptors_dirty |=
1138 1u << si_shader_buffer_descriptors_idx(shader);
1139 continue;
1140 }
1141
1142 buf = (struct r600_resource *)sbuffer->buffer;
1143 va = buf->gpu_address + sbuffer->buffer_offset;
1144
1145 desc[0] = va;
1146 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1147 S_008F04_STRIDE(0);
1148 desc[2] = sbuffer->buffer_size;
1149 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1150 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1151 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1152 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1153 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1154 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1155
1156 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1157 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1158 buffers->shader_usage,
1159 buffers->priority, true);
1160 buffers->enabled_mask |= 1u << slot;
1161 descs->dirty_mask |= 1u << slot;
1162 sctx->descriptors_dirty |=
1163 1u << si_shader_buffer_descriptors_idx(shader);
1164 }
1165 }
1166
1167 void si_get_shader_buffers(struct si_context *sctx, uint shader,
1168 uint start_slot, uint count,
1169 struct pipe_shader_buffer *sbuf)
1170 {
1171 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
1172 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
1173
1174 for (unsigned i = 0; i < count; ++i) {
1175 si_get_buffer_from_descriptors(
1176 buffers, descs, start_slot + i,
1177 &sbuf[i].buffer, &sbuf[i].buffer_offset,
1178 &sbuf[i].buffer_size);
1179 }
1180 }
1181
1182 /* RING BUFFERS */
1183
1184 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
1185 struct pipe_resource *buffer,
1186 unsigned stride, unsigned num_records,
1187 bool add_tid, bool swizzle,
1188 unsigned element_size, unsigned index_stride, uint64_t offset)
1189 {
1190 struct si_context *sctx = (struct si_context *)ctx;
1191 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1192 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1193
1194 /* The stride field in the resource descriptor has 14 bits */
1195 assert(stride < (1 << 14));
1196
1197 assert(slot < descs->num_elements);
1198 pipe_resource_reference(&buffers->buffers[slot], NULL);
1199
1200 if (buffer) {
1201 uint64_t va;
1202
1203 va = r600_resource(buffer)->gpu_address + offset;
1204
1205 switch (element_size) {
1206 default:
1207 assert(!"Unsupported ring buffer element size");
1208 case 0:
1209 case 2:
1210 element_size = 0;
1211 break;
1212 case 4:
1213 element_size = 1;
1214 break;
1215 case 8:
1216 element_size = 2;
1217 break;
1218 case 16:
1219 element_size = 3;
1220 break;
1221 }
1222
1223 switch (index_stride) {
1224 default:
1225 assert(!"Unsupported ring buffer index stride");
1226 case 0:
1227 case 8:
1228 index_stride = 0;
1229 break;
1230 case 16:
1231 index_stride = 1;
1232 break;
1233 case 32:
1234 index_stride = 2;
1235 break;
1236 case 64:
1237 index_stride = 3;
1238 break;
1239 }
1240
1241 if (sctx->b.chip_class >= VI && stride)
1242 num_records *= stride;
1243
1244 /* Set the descriptor. */
1245 uint32_t *desc = descs->list + slot*4;
1246 desc[0] = va;
1247 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1248 S_008F04_STRIDE(stride) |
1249 S_008F04_SWIZZLE_ENABLE(swizzle);
1250 desc[2] = num_records;
1251 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1252 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1253 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1254 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1255 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1256 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1257 S_008F0C_ELEMENT_SIZE(element_size) |
1258 S_008F0C_INDEX_STRIDE(index_stride) |
1259 S_008F0C_ADD_TID_ENABLE(add_tid);
1260
1261 pipe_resource_reference(&buffers->buffers[slot], buffer);
1262 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1263 (struct r600_resource*)buffer,
1264 buffers->shader_usage, buffers->priority);
1265 buffers->enabled_mask |= 1u << slot;
1266 } else {
1267 /* Clear the descriptor. */
1268 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1269 buffers->enabled_mask &= ~(1u << slot);
1270 }
1271
1272 descs->dirty_mask |= 1u << slot;
1273 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1274 }
1275
1276 /* STREAMOUT BUFFERS */
1277
1278 static void si_set_streamout_targets(struct pipe_context *ctx,
1279 unsigned num_targets,
1280 struct pipe_stream_output_target **targets,
1281 const unsigned *offsets)
1282 {
1283 struct si_context *sctx = (struct si_context *)ctx;
1284 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1285 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1286 unsigned old_num_targets = sctx->b.streamout.num_targets;
1287 unsigned i, bufidx;
1288
1289 /* We are going to unbind the buffers. Mark which caches need to be flushed. */
1290 if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
1291 /* Since streamout uses vector writes which go through TC L2
1292 * and most other clients can use TC L2 as well, we don't need
1293 * to flush it.
1294 *
1295 * The only cases which require flushing it are VGT DMA index
1296 * fetching (on <= CIK) and indirect draw data, which are rare
1297 * cases. Thus, flag the TC L2 dirtiness in the resource and
1298 * handle it at draw call time.
1299 */
1300 for (i = 0; i < sctx->b.streamout.num_targets; i++)
1301 if (sctx->b.streamout.targets[i])
1302 r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
1303
1304 /* Invalidate the scalar cache in case a streamout buffer is
1305 * going to be used as a constant buffer.
1306 *
1307 * Invalidate TC L1, because streamout bypasses it (done by
1308 * setting GLC=1 in the store instruction), but it can contain
1309 * outdated data of streamout buffers.
1310 *
1311 * VS_PARTIAL_FLUSH is required if the buffers are going to be
1312 * used as an input immediately.
1313 */
1314 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
1315 SI_CONTEXT_INV_VMEM_L1 |
1316 SI_CONTEXT_VS_PARTIAL_FLUSH;
1317 }
1318
1319 /* All readers of the streamout targets need to be finished before we can
1320 * start writing to the targets.
1321 */
1322 if (num_targets)
1323 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1324 SI_CONTEXT_CS_PARTIAL_FLUSH;
1325
1326 /* Streamout buffers must be bound in 2 places:
1327 * 1) in VGT by setting the VGT_STRMOUT registers
1328 * 2) as shader resources
1329 */
1330
1331 /* Set the VGT regs. */
1332 r600_set_streamout_targets(ctx, num_targets, targets, offsets);
1333
1334 /* Set the shader resources. */
1335 for (i = 0; i < num_targets; i++) {
1336 bufidx = SI_VS_STREAMOUT_BUF0 + i;
1337
1338 if (targets[i]) {
1339 struct pipe_resource *buffer = targets[i]->buffer;
1340 uint64_t va = r600_resource(buffer)->gpu_address;
1341
1342 /* Set the descriptor.
1343 *
1344 * On VI, the format must be non-INVALID, otherwise
1345 * the buffer will be considered not bound and store
1346 * instructions will be no-ops.
1347 */
1348 uint32_t *desc = descs->list + bufidx*4;
1349 desc[0] = va;
1350 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
1351 desc[2] = 0xffffffff;
1352 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1353 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1354 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1355 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1356 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1357
1358 /* Set the resource. */
1359 pipe_resource_reference(&buffers->buffers[bufidx],
1360 buffer);
1361 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1362 (struct r600_resource*)buffer,
1363 buffers->shader_usage,
1364 RADEON_PRIO_SHADER_RW_BUFFER,
1365 true);
1366 buffers->enabled_mask |= 1u << bufidx;
1367 } else {
1368 /* Clear the descriptor and unset the resource. */
1369 memset(descs->list + bufidx*4, 0,
1370 sizeof(uint32_t) * 4);
1371 pipe_resource_reference(&buffers->buffers[bufidx],
1372 NULL);
1373 buffers->enabled_mask &= ~(1u << bufidx);
1374 }
1375 descs->dirty_mask |= 1u << bufidx;
1376 }
1377 for (; i < old_num_targets; i++) {
1378 bufidx = SI_VS_STREAMOUT_BUF0 + i;
1379 /* Clear the descriptor and unset the resource. */
1380 memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
1381 pipe_resource_reference(&buffers->buffers[bufidx], NULL);
1382 buffers->enabled_mask &= ~(1u << bufidx);
1383 descs->dirty_mask |= 1u << bufidx;
1384 }
1385
1386 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1387 }
1388
1389 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
1390 uint32_t *desc, uint64_t old_buf_va,
1391 struct pipe_resource *new_buf)
1392 {
1393 /* Retrieve the buffer offset from the descriptor. */
1394 uint64_t old_desc_va =
1395 desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
1396
1397 assert(old_buf_va <= old_desc_va);
1398 uint64_t offset_within_buffer = old_desc_va - old_buf_va;
1399
1400 /* Update the descriptor. */
1401 uint64_t va = r600_resource(new_buf)->gpu_address + offset_within_buffer;
1402
1403 desc[0] = va;
1404 desc[1] = (desc[1] & C_008F04_BASE_ADDRESS_HI) |
1405 S_008F04_BASE_ADDRESS_HI(va >> 32);
1406 }
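/* Numeric sketch (hypothetical addresses): if the old buffer lived at
 * old_buf_va = 0x100000000 and the descriptor pointed at 0x100002000, the
 * offset within the buffer is 0x2000. With the new buffer at 0x240000000,
 * the function writes desc[0] = 0x40002000 and BASE_ADDRESS_HI = 0x2,
 * i.e. the new VA 0x240002000 split into its low and high dwords.
 */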
1407
1408 /* INTERNAL CONST BUFFERS */
1409
1410 static void si_set_polygon_stipple(struct pipe_context *ctx,
1411 const struct pipe_poly_stipple *state)
1412 {
1413 struct si_context *sctx = (struct si_context *)ctx;
1414 struct pipe_constant_buffer cb = {};
1415 unsigned stipple[32];
1416 int i;
1417
1418 for (i = 0; i < 32; i++)
1419 stipple[i] = util_bitreverse(state->stipple[i]);
1420
1421 cb.user_buffer = stipple;
1422 cb.buffer_size = sizeof(stipple);
1423
1424 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1425 }
1426
1427 /* TEXTURE METADATA ENABLE/DISABLE */
1428
1429 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1430 * while the texture is bound, possibly by a different context. In that case,
1431 * call this function to update compressed_colortex_masks.
1432 */
1433 void si_update_compressed_colortex_masks(struct si_context *sctx)
1434 {
1435 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1436 si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]);
1437 si_images_update_compressed_colortex_mask(&sctx->images[i]);
1438 }
1439 }
1440
1441 /* BUFFER DISCARD/INVALIDATION */
1442
1443 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
1444 static void si_reset_buffer_resources(struct si_context *sctx,
1445 struct si_buffer_resources *buffers,
1446 unsigned descriptors_idx,
1447 struct pipe_resource *buf,
1448 uint64_t old_va)
1449 {
1450 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1451 unsigned mask = buffers->enabled_mask;
1452
1453 while (mask) {
1454 unsigned i = u_bit_scan(&mask);
1455 if (buffers->buffers[i] == buf) {
1456 si_desc_reset_buffer_offset(&sctx->b.b,
1457 descs->list + i*4,
1458 old_va, buf);
1459 descs->dirty_mask |= 1u << i;
1460 sctx->descriptors_dirty |= 1u << descriptors_idx;
1461
1462 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1463 (struct r600_resource *)buf,
1464 buffers->shader_usage,
1465 buffers->priority, true);
1466 }
1467 }
1468 }
1469
1470 /* Reallocate a buffer and update all resource bindings where the buffer is
1471 * bound.
1472 *
1473 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1474 * idle by discarding its contents. Apps usually tell us when to do this using
1475 * map_buffer flags, for example.
1476 */
1477 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1478 {
1479 struct si_context *sctx = (struct si_context*)ctx;
1480 struct r600_resource *rbuffer = r600_resource(buf);
1481 unsigned i, shader;
1482 uint64_t old_va = rbuffer->gpu_address;
1483 unsigned num_elems = sctx->vertex_elements ?
1484 sctx->vertex_elements->count : 0;
1485 struct si_sampler_view *view;
1486
1487 /* Reallocate the buffer in the same pipe_resource. */
1488 r600_alloc_resource(&sctx->screen->b, rbuffer);
1489
1490 /* We changed the buffer, now we need to bind it where the old one
1491 * was bound. This consists of 2 things:
1492 * 1) Updating the resource descriptor and dirtying it.
1493 * 2) Adding a relocation to the CS, so that it's usable.
1494 */
1495
1496 /* Vertex buffers. */
1497 for (i = 0; i < num_elems; i++) {
1498 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
1499
1500 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1501 continue;
1502 if (!sctx->vertex_buffer[vb].buffer)
1503 continue;
1504
1505 if (sctx->vertex_buffer[vb].buffer == buf) {
1506 sctx->vertex_buffers_dirty = true;
1507 break;
1508 }
1509 }
1510
1511 /* Streamout buffers. (other internal buffers can't be invalidated) */
1512 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1513 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1514 struct si_descriptors *descs =
1515 &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1516
1517 if (buffers->buffers[i] != buf)
1518 continue;
1519
1520 si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1521 old_va, buf);
1522 descs->dirty_mask |= 1u << i;
1523 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1524
1525 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1526 rbuffer, buffers->shader_usage,
1527 RADEON_PRIO_SHADER_RW_BUFFER,
1528 true);
1529
1530 /* Update the streamout state. */
1531 if (sctx->b.streamout.begin_emitted)
1532 r600_emit_streamout_end(&sctx->b);
1533 sctx->b.streamout.append_bitmask =
1534 sctx->b.streamout.enabled_mask;
1535 r600_streamout_buffers_dirty(&sctx->b);
1536 }
1537
1538 /* Constant and shader buffers. */
1539 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1540 si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
1541 si_const_buffer_descriptors_idx(shader),
1542 buf, old_va);
1543 si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
1544 si_shader_buffer_descriptors_idx(shader),
1545 buf, old_va);
1546 }
1547
1548 /* Texture buffers - update virtual addresses in sampler view descriptors. */
1549 LIST_FOR_EACH_ENTRY(view, &sctx->b.texture_buffers, list) {
1550 if (view->base.texture == buf) {
1551 si_desc_reset_buffer_offset(ctx, &view->state[4], old_va, buf);
1552 }
1553 }
1554 /* Texture buffers - update bindings. */
1555 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1556 struct si_sampler_views *views = &sctx->samplers[shader].views;
1557 struct si_descriptors *descs =
1558 si_sampler_descriptors(sctx, shader);
1559 unsigned mask = views->enabled_mask;
1560
1561 while (mask) {
1562 unsigned i = u_bit_scan(&mask);
1563 if (views->views[i]->texture == buf) {
1564 si_desc_reset_buffer_offset(ctx,
1565 descs->list +
1566 i * 16 + 4,
1567 old_va, buf);
1568 descs->dirty_mask |= 1u << i;
1569 sctx->descriptors_dirty |=
1570 1u << si_sampler_descriptors_idx(shader);
1571
1572 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1573 rbuffer, RADEON_USAGE_READ,
1574 RADEON_PRIO_SAMPLER_BUFFER,
1575 true);
1576 }
1577 }
1578 }
1579
1580 /* Shader images */
1581 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1582 struct si_images_info *images = &sctx->images[shader];
1583 struct si_descriptors *descs =
1584 si_image_descriptors(sctx, shader);
1585 unsigned mask = images->enabled_mask;
1586
1587 while (mask) {
1588 unsigned i = u_bit_scan(&mask);
1589
1590 if (images->views[i].resource == buf) {
1591 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1592 si_mark_image_range_valid(&images->views[i]);
1593
1594 si_desc_reset_buffer_offset(
1595 ctx, descs->list + i * 8 + 4,
1596 old_va, buf);
1597 descs->dirty_mask |= 1u << i;
1598 sctx->descriptors_dirty |=
1599 1u << si_image_descriptors_idx(shader);
1600
1601 radeon_add_to_buffer_list_check_mem(
1602 &sctx->b, &sctx->b.gfx, rbuffer,
1603 RADEON_USAGE_READWRITE,
1604 RADEON_PRIO_SAMPLER_BUFFER, true);
1605 }
1606 }
1607 }
1608 }
1609
1610 /* Update mutable image descriptor fields of all bound textures. */
1611 void si_update_all_texture_descriptors(struct si_context *sctx)
1612 {
1613 unsigned shader;
1614
1615 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1616 struct si_sampler_views *samplers = &sctx->samplers[shader].views;
1617 struct si_images_info *images = &sctx->images[shader];
1618 unsigned mask;
1619
1620 /* Images. */
1621 mask = images->enabled_mask;
1622 while (mask) {
1623 unsigned i = u_bit_scan(&mask);
1624 struct pipe_image_view *view = &images->views[i];
1625
1626 if (!view->resource ||
1627 view->resource->target == PIPE_BUFFER)
1628 continue;
1629
1630 si_set_shader_image(sctx, shader, i, view);
1631 }
1632
1633 /* Sampler views. */
1634 mask = samplers->enabled_mask;
1635 while (mask) {
1636 unsigned i = u_bit_scan(&mask);
1637 struct pipe_sampler_view *view = samplers->views[i];
1638
1639 if (!view ||
1640 !view->texture ||
1641 view->texture->target == PIPE_BUFFER)
1642 continue;
1643
1644 si_set_sampler_view(sctx, shader, i,
1645 samplers->views[i], true);
1646 }
1647 }
1648 }
1649
1650 /* SHADER USER DATA */
1651
1652 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
1653 unsigned shader)
1654 {
1655 struct si_descriptors *descs =
1656 &sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
1657
1658 for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
1659 descs->pointer_dirty = true;
1660
1661 if (shader == PIPE_SHADER_VERTEX)
1662 sctx->vertex_buffers.pointer_dirty = true;
1663
1664 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
1665 }
1666
1667 static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
1668 {
1669 int i;
1670
1671 for (i = 0; i < SI_NUM_SHADERS; i++) {
1672 si_mark_shader_pointers_dirty(sctx, i);
1673 }
1674 sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
1675 }
1676
1677 /* Set a base register address for user data constants in the given shader.
1678 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
1679 */
1680 static void si_set_user_data_base(struct si_context *sctx,
1681 unsigned shader, uint32_t new_base)
1682 {
1683 uint32_t *base = &sctx->shader_userdata.sh_base[shader];
1684
1685 if (*base != new_base) {
1686 *base = new_base;
1687
1688 if (new_base)
1689 si_mark_shader_pointers_dirty(sctx, shader);
1690 }
1691 }
1692
1693 /* This must be called when these shaders are changed from non-NULL to NULL
1694 * and vice versa:
1695 * - geometry shader
1696 * - tessellation control shader
1697 * - tessellation evaluation shader
1698 */
1699 void si_shader_change_notify(struct si_context *sctx)
1700 {
1701 /* VS can be bound as VS, ES, or LS. */
1702 if (sctx->tes_shader.cso)
1703 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1704 R_00B530_SPI_SHADER_USER_DATA_LS_0);
1705 else if (sctx->gs_shader.cso)
1706 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1707 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1708 else
1709 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1710 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1711
1712 /* TES can be bound as ES, VS, or not bound. */
1713 if (sctx->tes_shader.cso) {
1714 if (sctx->gs_shader.cso)
1715 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1716 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1717 else
1718 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1719 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1720 } else {
1721 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
1722 }
1723 }
1724
1725 static void si_emit_shader_pointer(struct si_context *sctx,
1726 struct si_descriptors *desc,
1727 unsigned sh_base, bool keep_dirty)
1728 {
1729 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1730 uint64_t va;
1731
1732 if (!desc->pointer_dirty || !desc->buffer)
1733 return;
1734
1735 va = desc->buffer->gpu_address +
1736 desc->buffer_offset;
1737
1738 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
1739 radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
1740 radeon_emit(cs, va);
1741 radeon_emit(cs, va >> 32);
1742
1743 desc->pointer_dirty = keep_dirty;
1744 }
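/* Illustrative note (editorial): for a descriptor list whose user SGPR index
 * is N, the SET_SH_REG packet above writes the 64-bit list address into the
 * two consecutive registers starting at sh_base + 4 * N, e.g. into
 * SPI_SHADER_USER_DATA_VS_N and _VS_(N+1) when sh_base is
 * R_00B130_SPI_SHADER_USER_DATA_VS_0.
 */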
1745
1746 void si_emit_graphics_shader_userdata(struct si_context *sctx,
1747 struct r600_atom *atom)
1748 {
1749 unsigned shader;
1750 uint32_t *sh_base = sctx->shader_userdata.sh_base;
1751 struct si_descriptors *descs;
1752
1753 descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1754
1755 if (descs->pointer_dirty) {
1756 si_emit_shader_pointer(sctx, descs,
1757 R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
1758 si_emit_shader_pointer(sctx, descs,
1759 R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
1760 si_emit_shader_pointer(sctx, descs,
1761 R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
1762 si_emit_shader_pointer(sctx, descs,
1763 R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
1764 si_emit_shader_pointer(sctx, descs,
1765 R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
1766 descs->pointer_dirty = false;
1767 }
1768
1769 descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];
1770
1771 for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
1772 unsigned base = sh_base[shader];
1773 unsigned i;
1774
1775 if (!base)
1776 continue;
1777
1778 for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
1779 si_emit_shader_pointer(sctx, descs, base, false);
1780 }
1781 si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
1782 }
1783
1784 void si_emit_compute_shader_userdata(struct si_context *sctx)
1785 {
1786 unsigned base = R_00B900_COMPUTE_USER_DATA_0;
1787 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];
1788
1789 for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
1790 si_emit_shader_pointer(sctx, descs, base, false);
1791 }
1792
1793 /* INIT/DEINIT/UPLOAD */
1794
1795 void si_init_all_descriptors(struct si_context *sctx)
1796 {
1797 int i;
1798 unsigned ce_offset = 0;
1799
1800 for (i = 0; i < SI_NUM_SHADERS; i++) {
1801 si_init_buffer_resources(&sctx->const_buffers[i],
1802 si_const_buffer_descriptors(sctx, i),
1803 SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
1804 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
1805 &ce_offset);
1806 si_init_buffer_resources(&sctx->shader_buffers[i],
1807 si_shader_buffer_descriptors(sctx, i),
1808 SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
1809 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
1810 &ce_offset);
1811
1812 si_init_descriptors(si_sampler_descriptors(sctx, i),
1813 SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
1814 null_texture_descriptor, &ce_offset);
1815
1816 si_init_descriptors(si_image_descriptors(sctx, i),
1817 SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
1818 null_image_descriptor, &ce_offset);
1819 }
1820
1821 si_init_buffer_resources(&sctx->rw_buffers,
1822 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
1823 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
1824 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
1825 &ce_offset);
1826 si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
1827 4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
1828
1829 sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
1830
1831 assert(ce_offset <= 32768);
1832
1833 /* Set pipe_context functions. */
1834 sctx->b.b.bind_sampler_states = si_bind_sampler_states;
1835 sctx->b.b.set_shader_images = si_set_shader_images;
1836 sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
1837 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
1838 sctx->b.b.set_shader_buffers = si_set_shader_buffers;
1839 sctx->b.b.set_sampler_views = si_set_sampler_views;
1840 sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
1841 sctx->b.invalidate_buffer = si_invalidate_buffer;
1842
1843 /* Shader user data. */
1844 si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
1845 si_emit_graphics_shader_userdata);
1846
1847 /* Set default and immutable mappings. */
1848 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
1849 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
1850 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
1851 si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
1852 }
1853
1854 bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
1855 {
1856 const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
1857 unsigned dirty = sctx->descriptors_dirty & mask;
1858
1859 while (dirty) {
1860 unsigned i = u_bit_scan(&dirty);
1861
1862 if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
1863 &sctx->shader_userdata.atom))
1864 return false;
1865 }
1866
1867 sctx->descriptors_dirty &= ~mask;
1868 return true;
1869 }
1870
1871 bool si_upload_compute_shader_descriptors(struct si_context *sctx)
1872 {
1873 /* Does not update rw_buffers as that is not needed for compute shaders
1874 * and the input buffer uses the same SGPRs anyway.
1875 */
1876 const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
1877 SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
1878 unsigned dirty = sctx->descriptors_dirty & mask;
1879
1880 while (dirty) {
1881 unsigned i = u_bit_scan(&dirty);
1882
1883 if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
1884 return false;
1885 }
1886
1887 sctx->descriptors_dirty &= ~mask;
1888
1889 return true;
1890 }
1891
1892 void si_release_all_descriptors(struct si_context *sctx)
1893 {
1894 int i;
1895
1896 for (i = 0; i < SI_NUM_SHADERS; i++) {
1897 si_release_buffer_resources(&sctx->const_buffers[i],
1898 si_const_buffer_descriptors(sctx, i));
1899 si_release_buffer_resources(&sctx->shader_buffers[i],
1900 si_shader_buffer_descriptors(sctx, i));
1901 si_release_sampler_views(&sctx->samplers[i].views);
1902 si_release_image_views(&sctx->images[i]);
1903 }
1904 si_release_buffer_resources(&sctx->rw_buffers,
1905 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
1906
1907 for (i = 0; i < SI_NUM_DESCS; ++i)
1908 si_release_descriptors(&sctx->descriptors[i]);
1909 si_release_descriptors(&sctx->vertex_buffers);
1910 }
1911
1912 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
1913 {
1914 int i;
1915
1916 for (i = 0; i < SI_NUM_SHADERS; i++) {
1917 si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
1918 si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
1919 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
1920 si_image_views_begin_new_cs(sctx, &sctx->images[i]);
1921 }
1922 si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
1923 si_vertex_buffers_begin_new_cs(sctx);
1924
1925 for (i = 0; i < SI_NUM_DESCS; ++i)
1926 si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
1927
1928 si_shader_userdata_begin_new_cs(sctx);
1929 }