radeonsi: extract writing of a single streamout output
[mesa.git] src/gallium/drivers/radeonsi/si_descriptors.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <marek.olsak@amd.com>
25 */
26
27 /* Resource binding slots and sampler states (each described with 8 or
28 * 4 dwords) are stored in lists in memory which is accessed by shaders
29 * using scalar load instructions.
30 *
31 * This file is responsible for managing such lists. It keeps a copy of all
32 * descriptors in CPU memory and re-uploads a whole list if some slots have
33 * been changed.
34 *
35 * This code is also responsible for updating shader pointers to those lists.
36 *
37 * Note that CP DMA can't be used for updating the lists, because a GPU hang
38 * could leave the list in a mid-IB state and the next IB would get wrong
39 * descriptors and the whole context would be unusable at that point.
40 * (Note: register shadowing can't be used for the same reason.)
41 *
42 * Also, uploading descriptors to newly allocated memory doesn't require
43 * a KCACHE flush.
44 *
45 *
46 * Possible scenarios for one 16 dword image+sampler slot:
47 *
48 * | Image | w/ FMASK | Buffer | NULL
49 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
50 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
51 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
52 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
53 *
54 * FMASK implies MSAA, therefore no sampler state.
55 * Sampler states are never unbound except when FMASK is bound.
56 */
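/* A compiled-out sketch (ours, not part of the driver) of how one 16-dword
 * image+sampler slot from the table above is addressed; si_set_sampler_view()
 * below writes exactly these sub-ranges of the descriptor list.
 */
#if 0
static inline uint32_t *slot_image(uint32_t *list, unsigned slot)
{
	return list + slot * 16;	/* image/buffer descriptor, dwords [0:7] */
}

static inline uint32_t *slot_fmask(uint32_t *list, unsigned slot)
{
	return list + slot * 16 + 8;	/* FMASK descriptor or NULL, dwords [8:11] */
}

static inline uint32_t *slot_sampler(uint32_t *list, unsigned slot)
{
	return list + slot * 16 + 12;	/* sampler state, dwords [12:15] */
}
#endif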
57
58 #include "radeon/r600_cs.h"
59 #include "si_pipe.h"
60 #include "sid.h"
61
62 #include "util/u_format.h"
63 #include "util/u_memory.h"
64 #include "util/u_upload_mgr.h"
65
66
67 /* NULL image and buffer descriptor for textures (alpha = 1) and images
68 * (alpha = 0).
69 *
70 * For images, all fields must be zero except for the swizzle, which
71 * supports arbitrary combinations of 0s and 1s. The texture type must be
72 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
73 *
74 * For buffers, all fields must be zero. If they are not, the hw hangs.
75 *
76 * This is the only reason why the buffer descriptor must be in words [4:7].
77 */
78 static uint32_t null_texture_descriptor[8] = {
79 0,
80 0,
81 0,
82 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
83 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
84 /* the rest must contain zeros; the zeroed dwords also serve as the
85 * NULL buffer descriptor */
86 };
87
88 static uint32_t null_image_descriptor[8] = {
89 0,
90 0,
91 0,
92 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
93 /* the rest must contain zeros; the zeroed dwords also serve as the
94 * NULL buffer descriptor */
95 };
96
97 static void si_init_descriptors(struct si_descriptors *desc,
98 unsigned shader_userdata_index,
99 unsigned element_dw_size,
100 unsigned num_elements,
101 const uint32_t *null_descriptor,
102 unsigned *ce_offset)
103 {
104 int i;
105
106 assert(num_elements <= sizeof(desc->dirty_mask)*8);
107
108 desc->list = CALLOC(num_elements, element_dw_size * 4);
109 desc->element_dw_size = element_dw_size;
110 desc->num_elements = num_elements;
111 desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
112 desc->shader_userdata_offset = shader_userdata_index * 4;
113
114 if (ce_offset) {
115 desc->ce_offset = *ce_offset;
116
117 /* make sure that ce_offset stays 32 byte aligned */
118 *ce_offset += align(element_dw_size * num_elements * 4, 32);
119 }
120
121 /* Initialize the array to NULL descriptors if the element size is a multiple of 8 dwords. */
122 if (null_descriptor) {
123 assert(element_dw_size % 8 == 0);
124 for (i = 0; i < num_elements * element_dw_size / 8; i++)
125 memcpy(desc->list + i * 8, null_descriptor,
126 8 * 4);
127 }
128 }
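/* Worked example (hypothetical sizes) of the bookkeeping above: a list of
 * 32 elements of 16 dwords each starts with dirty_mask = ~0u and advances
 * *ce_offset by align(32 * 16 * 4, 32) = 2048 bytes. */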
129
130 static void si_release_descriptors(struct si_descriptors *desc)
131 {
132 r600_resource_reference(&desc->buffer, NULL);
133 FREE(desc->list);
134 }
135
136 static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
137 unsigned *out_offset, struct r600_resource **out_buf) {
138 uint64_t va;
139
140 u_suballocator_alloc(sctx->ce_suballocator, size, 64, out_offset,
141 (struct pipe_resource**)out_buf);
142 if (!*out_buf)
143 return false;
144
145 va = (*out_buf)->gpu_address + *out_offset;
146
147 radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
148 radeon_emit(sctx->ce_ib, ce_offset);
149 radeon_emit(sctx->ce_ib, size / 4);
150 radeon_emit(sctx->ce_ib, va);
151 radeon_emit(sctx->ce_ib, va >> 32);
152
153 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf,
154 RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
155
156 sctx->ce_need_synchronization = true;
157 return true;
158 }
159
160 static void si_ce_reinitialize_descriptors(struct si_context *sctx,
161 struct si_descriptors *desc)
162 {
163 if (desc->buffer) {
164 struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
165 unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
166 uint64_t va = buffer->gpu_address + desc->buffer_offset;
167 struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
168
169 if (!ib)
170 ib = sctx->ce_ib;
171
172 list_size = align(list_size, 32);
173
174 radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
175 radeon_emit(ib, va);
176 radeon_emit(ib, va >> 32);
177 radeon_emit(ib, list_size / 4);
178 radeon_emit(ib, desc->ce_offset);
179
180 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
181 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
182 }
183 desc->ce_ram_dirty = false;
184 }
185
186 void si_ce_reinitialize_all_descriptors(struct si_context *sctx)
187 {
188 int i;
189
190 for (i = 0; i < SI_NUM_DESCS; ++i)
191 si_ce_reinitialize_descriptors(sctx, &sctx->descriptors[i]);
192 }
193
194 void si_ce_enable_loads(struct radeon_winsys_cs *ib)
195 {
196 radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
197 radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) |
198 CONTEXT_CONTROL_LOAD_CE_RAM(1));
199 radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1));
200 }
201
202 static bool si_upload_descriptors(struct si_context *sctx,
203 struct si_descriptors *desc,
204 struct r600_atom * atom)
205 {
206 unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
207
208 if (!desc->dirty_mask)
209 return true;
210
211 if (sctx->ce_ib) {
212 uint32_t const* list = (uint32_t const*)desc->list;
213
214 if (desc->ce_ram_dirty)
215 si_ce_reinitialize_descriptors(sctx, desc);
216
217 while(desc->dirty_mask) {
218 int begin, count;
219 u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
220 &count);
221
222 begin *= desc->element_dw_size;
223 count *= desc->element_dw_size;
224
225 radeon_emit(sctx->ce_ib,
226 PKT3(PKT3_WRITE_CONST_RAM, count, 0));
227 radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
228 radeon_emit_array(sctx->ce_ib, list + begin, count);
229 }
230
231 if (!si_ce_upload(sctx, desc->ce_offset, list_size,
232 &desc->buffer_offset, &desc->buffer))
233 return false;
234 } else {
235 void *ptr;
236
237 u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
238 &desc->buffer_offset,
239 (struct pipe_resource**)&desc->buffer, &ptr);
240 if (!desc->buffer)
241 return false; /* skip the draw call */
242
243 util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
244 desc->gpu_list = ptr;
245
246 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
247 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
248 }
249 desc->pointer_dirty = true;
250 desc->dirty_mask = 0;
251
252 if (atom)
253 si_mark_atom_dirty(sctx, atom);
254
255 return true;
256 }
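/* Worked example (hypothetical numbers) of the partial CE update above:
 * with dirty_mask = 0x1c (slots 2..4) and element_dw_size = 16,
 * u_bit_scan_consecutive_range() returns begin = 2 and count = 3, which the
 * loop scales to begin = 32 and count = 48 dwords, so WRITE_CONST_RAM
 * rewrites 48 dwords starting at byte offset ce_offset + 32 * 4 of CE RAM. */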
257
258 static void
259 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
260 {
261 desc->ce_ram_dirty = true;
262
263 if (!desc->buffer)
264 return;
265
266 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
267 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
268 }
269
270 /* SAMPLER VIEWS */
271
272 static unsigned
273 si_sampler_descriptors_idx(unsigned shader)
274 {
275 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
276 SI_SHADER_DESCS_SAMPLERS;
277 }
278
279 static struct si_descriptors *
280 si_sampler_descriptors(struct si_context *sctx, unsigned shader)
281 {
282 return &sctx->descriptors[si_sampler_descriptors_idx(shader)];
283 }
284
285 static void si_release_sampler_views(struct si_sampler_views *views)
286 {
287 int i;
288
289 for (i = 0; i < ARRAY_SIZE(views->views); i++) {
290 pipe_sampler_view_reference(&views->views[i], NULL);
291 }
292 }
293
294 static void si_sampler_view_add_buffer(struct si_context *sctx,
295 struct pipe_resource *resource,
296 enum radeon_bo_usage usage,
297 bool is_stencil_sampler,
298 bool check_mem)
299 {
300 struct r600_resource *rres;
301 struct r600_texture *rtex;
302 enum radeon_bo_priority priority;
303
304 if (!resource)
305 return;
306
307 if (resource->target != PIPE_BUFFER) {
308 struct r600_texture *tex = (struct r600_texture*)resource;
309
310 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler))
311 resource = &tex->flushed_depth_texture->resource.b.b;
312 }
313
314 rres = (struct r600_resource*)resource;
315 priority = r600_get_sampler_view_priority(rres);
316
317 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
318 rres, usage, priority,
319 check_mem);
320
321 if (resource->target == PIPE_BUFFER)
322 return;
323
324 /* Now add separate DCC if it's present. */
325 rtex = (struct r600_texture*)resource;
326 if (!rtex->dcc_separate_buffer)
327 return;
328
329 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
330 rtex->dcc_separate_buffer, usage,
331 RADEON_PRIO_DCC, check_mem);
332 }
333
334 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
335 struct si_sampler_views *views)
336 {
337 unsigned mask = views->enabled_mask;
338
339 /* Add buffers to the CS. */
340 while (mask) {
341 int i = u_bit_scan(&mask);
342 struct si_sampler_view *sview = (struct si_sampler_view *)views->views[i];
343
344 si_sampler_view_add_buffer(sctx, sview->base.texture,
345 RADEON_USAGE_READ,
346 sview->is_stencil_sampler, false);
347 }
348 }
349
350 /* Set buffer descriptor fields that can be changed by reallocations. */
351 static void si_set_buf_desc_address(struct r600_resource *buf,
352 uint64_t offset, uint32_t *state)
353 {
354 uint64_t va = buf->gpu_address + offset;
355
356 state[0] = va;
357 state[1] &= C_008F04_BASE_ADDRESS_HI;
358 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
359 }
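/* A compiled-out sketch of the inverse of the split above; the same
 * extraction is done by si_desc_reset_buffer_offset() further below when a
 * buffer has been reallocated. */
#if 0
static inline uint64_t si_buf_desc_address(const uint32_t *state)
{
	return state[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(state[1]) << 32);
}
#endif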
360
361 /* Set texture descriptor fields that can be changed by reallocations.
362 *
363 * \param tex texture
364 * \param base_level_info information of the level of BASE_ADDRESS
365 * \param base_level the level of BASE_ADDRESS
366 * \param first_level pipe_sampler_view.u.tex.first_level
367 * \param block_width util_format_get_blockwidth()
368 * \param is_stencil select between separate Z & Stencil
369 * \param state descriptor to update
370 */
371 void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
372 const struct radeon_surf_level *base_level_info,
373 unsigned base_level, unsigned first_level,
374 unsigned block_width, bool is_stencil,
375 uint32_t *state)
376 {
377 uint64_t va;
378 unsigned pitch = base_level_info->nblk_x * block_width;
379
380 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
381 tex = tex->flushed_depth_texture;
382 is_stencil = false;
383 }
384
385 va = tex->resource.gpu_address + base_level_info->offset;
386
387 state[1] &= C_008F14_BASE_ADDRESS_HI;
388 state[3] &= C_008F1C_TILING_INDEX;
389 state[4] &= C_008F20_PITCH;
390 state[6] &= C_008F28_COMPRESSION_EN;
391
392 state[0] = va >> 8;
393 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
394 state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level,
395 is_stencil));
396 state[4] |= S_008F20_PITCH(pitch - 1);
397
398 if (tex->dcc_offset && first_level < tex->surface.num_dcc_levels) {
399 state[6] |= S_008F28_COMPRESSION_EN(1);
400 state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
401 tex->dcc_offset +
402 base_level_info->dcc_offset) >> 8;
403 } else if (tex->tc_compatible_htile) {
404 state[6] |= S_008F28_COMPRESSION_EN(1);
405 state[7] = tex->htile_buffer->gpu_address >> 8;
406 }
407 }
408
409 static void si_set_sampler_view(struct si_context *sctx,
410 unsigned shader,
411 unsigned slot, struct pipe_sampler_view *view,
412 bool disallow_early_out)
413 {
414 struct si_sampler_views *views = &sctx->samplers[shader].views;
415 struct si_sampler_view *rview = (struct si_sampler_view*)view;
416 struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
417 uint32_t *desc = descs->list + slot * 16;
418
419 if (views->views[slot] == view && !disallow_early_out)
420 return;
421
422 if (view) {
423 struct r600_texture *rtex = (struct r600_texture *)view->texture;
424
425 assert(rtex); /* views with texture == NULL aren't supported */
426 pipe_sampler_view_reference(&views->views[slot], view);
427 memcpy(desc, rview->state, 8*4);
428
429 if (rtex->resource.b.b.target == PIPE_BUFFER) {
430 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
431
432 si_set_buf_desc_address(&rtex->resource,
433 view->u.buf.offset,
434 desc + 4);
435 } else {
436 bool is_separate_stencil =
437 rtex->db_compatible &&
438 rview->is_stencil_sampler;
439
440 si_set_mutable_tex_desc_fields(rtex,
441 rview->base_level_info,
442 rview->base_level,
443 rview->base.u.tex.first_level,
444 rview->block_width,
445 is_separate_stencil,
446 desc);
447 }
448
449 if (rtex->resource.b.b.target != PIPE_BUFFER &&
450 rtex->fmask.size) {
451 memcpy(desc + 8,
452 rview->fmask_state, 8*4);
453 } else {
454 /* Disable FMASK and bind sampler state in [12:15]. */
455 memcpy(desc + 8,
456 null_texture_descriptor, 4*4);
457
458 if (views->sampler_states[slot])
459 memcpy(desc + 12,
460 views->sampler_states[slot]->val, 4*4);
461 }
462
463 views->enabled_mask |= 1u << slot;
464
465 /* Since this can flush, it must be done after enabled_mask is
466 * updated. */
467 si_sampler_view_add_buffer(sctx, view->texture,
468 RADEON_USAGE_READ,
469 rview->is_stencil_sampler, true);
470 } else {
471 pipe_sampler_view_reference(&views->views[slot], NULL);
472 memcpy(desc, null_texture_descriptor, 8*4);
473 /* Only clear the lower dwords of FMASK. */
474 memcpy(desc + 8, null_texture_descriptor, 4*4);
475 /* Re-set the sampler state if we are transitioning from FMASK. */
476 if (views->sampler_states[slot])
477 memcpy(desc + 12,
478 views->sampler_states[slot]->val, 4*4);
479
480 views->enabled_mask &= ~(1u << slot);
481 }
482
483 descs->dirty_mask |= 1u << slot;
484 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
485 }
486
487 static bool is_compressed_colortex(struct r600_texture *rtex)
488 {
489 return rtex->cmask.size || rtex->fmask.size ||
490 (rtex->dcc_offset && rtex->dirty_level_mask);
491 }
492
493 static void si_set_sampler_views(struct pipe_context *ctx,
494 enum pipe_shader_type shader, unsigned start,
495 unsigned count,
496 struct pipe_sampler_view **views)
497 {
498 struct si_context *sctx = (struct si_context *)ctx;
499 struct si_textures_info *samplers = &sctx->samplers[shader];
500 int i;
501
502 if (!count || shader >= SI_NUM_SHADERS)
503 return;
504
505 for (i = 0; i < count; i++) {
506 unsigned slot = start + i;
507
508 if (!views || !views[i]) {
509 samplers->depth_texture_mask &= ~(1u << slot);
510 samplers->compressed_colortex_mask &= ~(1u << slot);
511 si_set_sampler_view(sctx, shader, slot, NULL, false);
512 continue;
513 }
514
515 si_set_sampler_view(sctx, shader, slot, views[i], false);
516
517 if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
518 struct r600_texture *rtex =
519 (struct r600_texture*)views[i]->texture;
520 struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
521
522 if (rtex->db_compatible &&
523 (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
524 samplers->depth_texture_mask |= 1u << slot;
525 } else {
526 samplers->depth_texture_mask &= ~(1u << slot);
527 }
528 if (is_compressed_colortex(rtex)) {
529 samplers->compressed_colortex_mask |= 1u << slot;
530 } else {
531 samplers->compressed_colortex_mask &= ~(1u << slot);
532 }
533
534 if (rtex->dcc_offset &&
535 p_atomic_read(&rtex->framebuffers_bound))
536 sctx->need_check_render_feedback = true;
537 } else {
538 samplers->depth_texture_mask &= ~(1u << slot);
539 samplers->compressed_colortex_mask &= ~(1u << slot);
540 }
541 }
542 }
543
544 static void
545 si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
546 {
547 unsigned mask = samplers->views.enabled_mask;
548
549 while (mask) {
550 int i = u_bit_scan(&mask);
551 struct pipe_resource *res = samplers->views.views[i]->texture;
552
553 if (res && res->target != PIPE_BUFFER) {
554 struct r600_texture *rtex = (struct r600_texture *)res;
555
556 if (is_compressed_colortex(rtex)) {
557 samplers->compressed_colortex_mask |= 1u << i;
558 } else {
559 samplers->compressed_colortex_mask &= ~(1u << i);
560 }
561 }
562 }
563 }
564
565 /* IMAGE VIEWS */
566
567 static unsigned
568 si_image_descriptors_idx(unsigned shader)
569 {
570 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
571 SI_SHADER_DESCS_IMAGES;
572 }
573
574 static struct si_descriptors*
575 si_image_descriptors(struct si_context *sctx, unsigned shader)
576 {
577 return &sctx->descriptors[si_image_descriptors_idx(shader)];
578 }
579
580 static void
581 si_release_image_views(struct si_images_info *images)
582 {
583 unsigned i;
584
585 for (i = 0; i < SI_NUM_IMAGES; ++i) {
586 struct pipe_image_view *view = &images->views[i];
587
588 pipe_resource_reference(&view->resource, NULL);
589 }
590 }
591
592 static void
593 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images)
594 {
595 uint mask = images->enabled_mask;
596
597 /* Add buffers to the CS. */
598 while (mask) {
599 int i = u_bit_scan(&mask);
600 struct pipe_image_view *view = &images->views[i];
601
602 assert(view->resource);
603
604 si_sampler_view_add_buffer(sctx, view->resource,
605 RADEON_USAGE_READWRITE, false, false);
606 }
607 }
608
609 static void
610 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
611 {
612 struct si_images_info *images = &ctx->images[shader];
613
614 if (images->enabled_mask & (1u << slot)) {
615 struct si_descriptors *descs = si_image_descriptors(ctx, shader);
616
617 pipe_resource_reference(&images->views[slot].resource, NULL);
618 images->compressed_colortex_mask &= ~(1 << slot);
619
620 memcpy(descs->list + slot*8, null_image_descriptor, 8*4);
621 images->enabled_mask &= ~(1u << slot);
622 descs->dirty_mask |= 1u << slot;
623 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
624 }
625 }
626
627 static void
628 si_mark_image_range_valid(const struct pipe_image_view *view)
629 {
630 struct r600_resource *res = (struct r600_resource *)view->resource;
631
632 assert(res && res->b.b.target == PIPE_BUFFER);
633
634 util_range_add(&res->valid_buffer_range,
635 view->u.buf.offset,
636 view->u.buf.offset + view->u.buf.size);
637 }
638
639 static void si_set_shader_image(struct si_context *ctx,
640 unsigned shader,
641 unsigned slot, const struct pipe_image_view *view)
642 {
643 struct si_screen *screen = ctx->screen;
644 struct si_images_info *images = &ctx->images[shader];
645 struct si_descriptors *descs = si_image_descriptors(ctx, shader);
646 struct r600_resource *res;
647 uint32_t *desc = descs->list + slot * 8;
648
649 if (!view || !view->resource) {
650 si_disable_shader_image(ctx, shader, slot);
651 return;
652 }
653
654 res = (struct r600_resource *)view->resource;
655
656 if (&images->views[slot] != view)
657 util_copy_image_view(&images->views[slot], view);
658
659 if (res->b.b.target == PIPE_BUFFER) {
660 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
661 si_mark_image_range_valid(view);
662
663 si_make_buffer_descriptor(screen, res,
664 view->format,
665 view->u.buf.offset,
666 view->u.buf.size,
667 descs->list + slot * 8);
668 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
669
670 images->compressed_colortex_mask &= ~(1 << slot);
671 res->bind_history |= PIPE_BIND_SHADER_IMAGE;
672 } else {
673 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
674 struct r600_texture *tex = (struct r600_texture *)res;
675 unsigned level = view->u.tex.level;
676 unsigned width, height, depth;
677 bool uses_dcc = tex->dcc_offset &&
678 level < tex->surface.num_dcc_levels;
679
680 assert(!tex->is_depth);
681 assert(tex->fmask.size == 0);
682
683 if (uses_dcc &&
684 (view->access & PIPE_IMAGE_ACCESS_WRITE ||
685 !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
686 /* If DCC can't be disabled, at least decompress it.
687 * The decompression is relatively cheap if the surface
688 * has been decompressed already.
689 */
690 if (r600_texture_disable_dcc(&ctx->b, tex))
691 uses_dcc = false;
692 else
693 ctx->b.decompress_dcc(&ctx->b.b, tex);
694 }
695
696 if (is_compressed_colortex(tex)) {
697 images->compressed_colortex_mask |= 1 << slot;
698 } else {
699 images->compressed_colortex_mask &= ~(1 << slot);
700 }
701
702 if (uses_dcc &&
703 p_atomic_read(&tex->framebuffers_bound))
704 ctx->need_check_render_feedback = true;
705
706 /* Always force the base level to the selected level.
707 *
708 * This is required for 3D textures, where otherwise
709 * selecting a single slice for non-layered bindings
710 * fails. It doesn't hurt the other targets.
711 */
712 width = u_minify(res->b.b.width0, level);
713 height = u_minify(res->b.b.height0, level);
714 depth = u_minify(res->b.b.depth0, level);
715
716 si_make_texture_descriptor(screen, tex,
717 false, res->b.b.target,
718 view->format, swizzle,
719 0, 0,
720 view->u.tex.first_layer,
721 view->u.tex.last_layer,
722 width, height, depth,
723 desc, NULL);
724 si_set_mutable_tex_desc_fields(tex, &tex->surface.level[level],
725 level, level,
726 util_format_get_blockwidth(view->format),
727 false, desc);
728 }
729
730 images->enabled_mask |= 1u << slot;
731 descs->dirty_mask |= 1u << slot;
732 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
733
734 /* Since this can flush, it must be done after enabled_mask is updated. */
735 si_sampler_view_add_buffer(ctx, &res->b.b,
736 RADEON_USAGE_READWRITE, false, true);
737 }
738
739 static void
740 si_set_shader_images(struct pipe_context *pipe,
741 enum pipe_shader_type shader,
742 unsigned start_slot, unsigned count,
743 const struct pipe_image_view *views)
744 {
745 struct si_context *ctx = (struct si_context *)pipe;
746 unsigned i, slot;
747
748 assert(shader < SI_NUM_SHADERS);
749
750 if (!count)
751 return;
752
753 assert(start_slot + count <= SI_NUM_IMAGES);
754
755 if (views) {
756 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
757 si_set_shader_image(ctx, shader, slot, &views[i]);
758 } else {
759 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
760 si_set_shader_image(ctx, shader, slot, NULL);
761 }
762 }
763
764 static void
765 si_images_update_compressed_colortex_mask(struct si_images_info *images)
766 {
767 unsigned mask = images->enabled_mask;
768
769 while (mask) {
770 int i = u_bit_scan(&mask);
771 struct pipe_resource *res = images->views[i].resource;
772
773 if (res && res->target != PIPE_BUFFER) {
774 struct r600_texture *rtex = (struct r600_texture *)res;
775
776 if (is_compressed_colortex(rtex)) {
777 images->compressed_colortex_mask |= 1 << i;
778 } else {
779 images->compressed_colortex_mask &= ~(1 << i);
780 }
781 }
782 }
783 }
784
785 /* SAMPLER STATES */
786
787 static void si_bind_sampler_states(struct pipe_context *ctx,
788 enum pipe_shader_type shader,
789 unsigned start, unsigned count, void **states)
790 {
791 struct si_context *sctx = (struct si_context *)ctx;
792 struct si_textures_info *samplers = &sctx->samplers[shader];
793 struct si_descriptors *desc = si_sampler_descriptors(sctx, shader);
794 struct si_sampler_state **sstates = (struct si_sampler_state**)states;
795 int i;
796
797 if (!count || shader >= SI_NUM_SHADERS)
798 return;
799
800 for (i = 0; i < count; i++) {
801 unsigned slot = start + i;
802
803 if (!sstates[i] ||
804 sstates[i] == samplers->views.sampler_states[slot])
805 continue;
806
807 #ifdef DEBUG
808 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
809 #endif
810 samplers->views.sampler_states[slot] = sstates[i];
811
812 /* If FMASK is bound, don't overwrite it.
813 * The sampler state will be set after FMASK is unbound.
814 */
815 if (samplers->views.views[slot] &&
816 samplers->views.views[slot]->texture &&
817 samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
818 ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
819 continue;
820
821 memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
822 desc->dirty_mask |= 1u << slot;
823 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
824 }
825 }
826
827 /* BUFFER RESOURCES */
828
829 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
830 struct si_descriptors *descs,
831 unsigned num_buffers,
832 unsigned shader_userdata_index,
833 enum radeon_bo_usage shader_usage,
834 enum radeon_bo_priority priority,
835 unsigned *ce_offset)
836 {
837 buffers->shader_usage = shader_usage;
838 buffers->priority = priority;
839 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
840
841 si_init_descriptors(descs, shader_userdata_index, 4,
842 num_buffers, NULL, ce_offset);
843 }
844
845 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
846 struct si_descriptors *descs)
847 {
848 int i;
849
850 for (i = 0; i < descs->num_elements; i++) {
851 pipe_resource_reference(&buffers->buffers[i], NULL);
852 }
853
854 FREE(buffers->buffers);
855 }
856
857 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
858 struct si_buffer_resources *buffers)
859 {
860 unsigned mask = buffers->enabled_mask;
861
862 /* Add buffers to the CS. */
863 while (mask) {
864 int i = u_bit_scan(&mask);
865
866 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
867 (struct r600_resource*)buffers->buffers[i],
868 buffers->shader_usage, buffers->priority);
869 }
870 }
871
872 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
873 struct si_descriptors *descs,
874 unsigned idx, struct pipe_resource **buf,
875 unsigned *offset, unsigned *size)
876 {
877 pipe_resource_reference(buf, buffers->buffers[idx]);
878 if (*buf) {
879 struct r600_resource *res = r600_resource(*buf);
880 const uint32_t *desc = descs->list + idx * 4;
881 uint64_t va;
882
883 *size = desc[2];
884
885 assert(G_008F04_STRIDE(desc[1]) == 0);
886 va = ((uint64_t)desc[1] << 32) | desc[0];
887
888 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
889 *offset = va - res->gpu_address;
890 }
891 }
892
893 /* VERTEX BUFFERS */
894
895 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
896 {
897 struct si_descriptors *desc = &sctx->vertex_buffers;
898 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
899 int i;
900
901 for (i = 0; i < count; i++) {
902 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
903
904 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
905 continue;
906 if (!sctx->vertex_buffer[vb].buffer)
907 continue;
908
909 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
910 (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
911 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
912 }
913
914 if (!desc->buffer)
915 return;
916 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
917 desc->buffer, RADEON_USAGE_READ,
918 RADEON_PRIO_DESCRIPTORS);
919 }
920
921 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
922 {
923 struct si_descriptors *desc = &sctx->vertex_buffers;
924 bool bound[SI_NUM_VERTEX_BUFFERS] = {};
925 unsigned i, count;
926 uint64_t va;
927 uint32_t *ptr;
928
929 if (!sctx->vertex_buffers_dirty || !sctx->vertex_elements)
930 return true;
931 count = sctx->vertex_elements->count;
932 if (!count)
933 return true;
934 /* Vertex buffer descriptors are the only ones which are uploaded
935 * directly through a staging buffer and don't go through
936 * the fine-grained upload path.
937 */
938 u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset,
939 (struct pipe_resource**)&desc->buffer, (void**)&ptr);
940 if (!desc->buffer)
941 return false;
942
943 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
944 desc->buffer, RADEON_USAGE_READ,
945 RADEON_PRIO_DESCRIPTORS);
946
947 assert(count <= SI_NUM_VERTEX_BUFFERS);
948
949 for (i = 0; i < count; i++) {
950 struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
951 struct pipe_vertex_buffer *vb;
952 struct r600_resource *rbuffer;
953 unsigned offset;
954 uint32_t *desc = &ptr[i*4];
955
956 if (ve->vertex_buffer_index >= ARRAY_SIZE(sctx->vertex_buffer)) {
957 memset(desc, 0, 16);
958 continue;
959 }
960
961 vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
962 rbuffer = (struct r600_resource*)vb->buffer;
963 if (!rbuffer) {
964 memset(desc, 0, 16);
965 continue;
966 }
967
968 offset = vb->buffer_offset + ve->src_offset;
969 va = rbuffer->gpu_address + offset;
970
971 /* Fill in T# buffer resource description */
972 desc[0] = va;
973 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
974 S_008F04_STRIDE(vb->stride);
975
976 if (sctx->b.chip_class <= CIK && vb->stride) {
977 /* Round up by rounding down and adding 1 */
978 desc[2] = (vb->buffer->width0 - offset -
979 sctx->vertex_elements->format_size[i]) /
980 vb->stride + 1;
981 } else {
982 uint32_t size3;
983
984 desc[2] = vb->buffer->width0 - offset;
985
986 /* For attributes of size 3 with byte or short
987 * components, we use a 4-component data format.
988 *
989 * As a consequence, we have to round the buffer size
990 * up so that the hardware sees four components as
991 * being inside the buffer if and only if the first
992 * three components are in the buffer.
993 *
994 * Since the offset and stride are guaranteed to be
995 * 4-byte aligned, this alignment will never cross the
996 * winsys buffer boundary.
997 */
998 size3 = (sctx->vertex_elements->fix_size3 >> (2 * i)) & 3;
999 if (vb->stride && size3) {
1000 assert(offset % 4 == 0 && vb->stride % 4 == 0);
1001 assert(size3 <= 2);
1002 desc[2] = align(desc[2], size3 * 2);
1003 }
1004 }
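/* Worked example (hypothetical numbers) of the <= CIK path above:
 * width0 = 1024, offset = 16, format_size = 12, stride = 16 gives
 * (1024 - 16 - 12) / 16 + 1 = 63 records, i.e. the last vertex at
 * index 62 still fits: 16 + 62 * 16 + 12 <= 1024. */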
1005
1006 desc[3] = sctx->vertex_elements->rsrc_word3[i];
1007
1008 if (!bound[ve->vertex_buffer_index]) {
1009 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1010 (struct r600_resource*)vb->buffer,
1011 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1012 bound[ve->vertex_buffer_index] = true;
1013 }
1014 }
1015
1016 /* Don't flush the const cache. It would have a very negative effect
1017 * on performance (confirmed by testing). New descriptors are always
1018 * uploaded to a fresh new buffer, so I don't think flushing the const
1019 * cache is needed. */
1020 desc->pointer_dirty = true;
1021 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
1022 sctx->vertex_buffers_dirty = false;
1023 return true;
1024 }
1025
1026
1027 /* CONSTANT BUFFERS */
1028
1029 static unsigned
1030 si_const_buffer_descriptors_idx(unsigned shader)
1031 {
1032 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1033 SI_SHADER_DESCS_CONST_BUFFERS;
1034 }
1035
1036 static struct si_descriptors *
1037 si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
1038 {
1039 return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
1040 }
1041
1042 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
1043 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1044 {
1045 void *tmp;
1046
1047 u_upload_alloc(sctx->b.uploader, 0, size, 256, const_offset,
1048 (struct pipe_resource**)rbuffer, &tmp);
1049 if (*rbuffer)
1050 util_memcpy_cpu_to_le32(tmp, ptr, size);
1051 }
1052
1053 static void si_set_constant_buffer(struct si_context *sctx,
1054 struct si_buffer_resources *buffers,
1055 unsigned descriptors_idx,
1056 uint slot, const struct pipe_constant_buffer *input)
1057 {
1058 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1059 assert(slot < descs->num_elements);
1060 pipe_resource_reference(&buffers->buffers[slot], NULL);
1061
1062 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1063 * with a NULL buffer). We need to use a dummy buffer instead. */
1064 if (sctx->b.chip_class == CIK &&
1065 (!input || (!input->buffer && !input->user_buffer)))
1066 input = &sctx->null_const_buf;
1067
1068 if (input && (input->buffer || input->user_buffer)) {
1069 struct pipe_resource *buffer = NULL;
1070 uint64_t va;
1071
1072 /* Upload the user buffer if needed. */
1073 if (input->user_buffer) {
1074 unsigned buffer_offset;
1075
1076 si_upload_const_buffer(sctx,
1077 (struct r600_resource**)&buffer, input->user_buffer,
1078 input->buffer_size, &buffer_offset);
1079 if (!buffer) {
1080 /* Just unbind on failure. */
1081 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
1082 return;
1083 }
1084 va = r600_resource(buffer)->gpu_address + buffer_offset;
1085 } else {
1086 pipe_resource_reference(&buffer, input->buffer);
1087 va = r600_resource(buffer)->gpu_address + input->buffer_offset;
1088 /* Only track usage for non-user buffers. */
1089 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
1090 }
1091
1092 /* Set the descriptor. */
1093 uint32_t *desc = descs->list + slot*4;
1094 desc[0] = va;
1095 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1096 S_008F04_STRIDE(0);
1097 desc[2] = input->buffer_size;
1098 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1099 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1100 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1101 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1102 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1103 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1104
1105 buffers->buffers[slot] = buffer;
1106 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1107 (struct r600_resource*)buffer,
1108 buffers->shader_usage,
1109 buffers->priority, true);
1110 buffers->enabled_mask |= 1u << slot;
1111 } else {
1112 /* Clear the descriptor. */
1113 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1114 buffers->enabled_mask &= ~(1u << slot);
1115 }
1116
1117 descs->dirty_mask |= 1u << slot;
1118 sctx->descriptors_dirty |= 1u << descriptors_idx;
1119 }
1120
1121 void si_set_rw_buffer(struct si_context *sctx,
1122 uint slot, const struct pipe_constant_buffer *input)
1123 {
1124 si_set_constant_buffer(sctx, &sctx->rw_buffers,
1125 SI_DESCS_RW_BUFFERS, slot, input);
1126 }
1127
1128 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
1129 uint shader, uint slot,
1130 const struct pipe_constant_buffer *input)
1131 {
1132 struct si_context *sctx = (struct si_context *)ctx;
1133
1134 if (shader >= SI_NUM_SHADERS)
1135 return;
1136
1137 si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
1138 si_const_buffer_descriptors_idx(shader),
1139 slot, input);
1140 }
1141
1142 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
1143 uint slot, struct pipe_constant_buffer *cbuf)
1144 {
1145 cbuf->user_buffer = NULL;
1146 si_get_buffer_from_descriptors(
1147 &sctx->const_buffers[shader],
1148 si_const_buffer_descriptors(sctx, shader),
1149 slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1150 }
1151
1152 /* SHADER BUFFERS */
1153
1154 static unsigned
1155 si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
1156 {
1157 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1158 SI_SHADER_DESCS_SHADER_BUFFERS;
1159 }
1160
1161 static struct si_descriptors *
1162 si_shader_buffer_descriptors(struct si_context *sctx,
1163 enum pipe_shader_type shader)
1164 {
1165 return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
1166 }
1167
1168 static void si_set_shader_buffers(struct pipe_context *ctx,
1169 enum pipe_shader_type shader,
1170 unsigned start_slot, unsigned count,
1171 const struct pipe_shader_buffer *sbuffers)
1172 {
1173 struct si_context *sctx = (struct si_context *)ctx;
1174 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
1175 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
1176 unsigned i;
1177
1178 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1179
1180 for (i = 0; i < count; ++i) {
1181 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1182 struct r600_resource *buf;
1183 unsigned slot = start_slot + i;
1184 uint32_t *desc = descs->list + slot * 4;
1185 uint64_t va;
1186
1187 if (!sbuffer || !sbuffer->buffer) {
1188 pipe_resource_reference(&buffers->buffers[slot], NULL);
1189 memset(desc, 0, sizeof(uint32_t) * 4);
1190 buffers->enabled_mask &= ~(1u << slot);
1191 descs->dirty_mask |= 1u << slot;
1192 sctx->descriptors_dirty |=
1193 1u << si_shader_buffer_descriptors_idx(shader);
1194 continue;
1195 }
1196
1197 buf = (struct r600_resource *)sbuffer->buffer;
1198 va = buf->gpu_address + sbuffer->buffer_offset;
1199
1200 desc[0] = va;
1201 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1202 S_008F04_STRIDE(0);
1203 desc[2] = sbuffer->buffer_size;
1204 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1205 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1206 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1207 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1208 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1209 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1210
1211 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1212 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1213 buffers->shader_usage,
1214 buffers->priority, true);
1215 buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
1216
1217 buffers->enabled_mask |= 1u << slot;
1218 descs->dirty_mask |= 1u << slot;
1219 sctx->descriptors_dirty |=
1220 1u << si_shader_buffer_descriptors_idx(shader);
1221 }
1222 }
1223
1224 void si_get_shader_buffers(struct si_context *sctx, uint shader,
1225 uint start_slot, uint count,
1226 struct pipe_shader_buffer *sbuf)
1227 {
1228 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
1229 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
1230
1231 for (unsigned i = 0; i < count; ++i) {
1232 si_get_buffer_from_descriptors(
1233 buffers, descs, start_slot + i,
1234 &sbuf[i].buffer, &sbuf[i].buffer_offset,
1235 &sbuf[i].buffer_size);
1236 }
1237 }
1238
1239 /* RING BUFFERS */
1240
1241 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
1242 struct pipe_resource *buffer,
1243 unsigned stride, unsigned num_records,
1244 bool add_tid, bool swizzle,
1245 unsigned element_size, unsigned index_stride, uint64_t offset)
1246 {
1247 struct si_context *sctx = (struct si_context *)ctx;
1248 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1249 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1250
1251 /* The stride field in the resource descriptor has 14 bits */
1252 assert(stride < (1 << 14));
1253
1254 assert(slot < descs->num_elements);
1255 pipe_resource_reference(&buffers->buffers[slot], NULL);
1256
1257 if (buffer) {
1258 uint64_t va;
1259
1260 va = r600_resource(buffer)->gpu_address + offset;
1261
1262 switch (element_size) {
1263 default:
1264 assert(!"Unsupported ring buffer element size");
1265 case 0:
1266 case 2:
1267 element_size = 0;
1268 break;
1269 case 4:
1270 element_size = 1;
1271 break;
1272 case 8:
1273 element_size = 2;
1274 break;
1275 case 16:
1276 element_size = 3;
1277 break;
1278 }
1279
1280 switch (index_stride) {
1281 default:
1282 assert(!"Unsupported ring buffer index stride");
1283 case 0:
1284 case 8:
1285 index_stride = 0;
1286 break;
1287 case 16:
1288 index_stride = 1;
1289 break;
1290 case 32:
1291 index_stride = 2;
1292 break;
1293 case 64:
1294 index_stride = 3;
1295 break;
1296 }
1297
1298 if (sctx->b.chip_class >= VI && stride)
1299 num_records *= stride;
1300
1301 /* Set the descriptor. */
1302 uint32_t *desc = descs->list + slot*4;
1303 desc[0] = va;
1304 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1305 S_008F04_STRIDE(stride) |
1306 S_008F04_SWIZZLE_ENABLE(swizzle);
1307 desc[2] = num_records;
1308 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1309 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1310 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1311 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1312 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1313 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1314 S_008F0C_ELEMENT_SIZE(element_size) |
1315 S_008F0C_INDEX_STRIDE(index_stride) |
1316 S_008F0C_ADD_TID_ENABLE(add_tid);
1317
1318 pipe_resource_reference(&buffers->buffers[slot], buffer);
1319 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1320 (struct r600_resource*)buffer,
1321 buffers->shader_usage, buffers->priority);
1322 buffers->enabled_mask |= 1u << slot;
1323 } else {
1324 /* Clear the descriptor. */
1325 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1326 buffers->enabled_mask &= ~(1u << slot);
1327 }
1328
1329 descs->dirty_mask |= 1u << slot;
1330 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1331 }
1332
1333 /* STREAMOUT BUFFERS */
1334
1335 static void si_set_streamout_targets(struct pipe_context *ctx,
1336 unsigned num_targets,
1337 struct pipe_stream_output_target **targets,
1338 const unsigned *offsets)
1339 {
1340 struct si_context *sctx = (struct si_context *)ctx;
1341 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1342 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1343 unsigned old_num_targets = sctx->b.streamout.num_targets;
1344 unsigned i, bufidx;
1345
1346 /* We are going to unbind the buffers. Mark which caches need to be flushed. */
1347 if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
1348 /* Since streamout uses vector writes which go through TC L2
1349 * and most other clients can use TC L2 as well, we don't need
1350 * to flush it.
1351 *
1352 * The only cases that require flushing it are VGT DMA index
1353 * fetching (on <= CIK) and indirect draw data, both of which
1354 * are rare. Thus, flag the TC L2 dirtiness in the resource and
1355 * handle it at draw call time.
1356 */
1357 for (i = 0; i < sctx->b.streamout.num_targets; i++)
1358 if (sctx->b.streamout.targets[i])
1359 r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
1360
1361 /* Invalidate the scalar cache in case a streamout buffer is
1362 * going to be used as a constant buffer.
1363 *
1364 * Invalidate TC L1, because streamout bypasses it (done by
1365 * setting GLC=1 in the store instruction), but it can contain
1366 * outdated data of streamout buffers.
1367 *
1368 * VS_PARTIAL_FLUSH is required if the buffers are going to be
1369 * used as an input immediately.
1370 */
1371 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
1372 SI_CONTEXT_INV_VMEM_L1 |
1373 SI_CONTEXT_VS_PARTIAL_FLUSH;
1374 }
1375
1376 /* All readers of the streamout targets need to be finished before we can
1377 * start writing to the targets.
1378 */
1379 if (num_targets)
1380 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1381 SI_CONTEXT_CS_PARTIAL_FLUSH;
1382
1383 /* Streamout buffers must be bound in 2 places:
1384 * 1) in VGT by setting the VGT_STRMOUT registers
1385 * 2) as shader resources
1386 */
1387
1388 /* Set the VGT regs. */
1389 r600_set_streamout_targets(ctx, num_targets, targets, offsets);
1390
1391 /* Set the shader resources. */
1392 for (i = 0; i < num_targets; i++) {
1393 bufidx = SI_VS_STREAMOUT_BUF0 + i;
1394
1395 if (targets[i]) {
1396 struct pipe_resource *buffer = targets[i]->buffer;
1397 uint64_t va = r600_resource(buffer)->gpu_address;
1398
1399 /* Set the descriptor.
1400 *
1401 * On VI, the format must be non-INVALID, otherwise
1402 * the buffer will be considered not bound and store
1403 * instructions will be no-ops.
1404 */
1405 uint32_t *desc = descs->list + bufidx*4;
1406 desc[0] = va;
1407 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
1408 desc[2] = 0xffffffff;
1409 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1410 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1411 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1412 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1413 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1414
1415 /* Set the resource. */
1416 pipe_resource_reference(&buffers->buffers[bufidx],
1417 buffer);
1418 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1419 (struct r600_resource*)buffer,
1420 buffers->shader_usage,
1421 RADEON_PRIO_SHADER_RW_BUFFER,
1422 true);
1423 r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
1424
1425 buffers->enabled_mask |= 1u << bufidx;
1426 } else {
1427 /* Clear the descriptor and unset the resource. */
1428 memset(descs->list + bufidx*4, 0,
1429 sizeof(uint32_t) * 4);
1430 pipe_resource_reference(&buffers->buffers[bufidx],
1431 NULL);
1432 buffers->enabled_mask &= ~(1u << bufidx);
1433 }
1434 descs->dirty_mask |= 1u << bufidx;
1435 }
1436 for (; i < old_num_targets; i++) {
1437 bufidx = SI_VS_STREAMOUT_BUF0 + i;
1438 /* Clear the descriptor and unset the resource. */
1439 memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
1440 pipe_resource_reference(&buffers->buffers[bufidx], NULL);
1441 buffers->enabled_mask &= ~(1u << bufidx);
1442 descs->dirty_mask |= 1u << bufidx;
1443 }
1444
1445 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1446 }
1447
1448 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
1449 uint32_t *desc, uint64_t old_buf_va,
1450 struct pipe_resource *new_buf)
1451 {
1452 /* Retrieve the buffer offset from the descriptor. */
1453 uint64_t old_desc_va =
1454 desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
1455
1456 assert(old_buf_va <= old_desc_va);
1457 uint64_t offset_within_buffer = old_desc_va - old_buf_va;
1458
1459 /* Update the descriptor. */
1460 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer,
1461 desc);
1462 }
1463
1464 /* INTERNAL CONST BUFFERS */
1465
1466 static void si_set_polygon_stipple(struct pipe_context *ctx,
1467 const struct pipe_poly_stipple *state)
1468 {
1469 struct si_context *sctx = (struct si_context *)ctx;
1470 struct pipe_constant_buffer cb = {};
1471 unsigned stipple[32];
1472 int i;
1473
1474 for (i = 0; i < 32; i++)
1475 stipple[i] = util_bitreverse(state->stipple[i]);
1476
1477 cb.user_buffer = stipple;
1478 cb.buffer_size = sizeof(stipple);
1479
1480 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1481 }
1482
1483 /* TEXTURE METADATA ENABLE/DISABLE */
1484
1485 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1486 * while the texture is bound, possibly by a different context. In that case,
1487 * call this function to update compressed_colortex_masks.
1488 */
1489 void si_update_compressed_colortex_masks(struct si_context *sctx)
1490 {
1491 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1492 si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]);
1493 si_images_update_compressed_colortex_mask(&sctx->images[i]);
1494 }
1495 }
1496
1497 /* BUFFER DISCARD/INVALIDATION */
1498
1499 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
1500 static void si_reset_buffer_resources(struct si_context *sctx,
1501 struct si_buffer_resources *buffers,
1502 unsigned descriptors_idx,
1503 struct pipe_resource *buf,
1504 uint64_t old_va)
1505 {
1506 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1507 unsigned mask = buffers->enabled_mask;
1508
1509 while (mask) {
1510 unsigned i = u_bit_scan(&mask);
1511 if (buffers->buffers[i] == buf) {
1512 si_desc_reset_buffer_offset(&sctx->b.b,
1513 descs->list + i*4,
1514 old_va, buf);
1515 descs->dirty_mask |= 1u << i;
1516 sctx->descriptors_dirty |= 1u << descriptors_idx;
1517
1518 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1519 (struct r600_resource *)buf,
1520 buffers->shader_usage,
1521 buffers->priority, true);
1522 }
1523 }
1524 }
1525
1526 /* Reallocate a buffer and update all resource bindings where the buffer is
1527 * bound.
1528 *
1529 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1530 * idle by discarding its contents. Apps usually tell us when to do this using
1531 * map_buffer flags, for example.
1532 */
1533 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1534 {
1535 struct si_context *sctx = (struct si_context*)ctx;
1536 struct r600_resource *rbuffer = r600_resource(buf);
1537 unsigned i, shader;
1538 uint64_t old_va = rbuffer->gpu_address;
1539 unsigned num_elems = sctx->vertex_elements ?
1540 sctx->vertex_elements->count : 0;
1541
1542 /* Reallocate the buffer in the same pipe_resource. */
1543 r600_alloc_resource(&sctx->screen->b, rbuffer);
1544
1545 /* We changed the buffer, now we need to bind it where the old one
1546 * was bound. This consists of 2 things:
1547 * 1) Updating the resource descriptor and dirtying it.
1548 * 2) Adding a relocation to the CS, so that it's usable.
1549 */
1550
1551 /* Vertex buffers. */
1552 if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
1553 for (i = 0; i < num_elems; i++) {
1554 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
1555
1556 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1557 continue;
1558 if (!sctx->vertex_buffer[vb].buffer)
1559 continue;
1560
1561 if (sctx->vertex_buffer[vb].buffer == buf) {
1562 sctx->vertex_buffers_dirty = true;
1563 break;
1564 }
1565 }
1566 }
1567
1568 /* Streamout buffers. (other internal buffers can't be invalidated) */
1569 if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
1570 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1571 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1572 struct si_descriptors *descs =
1573 &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1574
1575 if (buffers->buffers[i] != buf)
1576 continue;
1577
1578 si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1579 old_va, buf);
1580 descs->dirty_mask |= 1u << i;
1581 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1582
1583 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1584 rbuffer, buffers->shader_usage,
1585 RADEON_PRIO_SHADER_RW_BUFFER,
1586 true);
1587
1588 /* Update the streamout state. */
1589 if (sctx->b.streamout.begin_emitted)
1590 r600_emit_streamout_end(&sctx->b);
1591 sctx->b.streamout.append_bitmask =
1592 sctx->b.streamout.enabled_mask;
1593 r600_streamout_buffers_dirty(&sctx->b);
1594 }
1595 }
1596
1597 /* Constant and shader buffers. */
1598 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
1599 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1600 si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
1601 si_const_buffer_descriptors_idx(shader),
1602 buf, old_va);
1603 }
1604
1605 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
1606 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1607 si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
1608 si_shader_buffer_descriptors_idx(shader),
1609 buf, old_va);
1610 }
1611
1612 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
1613 /* Texture buffers - update bindings. */
1614 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1615 struct si_sampler_views *views = &sctx->samplers[shader].views;
1616 struct si_descriptors *descs =
1617 si_sampler_descriptors(sctx, shader);
1618 unsigned mask = views->enabled_mask;
1619
1620 while (mask) {
1621 unsigned i = u_bit_scan(&mask);
1622 if (views->views[i]->texture == buf) {
1623 si_desc_reset_buffer_offset(ctx,
1624 descs->list +
1625 i * 16 + 4,
1626 old_va, buf);
1627 descs->dirty_mask |= 1u << i;
1628 sctx->descriptors_dirty |=
1629 1u << si_sampler_descriptors_idx(shader);
1630
1631 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1632 rbuffer, RADEON_USAGE_READ,
1633 RADEON_PRIO_SAMPLER_BUFFER,
1634 true);
1635 }
1636 }
1637 }
1638 }
1639
1640 /* Shader images */
1641 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
1642 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1643 struct si_images_info *images = &sctx->images[shader];
1644 struct si_descriptors *descs =
1645 si_image_descriptors(sctx, shader);
1646 unsigned mask = images->enabled_mask;
1647
1648 while (mask) {
1649 unsigned i = u_bit_scan(&mask);
1650
1651 if (images->views[i].resource == buf) {
1652 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1653 si_mark_image_range_valid(&images->views[i]);
1654
1655 si_desc_reset_buffer_offset(
1656 ctx, descs->list + i * 8 + 4,
1657 old_va, buf);
1658 descs->dirty_mask |= 1u << i;
1659 sctx->descriptors_dirty |=
1660 1u << si_image_descriptors_idx(shader);
1661
1662 radeon_add_to_buffer_list_check_mem(
1663 &sctx->b, &sctx->b.gfx, rbuffer,
1664 RADEON_USAGE_READWRITE,
1665 RADEON_PRIO_SAMPLER_BUFFER, true);
1666 }
1667 }
1668 }
1669 }
1670 }
1671
1672 /* Update mutable image descriptor fields of all bound textures. */
1673 void si_update_all_texture_descriptors(struct si_context *sctx)
1674 {
1675 unsigned shader;
1676
1677 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1678 struct si_sampler_views *samplers = &sctx->samplers[shader].views;
1679 struct si_images_info *images = &sctx->images[shader];
1680 unsigned mask;
1681
1682 /* Images. */
1683 mask = images->enabled_mask;
1684 while (mask) {
1685 unsigned i = u_bit_scan(&mask);
1686 struct pipe_image_view *view = &images->views[i];
1687
1688 if (!view->resource ||
1689 view->resource->target == PIPE_BUFFER)
1690 continue;
1691
1692 si_set_shader_image(sctx, shader, i, view);
1693 }
1694
1695 /* Sampler views. */
1696 mask = samplers->enabled_mask;
1697 while (mask) {
1698 unsigned i = u_bit_scan(&mask);
1699 struct pipe_sampler_view *view = samplers->views[i];
1700
1701 if (!view ||
1702 !view->texture ||
1703 view->texture->target == PIPE_BUFFER)
1704 continue;
1705
1706 si_set_sampler_view(sctx, shader, i,
1707 samplers->views[i], true);
1708 }
1709 }
1710 }
1711
1712 /* SHADER USER DATA */
1713
1714 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
1715 unsigned shader)
1716 {
1717 struct si_descriptors *descs =
1718 &sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
1719
1720 for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
1721 descs->pointer_dirty = true;
1722
1723 if (shader == PIPE_SHADER_VERTEX)
1724 sctx->vertex_buffers.pointer_dirty = true;
1725
1726 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
1727 }
1728
static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_mark_shader_pointers_dirty(sctx, i);
	}
	sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
}

/* Set a base register address for user data constants in the given shader.
 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
 */
static void si_set_user_data_base(struct si_context *sctx,
				  unsigned shader, uint32_t new_base)
{
	uint32_t *base = &sctx->shader_userdata.sh_base[shader];

	if (*base != new_base) {
		*base = new_base;

		if (new_base)
			si_mark_shader_pointers_dirty(sctx, shader);
	}
}

/* This must be called when these shaders are changed from non-NULL to NULL
 * and vice versa:
 * - geometry shader
 * - tessellation control shader
 * - tessellation evaluation shader
 */
void si_shader_change_notify(struct si_context *sctx)
{
	/* VS can be bound as VS, ES, or LS. */
	if (sctx->tes_shader.cso)
		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
				      R_00B530_SPI_SHADER_USER_DATA_LS_0);
	else if (sctx->gs_shader.cso)
		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
	else
		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
				      R_00B130_SPI_SHADER_USER_DATA_VS_0);

	/* TES can be bound as ES, VS, or not bound. */
	if (sctx->tes_shader.cso) {
		if (sctx->gs_shader.cso)
			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
					      R_00B330_SPI_SHADER_USER_DATA_ES_0);
		else
			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
					      R_00B130_SPI_SHADER_USER_DATA_VS_0);
	} else {
		si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
	}
}

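/* Emit a single shader pointer: a SET_SH_REG packet that writes the 64-bit
 * GPU address of a descriptor list into two consecutive user-data SGPRs of
 * the target stage. The register offset is sh_base plus the list's
 * shader_userdata_offset. As an illustrative example (values assumed, not
 * taken from this file): a list with shader_userdata_offset == 4 emitted
 * against R_00B130_SPI_SHADER_USER_DATA_VS_0 would program
 * SPI_SHADER_USER_DATA_VS_1 and _VS_2 with the address.
 */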
static void si_emit_shader_pointer(struct si_context *sctx,
				   struct si_descriptors *desc,
				   unsigned sh_base, bool keep_dirty)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	uint64_t va;

	if (!desc->pointer_dirty || !desc->buffer)
		return;

	va = desc->buffer->gpu_address +
	     desc->buffer_offset;

	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
	radeon_emit(cs, va);
	radeon_emit(cs, va >> 32);

	desc->pointer_dirty = keep_dirty;
}

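/* Emit all dirty graphics descriptor pointers. The RW buffer list is shared
 * by all stages, so its pointer is broadcast to every graphics user-data
 * base (keep_dirty=true keeps it pending until all five are emitted).
 */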
void si_emit_graphics_shader_userdata(struct si_context *sctx,
				      struct r600_atom *atom)
{
	unsigned shader;
	uint32_t *sh_base = sctx->shader_userdata.sh_base;
	struct si_descriptors *descs;

	descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];

	if (descs->pointer_dirty) {
		si_emit_shader_pointer(sctx, descs,
				       R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
		si_emit_shader_pointer(sctx, descs,
				       R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
		si_emit_shader_pointer(sctx, descs,
				       R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
		si_emit_shader_pointer(sctx, descs,
				       R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
		si_emit_shader_pointer(sctx, descs,
				       R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
		descs->pointer_dirty = false;
	}

	descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];

	for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
		unsigned base = sh_base[shader];
		unsigned i;

		if (!base)
			continue;

		for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
			si_emit_shader_pointer(sctx, descs, base, false);
	}
	si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}

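/* Same as the graphics path, but for the compute shader: all descriptor
 * pointers go into the COMPUTE_USER_DATA registers.
 */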
void si_emit_compute_shader_userdata(struct si_context *sctx)
{
	unsigned base = R_00B900_COMPUTE_USER_DATA_0;
	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];

	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
		si_emit_shader_pointer(sctx, descs, base, false);
}

/* INIT/DEINIT/UPLOAD */

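/* Create all descriptor lists and hook up the pipe_context entry points
 * that modify them. ce_offset accumulates how much constant-engine (CE)
 * RAM the lists reserve; the assert below enforces the 32768-byte limit.
 */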
void si_init_all_descriptors(struct si_context *sctx)
{
	int i;
	unsigned ce_offset = 0;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_init_buffer_resources(&sctx->const_buffers[i],
					 si_const_buffer_descriptors(sctx, i),
					 SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
					 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
					 &ce_offset);
		si_init_buffer_resources(&sctx->shader_buffers[i],
					 si_shader_buffer_descriptors(sctx, i),
					 SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
					 &ce_offset);

		si_init_descriptors(si_sampler_descriptors(sctx, i),
				    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
				    null_texture_descriptor, &ce_offset);

		si_init_descriptors(si_image_descriptors(sctx, i),
				    SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
				    null_image_descriptor, &ce_offset);
	}

	si_init_buffer_resources(&sctx->rw_buffers,
				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
				 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
				 &ce_offset);
	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
			    4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);

	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);

	assert(ce_offset <= 32768);

	/* Set pipe_context functions. */
	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
	sctx->b.b.set_shader_images = si_set_shader_images;
	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
	sctx->b.b.set_sampler_views = si_set_sampler_views;
	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
	sctx->b.invalidate_buffer = si_invalidate_buffer;

	/* Shader user data. */
	si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
		     si_emit_graphics_shader_userdata);

	/* Set default and immutable mappings. */
	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
	si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
	si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}

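/* Upload all dirty descriptor lists used by graphics shaders (every list
 * before SI_DESCS_FIRST_COMPUTE). Returns false on allocation failure.
 */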
bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
{
	const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
	unsigned dirty = sctx->descriptors_dirty & mask;

	while (dirty) {
		unsigned i = u_bit_scan(&dirty);

		if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
					   &sctx->shader_userdata.atom))
			return false;
	}

	sctx->descriptors_dirty &= ~mask;
	return true;
}

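/* Upload all dirty descriptor lists used by the compute shader. */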
bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
	/* This does not update rw_buffers, because they are not needed by
	 * compute shaders and the input buffer uses the same SGPRs anyway.
	 */
	const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
						SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
	unsigned dirty = sctx->descriptors_dirty & mask;

	while (dirty) {
		unsigned i = u_bit_scan(&dirty);

		if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
			return false;
	}

	sctx->descriptors_dirty &= ~mask;

	return true;
}

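/* Free all descriptor lists and drop the references they hold on bound
 * resources.
 */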
void si_release_all_descriptors(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_release_buffer_resources(&sctx->const_buffers[i],
					    si_const_buffer_descriptors(sctx, i));
		si_release_buffer_resources(&sctx->shader_buffers[i],
					    si_shader_buffer_descriptors(sctx, i));
		si_release_sampler_views(&sctx->samplers[i].views);
		si_release_image_views(&sctx->images[i]);
	}
	si_release_buffer_resources(&sctx->rw_buffers,
				    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_release_descriptors(&sctx->descriptors[i]);
	si_release_descriptors(&sctx->vertex_buffers);
}

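/* Re-add all bound resources and descriptor buffers to the buffer list of
 * the new command stream, and mark every shader pointer dirty so that it is
 * re-emitted on the next draw.
 */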
void si_all_descriptors_begin_new_cs(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
		si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
	}
	si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
	si_vertex_buffers_begin_new_cs(sctx);

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);

	si_shader_userdata_begin_new_cs(sctx);
}