gallium/radeon: add and use a new helper vi_dcc_enabled
[mesa.git] / src / gallium / drivers / radeonsi / si_descriptors.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <marek.olsak@amd.com>
25 */
26
27 /* Resource binding slots and sampler states (each described with 8 or
28 * 4 dwords) are stored in lists in memory which is accessed by shaders
29 * using scalar load instructions.
30 *
31 * This file is responsible for managing such lists. It keeps a copy of all
32 * descriptors in CPU memory and re-uploads a whole list if some slots have
33 * been changed.
34 *
35 * This code is also responsible for updating shader pointers to those lists.
36 *
37 * Note that CP DMA can't be used for updating the lists, because a GPU hang
38 * could leave the list in a mid-IB state and the next IB would get wrong
39 * descriptors and the whole context would be unusable at that point.
40 * (Note: register shadowing can't be used for the same reason.)
41 *
42 * Also, uploading descriptors to newly allocated memory doesn't require
43 * a KCACHE flush.
44 *
45 *
46 * Possible scenarios for one 16 dword image+sampler slot:
47 *
48 * | Image | w/ FMASK | Buffer | NULL
49 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
50 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
51 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
52 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
53 *
54 * FMASK implies MSAA, therefore no sampler state.
55 * Sampler states are never unbound except when FMASK is bound.
56 */
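/* Illustrative sketch only (not driver code): hypothetical helpers showing
 * where each sub-descriptor lives inside one 16-dword image+sampler slot,
 * following the table above. Offsets are in dwords.
 */
#if 0
static inline uint32_t *slot_image_desc(uint32_t *slot)   { return slot + 0;  } /* Image[0:7]            */
static inline uint32_t *slot_buffer_desc(uint32_t *slot)  { return slot + 4;  } /* Buffer[0:3]           */
static inline uint32_t *slot_fmask_desc(uint32_t *slot)   { return slot + 8;  } /* FMASK[0:7] (w/ MSAA)  */
static inline uint32_t *slot_sampler_desc(uint32_t *slot) { return slot + 12; } /* Sampler[0:3], no FMASK */
#endif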
57
58 #include "radeon/r600_cs.h"
59 #include "si_pipe.h"
60 #include "sid.h"
61 #include "gfx9d.h"
62
63 #include "util/u_format.h"
64 #include "util/u_memory.h"
65 #include "util/u_upload_mgr.h"
66
67
68 /* NULL image and buffer descriptor for textures (alpha = 1) and images
69 * (alpha = 0).
70 *
71 * For images, all fields must be zero except for the swizzle, which
72 * supports arbitrary combinations of 0s and 1s. The texture type must be
73 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
74 *
75 * For buffers, all fields must be zero. If they are not, the hw hangs.
76 *
77 * This is the only reason why the buffer descriptor must be in words [4:7].
78 */
79 static uint32_t null_texture_descriptor[8] = {
80 0,
81 0,
82 0,
83 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
84 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
85 /* the rest must contain zeros, which is also used by the buffer
86 * descriptor */
87 };
88
89 static uint32_t null_image_descriptor[8] = {
90 0,
91 0,
92 0,
93 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
94 /* the rest must contain zeros, which is also used by the buffer
95 * descriptor */
96 };
97
98 static void si_init_descriptors(struct si_descriptors *desc,
99 unsigned shader_userdata_index,
100 unsigned element_dw_size,
101 unsigned num_elements,
102 const uint32_t *null_descriptor,
103 unsigned *ce_offset)
104 {
105 int i;
106
107 assert(num_elements <= sizeof(desc->dirty_mask)*8);
108
109 desc->list = CALLOC(num_elements, element_dw_size * 4);
110 desc->element_dw_size = element_dw_size;
111 desc->num_elements = num_elements;
112 desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
113 desc->shader_userdata_offset = shader_userdata_index * 4;
114
115 if (ce_offset) {
116 desc->uses_ce = true;
117 desc->ce_offset = *ce_offset;
118
119 /* make sure that ce_offset stays 32-byte aligned */
120 *ce_offset += align(element_dw_size * num_elements * 4, 32);
121 }
122
123 /* Initialize the array to NULL descriptors if the element size is a multiple of 8 dwords. */
124 if (null_descriptor) {
125 assert(element_dw_size % 8 == 0);
126 for (i = 0; i < num_elements * element_dw_size / 8; i++)
127 memcpy(desc->list + i * 8, null_descriptor,
128 8 * 4);
129 }
130 }
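/* Example only: what the dirty_mask initialization above produces.
 * For num_elements = 20: (1u << 20) - 1 = 0x000fffff (slots 0-19 dirty).
 * For num_elements = 32: ~0u = 0xffffffff is written explicitly, because
 * "1u << 32" would be undefined behavior for a 32-bit shift.
 */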
131
132 static void si_release_descriptors(struct si_descriptors *desc)
133 {
134 r600_resource_reference(&desc->buffer, NULL);
135 FREE(desc->list);
136 }
137
138 static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
139 unsigned *out_offset, struct r600_resource **out_buf) {
140 uint64_t va;
141
142 u_suballocator_alloc(sctx->ce_suballocator, size,
143 sctx->screen->b.info.tcc_cache_line_size,
144 out_offset, (struct pipe_resource**)out_buf);
145 if (!*out_buf)
146 return false;
147
148 va = (*out_buf)->gpu_address + *out_offset;
149
150 radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
151 radeon_emit(sctx->ce_ib, ce_offset);
152 radeon_emit(sctx->ce_ib, size / 4);
153 radeon_emit(sctx->ce_ib, va);
154 radeon_emit(sctx->ce_ib, va >> 32);
155
156 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf,
157 RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
158
159 sctx->ce_need_synchronization = true;
160 return true;
161 }
162
163 static void si_ce_reinitialize_descriptors(struct si_context *sctx,
164 struct si_descriptors *desc)
165 {
166 if (desc->buffer) {
167 struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
168 unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
169 uint64_t va = buffer->gpu_address + desc->buffer_offset;
170 struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
171
172 if (!ib)
173 ib = sctx->ce_ib;
174
175 list_size = align(list_size, 32);
176
177 radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
178 radeon_emit(ib, va);
179 radeon_emit(ib, va >> 32);
180 radeon_emit(ib, list_size / 4);
181 radeon_emit(ib, desc->ce_offset);
182
183 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
184 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
185 }
186 desc->ce_ram_dirty = false;
187 }
188
189 void si_ce_reinitialize_all_descriptors(struct si_context *sctx)
190 {
191 int i;
192
193 for (i = 0; i < SI_NUM_DESCS; ++i)
194 si_ce_reinitialize_descriptors(sctx, &sctx->descriptors[i]);
195 }
196
197 void si_ce_enable_loads(struct radeon_winsys_cs *ib)
198 {
199 radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
200 radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) |
201 CONTEXT_CONTROL_LOAD_CE_RAM(1));
202 radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1));
203 }
204
205 static bool si_upload_descriptors(struct si_context *sctx,
206 struct si_descriptors *desc,
207 struct r600_atom * atom)
208 {
209 unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
210
211 if (!desc->dirty_mask)
212 return true;
213
214 if (sctx->ce_ib && desc->uses_ce) {
215 uint32_t const* list = (uint32_t const*)desc->list;
216
217 if (desc->ce_ram_dirty)
218 si_ce_reinitialize_descriptors(sctx, desc);
219
220 while(desc->dirty_mask) {
221 int begin, count;
222 u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
223 &count);
224
225 begin *= desc->element_dw_size;
226 count *= desc->element_dw_size;
227
228 radeon_emit(sctx->ce_ib,
229 PKT3(PKT3_WRITE_CONST_RAM, count, 0));
230 radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
231 radeon_emit_array(sctx->ce_ib, list + begin, count);
232 }
233
234 if (!si_ce_upload(sctx, desc->ce_offset, list_size,
235 &desc->buffer_offset, &desc->buffer))
236 return false;
237 } else {
238 void *ptr;
239
240 u_upload_alloc(sctx->b.b.const_uploader, 0, list_size,
241 sctx->screen->b.info.tcc_cache_line_size,
242 &desc->buffer_offset,
243 (struct pipe_resource**)&desc->buffer, &ptr);
244 if (!desc->buffer)
245 return false; /* skip the draw call */
246
247 util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
248 desc->gpu_list = ptr;
249
250 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
251 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
252 }
253 desc->dirty_mask = 0;
254
255 if (atom)
256 si_mark_atom_dirty(sctx, atom);
257
258 return true;
259 }
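/* Example only: how the CE path above turns a dirty_mask into packets.
 * Assume element_dw_size = 16 and dirty_mask = 0x6 (slots 1 and 2 dirty):
 * u_bit_scan_consecutive_range() yields begin = 1, count = 2, which the
 * loop scales to dwords (begin = 16, count = 32), i.e. one WRITE_CONST_RAM
 * packet of 32 dwords written at byte offset ce_offset + 16 * 4.
 */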
260
261 static void
262 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
263 {
264 desc->ce_ram_dirty = true;
265
266 if (!desc->buffer)
267 return;
268
269 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
270 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
271 }
272
273 /* SAMPLER VIEWS */
274
275 static unsigned
276 si_sampler_descriptors_idx(unsigned shader)
277 {
278 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
279 SI_SHADER_DESCS_SAMPLERS;
280 }
281
282 static struct si_descriptors *
283 si_sampler_descriptors(struct si_context *sctx, unsigned shader)
284 {
285 return &sctx->descriptors[si_sampler_descriptors_idx(shader)];
286 }
287
288 static void si_release_sampler_views(struct si_sampler_views *views)
289 {
290 int i;
291
292 for (i = 0; i < ARRAY_SIZE(views->views); i++) {
293 pipe_sampler_view_reference(&views->views[i], NULL);
294 }
295 }
296
297 static void si_sampler_view_add_buffer(struct si_context *sctx,
298 struct pipe_resource *resource,
299 enum radeon_bo_usage usage,
300 bool is_stencil_sampler,
301 bool check_mem)
302 {
303 struct r600_resource *rres;
304 struct r600_texture *rtex;
305 enum radeon_bo_priority priority;
306
307 if (!resource)
308 return;
309
310 if (resource->target != PIPE_BUFFER) {
311 struct r600_texture *tex = (struct r600_texture*)resource;
312
313 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler))
314 resource = &tex->flushed_depth_texture->resource.b.b;
315 }
316
317 rres = (struct r600_resource*)resource;
318 priority = r600_get_sampler_view_priority(rres);
319
320 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
321 rres, usage, priority,
322 check_mem);
323
324 if (resource->target == PIPE_BUFFER)
325 return;
326
327 /* Now add separate DCC or HTILE. */
328 rtex = (struct r600_texture*)resource;
329 if (rtex->dcc_separate_buffer) {
330 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
331 rtex->dcc_separate_buffer, usage,
332 RADEON_PRIO_DCC, check_mem);
333 }
334
335 if (rtex->htile_buffer &&
336 rtex->tc_compatible_htile &&
337 !is_stencil_sampler) {
338 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
339 rtex->htile_buffer, usage,
340 RADEON_PRIO_HTILE, check_mem);
341 }
342 }
343
344 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
345 struct si_sampler_views *views)
346 {
347 unsigned mask = views->enabled_mask;
348
349 /* Add buffers to the CS. */
350 while (mask) {
351 int i = u_bit_scan(&mask);
352 struct si_sampler_view *sview = (struct si_sampler_view *)views->views[i];
353
354 si_sampler_view_add_buffer(sctx, sview->base.texture,
355 RADEON_USAGE_READ,
356 sview->is_stencil_sampler, false);
357 }
358 }
359
360 /* Set buffer descriptor fields that can be changed by reallocations. */
361 static void si_set_buf_desc_address(struct r600_resource *buf,
362 uint64_t offset, uint32_t *state)
363 {
364 uint64_t va = buf->gpu_address + offset;
365
366 state[0] = va;
367 state[1] &= C_008F04_BASE_ADDRESS_HI;
368 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
369 }
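/* Example only (hypothetical address): how the 64-bit VA is split above.
 * va = 0x0000001234567890 ->
 *   state[0]              = 0x34567890  (low 32 bits)
 *   BASE_ADDRESS_HI field = 0x00000012  (va >> 32)
 * The other bits of state[1] (stride, swizzle, ...) are preserved by the
 * C_008F04_BASE_ADDRESS_HI mask.
 */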
370
371 /* Set texture descriptor fields that can be changed by reallocations.
372 *
373 * \param tex texture
374 * \param base_level_info information of the level of BASE_ADDRESS
375 * \param base_level the level of BASE_ADDRESS
376 * \param first_level pipe_sampler_view.u.tex.first_level
377 * \param block_width util_format_get_blockwidth()
378 * \param is_stencil select between separate Z & Stencil
379 * \param state descriptor to update
380 */
381 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
382 struct r600_texture *tex,
383 const struct legacy_surf_level *base_level_info,
384 unsigned base_level, unsigned first_level,
385 unsigned block_width, bool is_stencil,
386 uint32_t *state)
387 {
388 uint64_t va, meta_va = 0;
389
390 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
391 tex = tex->flushed_depth_texture;
392 is_stencil = false;
393 }
394
395 va = tex->resource.gpu_address;
396
397 if (sscreen->b.chip_class >= GFX9) {
398 /* Only stencil_offset needs to be added here. */
399 if (is_stencil)
400 va += tex->surface.u.gfx9.stencil_offset;
401 else
402 va += tex->surface.u.gfx9.surf_offset;
403 } else {
404 va += base_level_info->offset;
405 }
406
407 if (vi_dcc_enabled(tex, first_level)) {
408 meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
409 tex->dcc_offset;
410
411 if (sscreen->b.chip_class <= VI)
412 meta_va += base_level_info->dcc_offset;
413 } else if (tex->tc_compatible_htile && !is_stencil) {
414 meta_va = tex->htile_buffer->gpu_address;
415 }
416
417 state[0] = va >> 8;
418 state[1] &= C_008F14_BASE_ADDRESS_HI;
419 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
420
421 state[6] &= C_008F28_COMPRESSION_EN;
422 state[7] = 0;
423
424 if (meta_va) {
425 state[6] |= S_008F28_COMPRESSION_EN(1);
426 state[7] = meta_va >> 8;
427 }
428
429 if (sscreen->b.chip_class >= GFX9) {
430 state[3] &= C_008F1C_SW_MODE;
431 state[4] &= C_008F20_PITCH_GFX9;
432
433 if (is_stencil) {
434 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
435 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch);
436 } else {
437 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
438 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch);
439 }
440
441 state[5] &= C_008F24_META_DATA_ADDRESS &
442 C_008F24_META_PIPE_ALIGNED &
443 C_008F24_META_RB_ALIGNED;
444 if (meta_va) {
445 struct gfx9_surf_meta_flags meta;
446
447 if (tex->dcc_offset)
448 meta = tex->surface.u.gfx9.dcc;
449 else
450 meta = tex->surface.u.gfx9.htile;
451
452 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
453 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
454 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
455 }
456 } else {
457 /* SI-CI-VI */
458 unsigned pitch = base_level_info->nblk_x * block_width;
459 unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
460
461 state[3] &= C_008F1C_TILING_INDEX;
462 state[3] |= S_008F1C_TILING_INDEX(index);
463 state[4] &= C_008F20_PITCH_GFX6;
464 state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
465 }
466 }
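/* Example only: the SI-CI-VI pitch math above for a block-compressed format.
 * A 512x512 BC1 texture has nblk_x = 128 blocks and block_width = 4 texels,
 * so pitch = 128 * 4 = 512 texels and the descriptor stores PITCH = 511
 * (the field holds pitch minus one).
 */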
467
468 static void si_set_sampler_view(struct si_context *sctx,
469 unsigned shader,
470 unsigned slot, struct pipe_sampler_view *view,
471 bool disallow_early_out)
472 {
473 struct si_sampler_views *views = &sctx->samplers[shader].views;
474 struct si_sampler_view *rview = (struct si_sampler_view*)view;
475 struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
476 uint32_t *desc = descs->list + slot * 16;
477
478 if (views->views[slot] == view && !disallow_early_out)
479 return;
480
481 if (view) {
482 struct r600_texture *rtex = (struct r600_texture *)view->texture;
483
484 assert(rtex); /* views with texture == NULL aren't supported */
485 pipe_sampler_view_reference(&views->views[slot], view);
486 memcpy(desc, rview->state, 8*4);
487
488 if (rtex->resource.b.b.target == PIPE_BUFFER) {
489 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
490
491 si_set_buf_desc_address(&rtex->resource,
492 view->u.buf.offset,
493 desc + 4);
494 } else {
495 bool is_separate_stencil =
496 rtex->db_compatible &&
497 rview->is_stencil_sampler;
498
499 si_set_mutable_tex_desc_fields(sctx->screen, rtex,
500 rview->base_level_info,
501 rview->base_level,
502 rview->base.u.tex.first_level,
503 rview->block_width,
504 is_separate_stencil,
505 desc);
506 }
507
508 if (rtex->resource.b.b.target != PIPE_BUFFER &&
509 rtex->fmask.size) {
510 memcpy(desc + 8,
511 rview->fmask_state, 8*4);
512 } else {
513 /* Disable FMASK and bind sampler state in [12:15]. */
514 memcpy(desc + 8,
515 null_texture_descriptor, 4*4);
516
517 if (views->sampler_states[slot])
518 memcpy(desc + 12,
519 views->sampler_states[slot]->val, 4*4);
520 }
521
522 views->enabled_mask |= 1u << slot;
523
524 /* Since this can flush, it must be done after enabled_mask is
525 * updated. */
526 si_sampler_view_add_buffer(sctx, view->texture,
527 RADEON_USAGE_READ,
528 rview->is_stencil_sampler, true);
529 } else {
530 pipe_sampler_view_reference(&views->views[slot], NULL);
531 memcpy(desc, null_texture_descriptor, 8*4);
532 /* Only clear the lower dwords of FMASK. */
533 memcpy(desc + 8, null_texture_descriptor, 4*4);
534 /* Re-set the sampler state if we are transitioning from FMASK. */
535 if (views->sampler_states[slot])
536 memcpy(desc + 12,
537 views->sampler_states[slot]->val, 4*4);
538
539 views->enabled_mask &= ~(1u << slot);
540 }
541
542 descs->dirty_mask |= 1u << slot;
543 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
544 }
545
546 static bool is_compressed_colortex(struct r600_texture *rtex)
547 {
548 return rtex->cmask.size || rtex->fmask.size ||
549 (rtex->dcc_offset && rtex->dirty_level_mask);
550 }
551
552 static void si_update_compressed_tex_shader_mask(struct si_context *sctx,
553 unsigned shader)
554 {
555 struct si_textures_info *samplers = &sctx->samplers[shader];
556 unsigned shader_bit = 1 << shader;
557
558 if (samplers->depth_texture_mask ||
559 samplers->compressed_colortex_mask ||
560 sctx->images[shader].compressed_colortex_mask)
561 sctx->compressed_tex_shader_mask |= shader_bit;
562 else
563 sctx->compressed_tex_shader_mask &= ~shader_bit;
564 }
565
566 static void si_set_sampler_views(struct pipe_context *ctx,
567 enum pipe_shader_type shader, unsigned start,
568 unsigned count,
569 struct pipe_sampler_view **views)
570 {
571 struct si_context *sctx = (struct si_context *)ctx;
572 struct si_textures_info *samplers = &sctx->samplers[shader];
573 int i;
574
575 if (!count || shader >= SI_NUM_SHADERS)
576 return;
577
578 for (i = 0; i < count; i++) {
579 unsigned slot = start + i;
580
581 if (!views || !views[i]) {
582 samplers->depth_texture_mask &= ~(1u << slot);
583 samplers->compressed_colortex_mask &= ~(1u << slot);
584 si_set_sampler_view(sctx, shader, slot, NULL, false);
585 continue;
586 }
587
588 si_set_sampler_view(sctx, shader, slot, views[i], false);
589
590 if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
591 struct r600_texture *rtex =
592 (struct r600_texture*)views[i]->texture;
593 struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
594
595 if (rtex->db_compatible &&
596 (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
597 samplers->depth_texture_mask |= 1u << slot;
598 } else {
599 samplers->depth_texture_mask &= ~(1u << slot);
600 }
601 if (is_compressed_colortex(rtex)) {
602 samplers->compressed_colortex_mask |= 1u << slot;
603 } else {
604 samplers->compressed_colortex_mask &= ~(1u << slot);
605 }
606
607 if (rtex->dcc_offset &&
608 p_atomic_read(&rtex->framebuffers_bound))
609 sctx->need_check_render_feedback = true;
610 } else {
611 samplers->depth_texture_mask &= ~(1u << slot);
612 samplers->compressed_colortex_mask &= ~(1u << slot);
613 }
614 }
615
616 si_update_compressed_tex_shader_mask(sctx, shader);
617 }
618
619 static void
620 si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
621 {
622 unsigned mask = samplers->views.enabled_mask;
623
624 while (mask) {
625 int i = u_bit_scan(&mask);
626 struct pipe_resource *res = samplers->views.views[i]->texture;
627
628 if (res && res->target != PIPE_BUFFER) {
629 struct r600_texture *rtex = (struct r600_texture *)res;
630
631 if (is_compressed_colortex(rtex)) {
632 samplers->compressed_colortex_mask |= 1u << i;
633 } else {
634 samplers->compressed_colortex_mask &= ~(1u << i);
635 }
636 }
637 }
638 }
639
640 /* IMAGE VIEWS */
641
642 static unsigned
643 si_image_descriptors_idx(unsigned shader)
644 {
645 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
646 SI_SHADER_DESCS_IMAGES;
647 }
648
649 static struct si_descriptors*
650 si_image_descriptors(struct si_context *sctx, unsigned shader)
651 {
652 return &sctx->descriptors[si_image_descriptors_idx(shader)];
653 }
654
655 static void
656 si_release_image_views(struct si_images_info *images)
657 {
658 unsigned i;
659
660 for (i = 0; i < SI_NUM_IMAGES; ++i) {
661 struct pipe_image_view *view = &images->views[i];
662
663 pipe_resource_reference(&view->resource, NULL);
664 }
665 }
666
667 static void
668 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images)
669 {
670 uint mask = images->enabled_mask;
671
672 /* Add buffers to the CS. */
673 while (mask) {
674 int i = u_bit_scan(&mask);
675 struct pipe_image_view *view = &images->views[i];
676
677 assert(view->resource);
678
679 si_sampler_view_add_buffer(sctx, view->resource,
680 RADEON_USAGE_READWRITE, false, false);
681 }
682 }
683
684 static void
685 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
686 {
687 struct si_images_info *images = &ctx->images[shader];
688
689 if (images->enabled_mask & (1u << slot)) {
690 struct si_descriptors *descs = si_image_descriptors(ctx, shader);
691
692 pipe_resource_reference(&images->views[slot].resource, NULL);
693 images->compressed_colortex_mask &= ~(1 << slot);
694
695 memcpy(descs->list + slot*8, null_image_descriptor, 8*4);
696 images->enabled_mask &= ~(1u << slot);
697 descs->dirty_mask |= 1u << slot;
698 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
699 }
700 }
701
702 static void
703 si_mark_image_range_valid(const struct pipe_image_view *view)
704 {
705 struct r600_resource *res = (struct r600_resource *)view->resource;
706
707 assert(res && res->b.b.target == PIPE_BUFFER);
708
709 util_range_add(&res->valid_buffer_range,
710 view->u.buf.offset,
711 view->u.buf.offset + view->u.buf.size);
712 }
713
714 static void si_set_shader_image(struct si_context *ctx,
715 unsigned shader,
716 unsigned slot, const struct pipe_image_view *view,
717 bool skip_decompress)
718 {
719 struct si_screen *screen = ctx->screen;
720 struct si_images_info *images = &ctx->images[shader];
721 struct si_descriptors *descs = si_image_descriptors(ctx, shader);
722 struct r600_resource *res;
723 uint32_t *desc = descs->list + slot * 8;
724
725 if (!view || !view->resource) {
726 si_disable_shader_image(ctx, shader, slot);
727 return;
728 }
729
730 res = (struct r600_resource *)view->resource;
731
732 if (&images->views[slot] != view)
733 util_copy_image_view(&images->views[slot], view);
734
735 if (res->b.b.target == PIPE_BUFFER) {
736 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
737 si_mark_image_range_valid(view);
738
739 si_make_buffer_descriptor(screen, res,
740 view->format,
741 view->u.buf.offset,
742 view->u.buf.size,
743 descs->list + slot * 8);
744 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
745
746 images->compressed_colortex_mask &= ~(1 << slot);
747 res->bind_history |= PIPE_BIND_SHADER_IMAGE;
748 } else {
749 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
750 struct r600_texture *tex = (struct r600_texture *)res;
751 unsigned level = view->u.tex.level;
752 unsigned width, height, depth;
753 bool uses_dcc = vi_dcc_enabled(tex, level);
754
755 assert(!tex->is_depth);
756 assert(tex->fmask.size == 0);
757
758 if (uses_dcc && !skip_decompress &&
759 (view->access & PIPE_IMAGE_ACCESS_WRITE ||
760 !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
761 /* If DCC can't be disabled, at least decompress it.
762 * The decompression is relatively cheap if the surface
763 * has been decompressed already.
764 */
765 if (r600_texture_disable_dcc(&ctx->b, tex))
766 uses_dcc = false;
767 else
768 ctx->b.decompress_dcc(&ctx->b.b, tex);
769 }
770
771 if (is_compressed_colortex(tex)) {
772 images->compressed_colortex_mask |= 1 << slot;
773 } else {
774 images->compressed_colortex_mask &= ~(1 << slot);
775 }
776
777 if (uses_dcc &&
778 p_atomic_read(&tex->framebuffers_bound))
779 ctx->need_check_render_feedback = true;
780
781 /* Always force the base level to the selected level.
782 *
783 * This is required for 3D textures, where otherwise
784 * selecting a single slice for non-layered bindings
785 * fails. It doesn't hurt the other targets.
786 */
787 width = u_minify(res->b.b.width0, level);
788 height = u_minify(res->b.b.height0, level);
789 depth = u_minify(res->b.b.depth0, level);
790
791 si_make_texture_descriptor(screen, tex,
792 false, res->b.b.target,
793 view->format, swizzle,
794 0, 0,
795 view->u.tex.first_layer,
796 view->u.tex.last_layer,
797 width, height, depth,
798 desc, NULL);
799 si_set_mutable_tex_desc_fields(screen, tex,
800 &tex->surface.u.legacy.level[level],
801 level, level,
802 util_format_get_blockwidth(view->format),
803 false, desc);
804 }
805
806 images->enabled_mask |= 1u << slot;
807 descs->dirty_mask |= 1u << slot;
808 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
809
810 /* Since this can flush, it must be done after enabled_mask is updated. */
811 si_sampler_view_add_buffer(ctx, &res->b.b,
812 RADEON_USAGE_READWRITE, false, true);
813 }
814
815 static void
816 si_set_shader_images(struct pipe_context *pipe,
817 enum pipe_shader_type shader,
818 unsigned start_slot, unsigned count,
819 const struct pipe_image_view *views)
820 {
821 struct si_context *ctx = (struct si_context *)pipe;
822 unsigned i, slot;
823
824 assert(shader < SI_NUM_SHADERS);
825
826 if (!count)
827 return;
828
829 assert(start_slot + count <= SI_NUM_IMAGES);
830
831 if (views) {
832 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
833 si_set_shader_image(ctx, shader, slot, &views[i], false);
834 } else {
835 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
836 si_set_shader_image(ctx, shader, slot, NULL, false);
837 }
838
839 si_update_compressed_tex_shader_mask(ctx, shader);
840 }
841
842 static void
843 si_images_update_compressed_colortex_mask(struct si_images_info *images)
844 {
845 unsigned mask = images->enabled_mask;
846
847 while (mask) {
848 int i = u_bit_scan(&mask);
849 struct pipe_resource *res = images->views[i].resource;
850
851 if (res && res->target != PIPE_BUFFER) {
852 struct r600_texture *rtex = (struct r600_texture *)res;
853
854 if (is_compressed_colortex(rtex)) {
855 images->compressed_colortex_mask |= 1 << i;
856 } else {
857 images->compressed_colortex_mask &= ~(1 << i);
858 }
859 }
860 }
861 }
862
863 /* SAMPLER STATES */
864
865 static void si_bind_sampler_states(struct pipe_context *ctx,
866 enum pipe_shader_type shader,
867 unsigned start, unsigned count, void **states)
868 {
869 struct si_context *sctx = (struct si_context *)ctx;
870 struct si_textures_info *samplers = &sctx->samplers[shader];
871 struct si_descriptors *desc = si_sampler_descriptors(sctx, shader);
872 struct si_sampler_state **sstates = (struct si_sampler_state**)states;
873 int i;
874
875 if (!count || shader >= SI_NUM_SHADERS)
876 return;
877
878 for (i = 0; i < count; i++) {
879 unsigned slot = start + i;
880
881 if (!sstates[i] ||
882 sstates[i] == samplers->views.sampler_states[slot])
883 continue;
884
885 #ifdef DEBUG
886 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
887 #endif
888 samplers->views.sampler_states[slot] = sstates[i];
889
890 /* If FMASK is bound, don't overwrite it.
891 * The sampler state will be set after FMASK is unbound.
892 */
893 if (samplers->views.views[slot] &&
894 samplers->views.views[slot]->texture &&
895 samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
896 ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
897 continue;
898
899 memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
900 desc->dirty_mask |= 1u << slot;
901 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
902 }
903 }
904
905 /* BUFFER RESOURCES */
906
907 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
908 struct si_descriptors *descs,
909 unsigned num_buffers,
910 unsigned shader_userdata_index,
911 enum radeon_bo_usage shader_usage,
912 enum radeon_bo_priority priority,
913 unsigned *ce_offset)
914 {
915 buffers->shader_usage = shader_usage;
916 buffers->priority = priority;
917 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
918
919 si_init_descriptors(descs, shader_userdata_index, 4,
920 num_buffers, NULL, ce_offset);
921 }
922
923 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
924 struct si_descriptors *descs)
925 {
926 int i;
927
928 for (i = 0; i < descs->num_elements; i++) {
929 pipe_resource_reference(&buffers->buffers[i], NULL);
930 }
931
932 FREE(buffers->buffers);
933 }
934
935 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
936 struct si_buffer_resources *buffers)
937 {
938 unsigned mask = buffers->enabled_mask;
939
940 /* Add buffers to the CS. */
941 while (mask) {
942 int i = u_bit_scan(&mask);
943
944 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
945 (struct r600_resource*)buffers->buffers[i],
946 buffers->shader_usage, buffers->priority);
947 }
948 }
949
950 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
951 struct si_descriptors *descs,
952 unsigned idx, struct pipe_resource **buf,
953 unsigned *offset, unsigned *size)
954 {
955 pipe_resource_reference(buf, buffers->buffers[idx]);
956 if (*buf) {
957 struct r600_resource *res = r600_resource(*buf);
958 const uint32_t *desc = descs->list + idx * 4;
959 uint64_t va;
960
961 *size = desc[2];
962
963 assert(G_008F04_STRIDE(desc[1]) == 0);
964 va = ((uint64_t)desc[1] << 32) | desc[0];
965
966 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
967 *offset = va - res->gpu_address;
968 }
969 }
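/* Example only (hypothetical descriptor words): reversing the packing above.
 * desc = { 0x00001000, 0x00000012, 256, ... } with STRIDE == 0 gives
 *   va      = (0x12ull << 32) | 0x1000 = 0x1200001000
 *   *size   = 256
 *   *offset = va - res->gpu_address
 */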
970
971 /* VERTEX BUFFERS */
972
973 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
974 {
975 struct si_descriptors *desc = &sctx->vertex_buffers;
976 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
977 int i;
978
979 for (i = 0; i < count; i++) {
980 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
981
982 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
983 continue;
984 if (!sctx->vertex_buffer[vb].buffer)
985 continue;
986
987 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
988 (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
989 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
990 }
991
992 if (!desc->buffer)
993 return;
994 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
995 desc->buffer, RADEON_USAGE_READ,
996 RADEON_PRIO_DESCRIPTORS);
997 }
998
999 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
1000 {
1001 struct si_vertex_element *velems = sctx->vertex_elements;
1002 struct si_descriptors *desc = &sctx->vertex_buffers;
1003 unsigned i, count;
1004 unsigned desc_list_byte_size;
1005 unsigned first_vb_use_mask;
1006 uint64_t va;
1007 uint32_t *ptr;
1008
1009 if (!sctx->vertex_buffers_dirty || !velems)
1010 return true;
1011
1012 count = velems->count;
1013
1014 if (!count)
1015 return true;
1016
1017 desc_list_byte_size = velems->desc_list_byte_size;
1018 first_vb_use_mask = velems->first_vb_use_mask;
1019
1020 /* Vertex buffer descriptors are the only ones which are uploaded
1021 * directly through a staging buffer and don't go through
1022 * the fine-grained upload path.
1023 */
1024 u_upload_alloc(sctx->b.b.const_uploader, 0,
1025 desc_list_byte_size,
1026 si_optimal_tcc_alignment(sctx, desc_list_byte_size),
1027 &desc->buffer_offset,
1028 (struct pipe_resource**)&desc->buffer, (void**)&ptr);
1029 if (!desc->buffer)
1030 return false;
1031
1032 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1033 desc->buffer, RADEON_USAGE_READ,
1034 RADEON_PRIO_DESCRIPTORS);
1035
1036 assert(count <= SI_MAX_ATTRIBS);
1037
1038 for (i = 0; i < count; i++) {
1039 struct pipe_vertex_element *ve = &velems->elements[i];
1040 struct pipe_vertex_buffer *vb;
1041 struct r600_resource *rbuffer;
1042 unsigned offset;
1043 unsigned vbo_index = ve->vertex_buffer_index;
1044 uint32_t *desc = &ptr[i*4];
1045
1046 vb = &sctx->vertex_buffer[vbo_index];
1047 rbuffer = (struct r600_resource*)vb->buffer;
1048 if (!rbuffer) {
1049 memset(desc, 0, 16);
1050 continue;
1051 }
1052
1053 offset = vb->buffer_offset + ve->src_offset;
1054 va = rbuffer->gpu_address + offset;
1055
1056 /* Fill in T# buffer resource description */
1057 desc[0] = va;
1058 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1059 S_008F04_STRIDE(vb->stride);
1060
1061 if (sctx->b.chip_class != VI && vb->stride) {
1062 /* Round up by rounding down and adding 1 */
1063 desc[2] = (vb->buffer->width0 - offset -
1064 velems->format_size[i]) /
1065 vb->stride + 1;
1066 } else {
1067 desc[2] = vb->buffer->width0 - offset;
1068 }
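/* Example only: the "round down and add 1" above. With width0 = 1000,
 * offset = 4, stride = 16 and format_size[i] = 8:
 *   (1000 - 4 - 8) / 16 + 1 = 61 + 1 = 62 records,
 * i.e. the last record (index 61) still fits entirely inside the buffer.
 */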
1069
1070 desc[3] = velems->rsrc_word3[i];
1071
1072 if (first_vb_use_mask & (1 << i)) {
1073 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1074 (struct r600_resource*)vb->buffer,
1075 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1076 }
1077 }
1078
1079 /* Don't flush the const cache. It would have a very negative effect
1080 * on performance (confirmed by testing). New descriptors are always
1081 * uploaded to a fresh new buffer, so I don't think flushing the const
1082 * cache is needed. */
1083 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
1084 if (sctx->b.chip_class >= CIK)
1085 si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
1086 sctx->vertex_buffers_dirty = false;
1087 sctx->vertex_buffer_pointer_dirty = true;
1088 return true;
1089 }
1090
1091
1092 /* CONSTANT BUFFERS */
1093
1094 static unsigned
1095 si_const_buffer_descriptors_idx(unsigned shader)
1096 {
1097 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1098 SI_SHADER_DESCS_CONST_BUFFERS;
1099 }
1100
1101 static struct si_descriptors *
1102 si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
1103 {
1104 return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
1105 }
1106
1107 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
1108 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1109 {
1110 void *tmp;
1111
1112 u_upload_alloc(sctx->b.b.const_uploader, 0, size,
1113 si_optimal_tcc_alignment(sctx, size),
1114 const_offset,
1115 (struct pipe_resource**)rbuffer, &tmp);
1116 if (*rbuffer)
1117 util_memcpy_cpu_to_le32(tmp, ptr, size);
1118 }
1119
1120 static void si_set_constant_buffer(struct si_context *sctx,
1121 struct si_buffer_resources *buffers,
1122 unsigned descriptors_idx,
1123 uint slot, const struct pipe_constant_buffer *input)
1124 {
1125 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1126 assert(slot < descs->num_elements);
1127 pipe_resource_reference(&buffers->buffers[slot], NULL);
1128
1129 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1130 * with a NULL buffer). We need to use a dummy buffer instead. */
1131 if (sctx->b.chip_class == CIK &&
1132 (!input || (!input->buffer && !input->user_buffer)))
1133 input = &sctx->null_const_buf;
1134
1135 if (input && (input->buffer || input->user_buffer)) {
1136 struct pipe_resource *buffer = NULL;
1137 uint64_t va;
1138
1139 /* Upload the user buffer if needed. */
1140 if (input->user_buffer) {
1141 unsigned buffer_offset;
1142
1143 si_upload_const_buffer(sctx,
1144 (struct r600_resource**)&buffer, input->user_buffer,
1145 input->buffer_size, &buffer_offset);
1146 if (!buffer) {
1147 /* Just unbind on failure. */
1148 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
1149 return;
1150 }
1151 va = r600_resource(buffer)->gpu_address + buffer_offset;
1152 } else {
1153 pipe_resource_reference(&buffer, input->buffer);
1154 va = r600_resource(buffer)->gpu_address + input->buffer_offset;
1155 /* Only track usage for non-user buffers. */
1156 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
1157 }
1158
1159 /* Set the descriptor. */
1160 uint32_t *desc = descs->list + slot*4;
1161 desc[0] = va;
1162 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1163 S_008F04_STRIDE(0);
1164 desc[2] = input->buffer_size;
1165 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1166 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1167 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1168 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1169 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1170 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1171
1172 buffers->buffers[slot] = buffer;
1173 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1174 (struct r600_resource*)buffer,
1175 buffers->shader_usage,
1176 buffers->priority, true);
1177 buffers->enabled_mask |= 1u << slot;
1178 } else {
1179 /* Clear the descriptor. */
1180 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1181 buffers->enabled_mask &= ~(1u << slot);
1182 }
1183
1184 descs->dirty_mask |= 1u << slot;
1185 sctx->descriptors_dirty |= 1u << descriptors_idx;
1186 }
1187
1188 void si_set_rw_buffer(struct si_context *sctx,
1189 uint slot, const struct pipe_constant_buffer *input)
1190 {
1191 si_set_constant_buffer(sctx, &sctx->rw_buffers,
1192 SI_DESCS_RW_BUFFERS, slot, input);
1193 }
1194
1195 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
1196 enum pipe_shader_type shader, uint slot,
1197 const struct pipe_constant_buffer *input)
1198 {
1199 struct si_context *sctx = (struct si_context *)ctx;
1200
1201 if (shader >= SI_NUM_SHADERS)
1202 return;
1203
1204 si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
1205 si_const_buffer_descriptors_idx(shader),
1206 slot, input);
1207 }
1208
1209 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
1210 uint slot, struct pipe_constant_buffer *cbuf)
1211 {
1212 cbuf->user_buffer = NULL;
1213 si_get_buffer_from_descriptors(
1214 &sctx->const_buffers[shader],
1215 si_const_buffer_descriptors(sctx, shader),
1216 slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1217 }
1218
1219 /* SHADER BUFFERS */
1220
1221 static unsigned
1222 si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
1223 {
1224 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1225 SI_SHADER_DESCS_SHADER_BUFFERS;
1226 }
1227
1228 static struct si_descriptors *
1229 si_shader_buffer_descriptors(struct si_context *sctx,
1230 enum pipe_shader_type shader)
1231 {
1232 return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
1233 }
1234
1235 static void si_set_shader_buffers(struct pipe_context *ctx,
1236 enum pipe_shader_type shader,
1237 unsigned start_slot, unsigned count,
1238 const struct pipe_shader_buffer *sbuffers)
1239 {
1240 struct si_context *sctx = (struct si_context *)ctx;
1241 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
1242 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
1243 unsigned i;
1244
1245 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1246
1247 for (i = 0; i < count; ++i) {
1248 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1249 struct r600_resource *buf;
1250 unsigned slot = start_slot + i;
1251 uint32_t *desc = descs->list + slot * 4;
1252 uint64_t va;
1253
1254 if (!sbuffer || !sbuffer->buffer) {
1255 pipe_resource_reference(&buffers->buffers[slot], NULL);
1256 memset(desc, 0, sizeof(uint32_t) * 4);
1257 buffers->enabled_mask &= ~(1u << slot);
1258 descs->dirty_mask |= 1u << slot;
1259 sctx->descriptors_dirty |=
1260 1u << si_shader_buffer_descriptors_idx(shader);
1261 continue;
1262 }
1263
1264 buf = (struct r600_resource *)sbuffer->buffer;
1265 va = buf->gpu_address + sbuffer->buffer_offset;
1266
1267 desc[0] = va;
1268 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1269 S_008F04_STRIDE(0);
1270 desc[2] = sbuffer->buffer_size;
1271 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1272 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1273 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1274 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1275 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1276 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1277
1278 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1279 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1280 buffers->shader_usage,
1281 buffers->priority, true);
1282 buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
1283
1284 buffers->enabled_mask |= 1u << slot;
1285 descs->dirty_mask |= 1u << slot;
1286 sctx->descriptors_dirty |=
1287 1u << si_shader_buffer_descriptors_idx(shader);
1288
1289 util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
1290 sbuffer->buffer_offset + sbuffer->buffer_size);
1291 }
1292 }
1293
1294 void si_get_shader_buffers(struct si_context *sctx,
1295 enum pipe_shader_type shader,
1296 uint start_slot, uint count,
1297 struct pipe_shader_buffer *sbuf)
1298 {
1299 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
1300 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
1301
1302 for (unsigned i = 0; i < count; ++i) {
1303 si_get_buffer_from_descriptors(
1304 buffers, descs, start_slot + i,
1305 &sbuf[i].buffer, &sbuf[i].buffer_offset,
1306 &sbuf[i].buffer_size);
1307 }
1308 }
1309
1310 /* RING BUFFERS */
1311
1312 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
1313 struct pipe_resource *buffer,
1314 unsigned stride, unsigned num_records,
1315 bool add_tid, bool swizzle,
1316 unsigned element_size, unsigned index_stride, uint64_t offset)
1317 {
1318 struct si_context *sctx = (struct si_context *)ctx;
1319 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1320 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1321
1322 /* The stride field in the resource descriptor has 14 bits */
1323 assert(stride < (1 << 14));
1324
1325 assert(slot < descs->num_elements);
1326 pipe_resource_reference(&buffers->buffers[slot], NULL);
1327
1328 if (buffer) {
1329 uint64_t va;
1330
1331 va = r600_resource(buffer)->gpu_address + offset;
1332
1333 switch (element_size) {
1334 default:
1335 assert(!"Unsupported ring buffer element size");
1336 case 0:
1337 case 2:
1338 element_size = 0;
1339 break;
1340 case 4:
1341 element_size = 1;
1342 break;
1343 case 8:
1344 element_size = 2;
1345 break;
1346 case 16:
1347 element_size = 3;
1348 break;
1349 }
1350
1351 switch (index_stride) {
1352 default:
1353 assert(!"Unsupported ring buffer index stride");
1354 case 0:
1355 case 8:
1356 index_stride = 0;
1357 break;
1358 case 16:
1359 index_stride = 1;
1360 break;
1361 case 32:
1362 index_stride = 2;
1363 break;
1364 case 64:
1365 index_stride = 3;
1366 break;
1367 }
1368
1369 if (sctx->b.chip_class >= VI && stride)
1370 num_records *= stride;
1371
1372 /* Set the descriptor. */
1373 uint32_t *desc = descs->list + slot*4;
1374 desc[0] = va;
1375 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1376 S_008F04_STRIDE(stride) |
1377 S_008F04_SWIZZLE_ENABLE(swizzle);
1378 desc[2] = num_records;
1379 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1380 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1381 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1382 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1383 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1384 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1385 S_008F0C_INDEX_STRIDE(index_stride) |
1386 S_008F0C_ADD_TID_ENABLE(add_tid);
1387
1388 if (sctx->b.chip_class >= GFX9)
1389 assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
1390 else
1391 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
1392
1393 pipe_resource_reference(&buffers->buffers[slot], buffer);
1394 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1395 (struct r600_resource*)buffer,
1396 buffers->shader_usage, buffers->priority);
1397 buffers->enabled_mask |= 1u << slot;
1398 } else {
1399 /* Clear the descriptor. */
1400 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1401 buffers->enabled_mask &= ~(1u << slot);
1402 }
1403
1404 descs->dirty_mask |= 1u << slot;
1405 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1406 }
1407
1408 /* STREAMOUT BUFFERS */
1409
1410 static void si_set_streamout_targets(struct pipe_context *ctx,
1411 unsigned num_targets,
1412 struct pipe_stream_output_target **targets,
1413 const unsigned *offsets)
1414 {
1415 struct si_context *sctx = (struct si_context *)ctx;
1416 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1417 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1418 unsigned old_num_targets = sctx->b.streamout.num_targets;
1419 unsigned i, bufidx;
1420
1421 /* We are going to unbind the buffers. Mark which caches need to be flushed. */
1422 if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
1423 /* Since streamout uses vector writes which go through TC L2
1424 * and most other clients can use TC L2 as well, we don't need
1425 * to flush it.
1426 *
1427 * The only cases which require flushing it are VGT DMA index
1428 * fetching (on <= CIK) and indirect draw data, which are rare
1429 * cases. Thus, flag the TC L2 dirtiness in the resource and
1430 * handle it at draw call time.
1431 */
1432 for (i = 0; i < sctx->b.streamout.num_targets; i++)
1433 if (sctx->b.streamout.targets[i])
1434 r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
1435
1436 /* Invalidate the scalar cache in case a streamout buffer is
1437 * going to be used as a constant buffer.
1438 *
1439 * Invalidate TC L1, because streamout bypasses it (done by
1440 * setting GLC=1 in the store instruction), but it can contain
1441 * outdated data of streamout buffers.
1442 *
1443 * VS_PARTIAL_FLUSH is required if the buffers are going to be
1444 * used as an input immediately.
1445 */
1446 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
1447 SI_CONTEXT_INV_VMEM_L1 |
1448 SI_CONTEXT_VS_PARTIAL_FLUSH;
1449 }
1450
1451 /* All readers of the streamout targets need to be finished before we can
1452 * start writing to the targets.
1453 */
1454 if (num_targets)
1455 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1456 SI_CONTEXT_CS_PARTIAL_FLUSH;
1457
1458 /* Streamout buffers must be bound in 2 places:
1459 * 1) in VGT by setting the VGT_STRMOUT registers
1460 * 2) as shader resources
1461 */
1462
1463 /* Set the VGT regs. */
1464 r600_set_streamout_targets(ctx, num_targets, targets, offsets);
1465
1466 /* Set the shader resources. */
1467 for (i = 0; i < num_targets; i++) {
1468 bufidx = SI_VS_STREAMOUT_BUF0 + i;
1469
1470 if (targets[i]) {
1471 struct pipe_resource *buffer = targets[i]->buffer;
1472 uint64_t va = r600_resource(buffer)->gpu_address;
1473
1474 /* Set the descriptor.
1475 *
1476 * On VI, the format must be non-INVALID, otherwise
1477 * the buffer will be considered not bound and store
1478 * instructions will be no-ops.
1479 */
1480 uint32_t *desc = descs->list + bufidx*4;
1481 desc[0] = va;
1482 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
1483 desc[2] = 0xffffffff;
1484 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1485 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1486 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1487 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1488 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1489
1490 /* Set the resource. */
1491 pipe_resource_reference(&buffers->buffers[bufidx],
1492 buffer);
1493 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1494 (struct r600_resource*)buffer,
1495 buffers->shader_usage,
1496 RADEON_PRIO_SHADER_RW_BUFFER,
1497 true);
1498 r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
1499
1500 buffers->enabled_mask |= 1u << bufidx;
1501 } else {
1502 /* Clear the descriptor and unset the resource. */
1503 memset(descs->list + bufidx*4, 0,
1504 sizeof(uint32_t) * 4);
1505 pipe_resource_reference(&buffers->buffers[bufidx],
1506 NULL);
1507 buffers->enabled_mask &= ~(1u << bufidx);
1508 }
1509 descs->dirty_mask |= 1u << bufidx;
1510 }
1511 for (; i < old_num_targets; i++) {
1512 bufidx = SI_VS_STREAMOUT_BUF0 + i;
1513 /* Clear the descriptor and unset the resource. */
1514 memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
1515 pipe_resource_reference(&buffers->buffers[bufidx], NULL);
1516 buffers->enabled_mask &= ~(1u << bufidx);
1517 descs->dirty_mask |= 1u << bufidx;
1518 }
1519
1520 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1521 }
1522
1523 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
1524 uint32_t *desc, uint64_t old_buf_va,
1525 struct pipe_resource *new_buf)
1526 {
1527 /* Retrieve the buffer offset from the descriptor. */
1528 uint64_t old_desc_va =
1529 desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
1530
1531 assert(old_buf_va <= old_desc_va);
1532 uint64_t offset_within_buffer = old_desc_va - old_buf_va;
1533
1534 /* Update the descriptor. */
1535 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer,
1536 desc);
1537 }
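/* Example only (hypothetical addresses): what the helper above preserves.
 * If the old buffer lived at 0x1000 and the descriptor pointed at 0x1040,
 * the binding offset is 0x40; after reallocation to 0x8000 the descriptor
 * is rewritten to 0x8040, keeping the same offset within the buffer.
 */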
1538
1539 /* INTERNAL CONST BUFFERS */
1540
1541 static void si_set_polygon_stipple(struct pipe_context *ctx,
1542 const struct pipe_poly_stipple *state)
1543 {
1544 struct si_context *sctx = (struct si_context *)ctx;
1545 struct pipe_constant_buffer cb = {};
1546 unsigned stipple[32];
1547 int i;
1548
1549 for (i = 0; i < 32; i++)
1550 stipple[i] = util_bitreverse(state->stipple[i]);
1551
1552 cb.user_buffer = stipple;
1553 cb.buffer_size = sizeof(stipple);
1554
1555 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1556 }
1557
1558 /* TEXTURE METADATA ENABLE/DISABLE */
1559
1560 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1561 * while the texture is bound, possibly by a different context. In that case,
1562 * call this function to update compressed_colortex_masks.
1563 */
1564 void si_update_compressed_colortex_masks(struct si_context *sctx)
1565 {
1566 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1567 si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]);
1568 si_images_update_compressed_colortex_mask(&sctx->images[i]);
1569 si_update_compressed_tex_shader_mask(sctx, i);
1570 }
1571 }
1572
1573 /* BUFFER DISCARD/INVALIDATION */
1574
1575 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
1576 static void si_reset_buffer_resources(struct si_context *sctx,
1577 struct si_buffer_resources *buffers,
1578 unsigned descriptors_idx,
1579 struct pipe_resource *buf,
1580 uint64_t old_va)
1581 {
1582 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1583 unsigned mask = buffers->enabled_mask;
1584
1585 while (mask) {
1586 unsigned i = u_bit_scan(&mask);
1587 if (buffers->buffers[i] == buf) {
1588 si_desc_reset_buffer_offset(&sctx->b.b,
1589 descs->list + i*4,
1590 old_va, buf);
1591 descs->dirty_mask |= 1u << i;
1592 sctx->descriptors_dirty |= 1u << descriptors_idx;
1593
1594 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1595 (struct r600_resource *)buf,
1596 buffers->shader_usage,
1597 buffers->priority, true);
1598 }
1599 }
1600 }
1601
1602 /* Reallocate a buffer and update all resource bindings where the buffer is
1603 * bound.
1604 *
1605 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1606 * idle by discarding its contents. Apps usually tell us when to do this using
1607 * map_buffer flags, for example.
1608 */
1609 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1610 {
1611 struct si_context *sctx = (struct si_context*)ctx;
1612 struct r600_resource *rbuffer = r600_resource(buf);
1613 unsigned i, shader;
1614 uint64_t old_va = rbuffer->gpu_address;
1615 unsigned num_elems = sctx->vertex_elements ?
1616 sctx->vertex_elements->count : 0;
1617
1618 /* Reallocate the buffer in the same pipe_resource. */
1619 r600_alloc_resource(&sctx->screen->b, rbuffer);
1620
1621 /* We changed the buffer, now we need to bind it where the old one
1622 * was bound. This consists of 2 things:
1623 * 1) Updating the resource descriptor and dirtying it.
1624 * 2) Adding a relocation to the CS, so that it's usable.
1625 */
1626
1627 /* Vertex buffers. */
1628 if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
1629 for (i = 0; i < num_elems; i++) {
1630 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
1631
1632 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1633 continue;
1634 if (!sctx->vertex_buffer[vb].buffer)
1635 continue;
1636
1637 if (sctx->vertex_buffer[vb].buffer == buf) {
1638 sctx->vertex_buffers_dirty = true;
1639 break;
1640 }
1641 }
1642 }
1643
1644 /* Streamout buffers. (other internal buffers can't be invalidated) */
1645 if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
1646 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1647 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1648 struct si_descriptors *descs =
1649 &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1650
1651 if (buffers->buffers[i] != buf)
1652 continue;
1653
1654 si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1655 old_va, buf);
1656 descs->dirty_mask |= 1u << i;
1657 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1658
1659 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1660 rbuffer, buffers->shader_usage,
1661 RADEON_PRIO_SHADER_RW_BUFFER,
1662 true);
1663
1664 /* Update the streamout state. */
1665 if (sctx->b.streamout.begin_emitted)
1666 r600_emit_streamout_end(&sctx->b);
1667 sctx->b.streamout.append_bitmask =
1668 sctx->b.streamout.enabled_mask;
1669 r600_streamout_buffers_dirty(&sctx->b);
1670 }
1671 }
1672
1673 /* Constant and shader buffers. */
1674 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
1675 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1676 si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
1677 si_const_buffer_descriptors_idx(shader),
1678 buf, old_va);
1679 }
1680
1681 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
1682 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1683 si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
1684 si_shader_buffer_descriptors_idx(shader),
1685 buf, old_va);
1686 }
1687
1688 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
1689 /* Texture buffers - update bindings. */
1690 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1691 struct si_sampler_views *views = &sctx->samplers[shader].views;
1692 struct si_descriptors *descs =
1693 si_sampler_descriptors(sctx, shader);
1694 unsigned mask = views->enabled_mask;
1695
1696 while (mask) {
1697 unsigned i = u_bit_scan(&mask);
1698 if (views->views[i]->texture == buf) {
1699 si_desc_reset_buffer_offset(ctx,
1700 descs->list +
1701 i * 16 + 4,
1702 old_va, buf);
1703 descs->dirty_mask |= 1u << i;
1704 sctx->descriptors_dirty |=
1705 1u << si_sampler_descriptors_idx(shader);
1706
1707 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1708 rbuffer, RADEON_USAGE_READ,
1709 RADEON_PRIO_SAMPLER_BUFFER,
1710 true);
1711 }
1712 }
1713 }
1714 }
1715
1716 /* Shader images */
1717 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
1718 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1719 struct si_images_info *images = &sctx->images[shader];
1720 struct si_descriptors *descs =
1721 si_image_descriptors(sctx, shader);
1722 unsigned mask = images->enabled_mask;
1723
1724 while (mask) {
1725 unsigned i = u_bit_scan(&mask);
1726
1727 if (images->views[i].resource == buf) {
1728 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1729 si_mark_image_range_valid(&images->views[i]);
1730
1731 si_desc_reset_buffer_offset(
1732 ctx, descs->list + i * 8 + 4,
1733 old_va, buf);
1734 descs->dirty_mask |= 1u << i;
1735 sctx->descriptors_dirty |=
1736 1u << si_image_descriptors_idx(shader);
1737
1738 radeon_add_to_buffer_list_check_mem(
1739 &sctx->b, &sctx->b.gfx, rbuffer,
1740 RADEON_USAGE_READWRITE,
1741 RADEON_PRIO_SAMPLER_BUFFER, true);
1742 }
1743 }
1744 }
1745 }
1746 }
1747
/* Update mutable image descriptor fields of all bound textures. */
void si_update_all_texture_descriptors(struct si_context *sctx)
{
	unsigned shader;

	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
		struct si_sampler_views *samplers = &sctx->samplers[shader].views;
		struct si_images_info *images = &sctx->images[shader];
		unsigned mask;

		/* Images. */
		mask = images->enabled_mask;
		while (mask) {
			unsigned i = u_bit_scan(&mask);
			struct pipe_image_view *view = &images->views[i];

			if (!view->resource ||
			    view->resource->target == PIPE_BUFFER)
				continue;

			si_set_shader_image(sctx, shader, i, view, true);
		}

		/* Sampler views. */
		mask = samplers->enabled_mask;
		while (mask) {
			unsigned i = u_bit_scan(&mask);
			struct pipe_sampler_view *view = samplers->views[i];

			if (!view ||
			    !view->texture ||
			    view->texture->target == PIPE_BUFFER)
				continue;

			si_set_sampler_view(sctx, shader, i,
					    samplers->views[i], true);
		}

		si_update_compressed_tex_shader_mask(sctx, shader);
	}
}

/* SHADER USER DATA */

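/* Mark all descriptor pointers of one shader stage as dirty so they are
 * re-emitted, and dirty the vertex buffer pointer as well if the stage is
 * the vertex shader.
 */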
static void si_mark_shader_pointers_dirty(struct si_context *sctx,
					  unsigned shader)
{
	sctx->shader_pointers_dirty |=
		u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
				  SI_NUM_SHADER_DESCS);

	if (shader == PIPE_SHADER_VERTEX)
		sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;

	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}

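/* A new command stream starts with no packets in it, so every shader
 * pointer (and the vertex buffer pointer, if any) must be re-emitted.
 */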
static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
{
	sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
	sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}

/* Set a base register address for user data constants in the given shader.
 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
 */
static void si_set_user_data_base(struct si_context *sctx,
				  unsigned shader, uint32_t new_base)
{
	uint32_t *base = &sctx->shader_userdata.sh_base[shader];

	if (*base != new_base) {
		*base = new_base;

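		/* A base of 0 means the stage is currently unused
		 * (see si_shader_change_notify), so there is nothing
		 * to re-emit for it. */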
		if (new_base)
			si_mark_shader_pointers_dirty(sctx, shader);
	}
}

/* This must be called when these shaders are changed from non-NULL to NULL
 * and vice versa:
 * - geometry shader
 * - tessellation control shader
 * - tessellation evaluation shader
 */
void si_shader_change_notify(struct si_context *sctx)
{
	/* VS can be bound as VS, ES, or LS. */
	if (sctx->tes_shader.cso) {
		if (sctx->b.chip_class >= GFX9) {
			si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
					      R_00B430_SPI_SHADER_USER_DATA_LS_0);
		} else {
			si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
					      R_00B530_SPI_SHADER_USER_DATA_LS_0);
		}
	} else if (sctx->gs_shader.cso) {
		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
	} else {
		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
				      R_00B130_SPI_SHADER_USER_DATA_VS_0);
	}

	/* TES can be bound as ES or VS, or not bound at all. */
	if (sctx->tes_shader.cso) {
		if (sctx->gs_shader.cso)
			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
					      R_00B330_SPI_SHADER_USER_DATA_ES_0);
		else
			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
					      R_00B130_SPI_SHADER_USER_DATA_VS_0);
	} else {
		si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
	}
}

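/* Emit one SET_SH_REG packet that writes the 64-bit GPU address of a
 * descriptor list into two consecutive user data SGPRs of the stage
 * selected by sh_base.
 */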
static void si_emit_shader_pointer(struct si_context *sctx,
				   struct si_descriptors *desc,
				   unsigned sh_base)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	uint64_t va;

	assert(desc->buffer);

	va = desc->buffer->gpu_address +
	     desc->buffer_offset;

	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
	radeon_emit(cs, va);
	radeon_emit(cs, va >> 32);
}

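/* Emit all dirty graphics descriptor pointers. The RW buffer list is
 * emitted to a fixed set of stage registers; the per-shader lists are
 * only emitted for stages that currently have a user data base set.
 */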
void si_emit_graphics_shader_userdata(struct si_context *sctx,
				      struct r600_atom *atom)
{
	unsigned mask;
	uint32_t *sh_base = sctx->shader_userdata.sh_base;
	struct si_descriptors *descs;

	descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];

	if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
		si_emit_shader_pointer(sctx, descs,
				       R_00B030_SPI_SHADER_USER_DATA_PS_0);
		si_emit_shader_pointer(sctx, descs,
				       R_00B130_SPI_SHADER_USER_DATA_VS_0);
		si_emit_shader_pointer(sctx, descs,
				       R_00B330_SPI_SHADER_USER_DATA_ES_0);

		/* GFX9 merged LS-HS and ES-GS. Only set RW_BUFFERS for ES and LS. */
		if (sctx->b.chip_class >= GFX9) {
			si_emit_shader_pointer(sctx, descs,
					       R_00B430_SPI_SHADER_USER_DATA_LS_0);
		} else {
			si_emit_shader_pointer(sctx, descs,
					       R_00B230_SPI_SHADER_USER_DATA_GS_0);
			si_emit_shader_pointer(sctx, descs,
					       R_00B430_SPI_SHADER_USER_DATA_HS_0);
		}
	}

	mask = sctx->shader_pointers_dirty &
	       u_bit_consecutive(SI_DESCS_FIRST_SHADER,
				 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);

	while (mask) {
		unsigned i = u_bit_scan(&mask);
		unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
		unsigned base = sh_base[shader];

		if (base)
			si_emit_shader_pointer(sctx, descs + i, base);
	}
	sctx->shader_pointers_dirty &=
		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);

	if (sctx->vertex_buffer_pointer_dirty) {
		si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
				       sh_base[PIPE_SHADER_VERTEX]);
		sctx->vertex_buffer_pointer_dirty = false;
	}
}

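/* Emit all dirty compute descriptor pointers. Compute user data always
 * starts at R_00B900_COMPUTE_USER_DATA_0.
 */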
void si_emit_compute_shader_userdata(struct si_context *sctx)
{
	unsigned base = R_00B900_COMPUTE_USER_DATA_0;
	struct si_descriptors *descs = sctx->descriptors;
	unsigned compute_mask =
		u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
	unsigned mask = sctx->shader_pointers_dirty & compute_mask;

	while (mask) {
		unsigned i = u_bit_scan(&mask);

		si_emit_shader_pointer(sctx, descs + i, base);
	}
	sctx->shader_pointers_dirty &= ~compute_mask;
}

/* INIT/DEINIT/UPLOAD */

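/* Create all descriptor lists, carve up CE RAM between them, install the
 * pipe_context callbacks that modify descriptors, and set the default
 * SPI_SHADER_USER_DATA mappings.
 */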
void si_init_all_descriptors(struct si_context *sctx)
{
	int i;
	unsigned ce_offset = 0;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		/* GFX9 has only 4KB of CE, while previous chips had 32KB.
		 * Rarely used descriptors don't use CE RAM.
		 */
		bool big_ce = sctx->b.chip_class <= VI;
		bool images_use_ce = big_ce;
		bool shaderbufs_use_ce = big_ce ||
					 i == PIPE_SHADER_COMPUTE;
		bool samplers_use_ce = big_ce ||
				       i == PIPE_SHADER_FRAGMENT;

		si_init_buffer_resources(&sctx->const_buffers[i],
					 si_const_buffer_descriptors(sctx, i),
					 SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
					 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
					 &ce_offset);
		si_init_buffer_resources(&sctx->shader_buffers[i],
					 si_shader_buffer_descriptors(sctx, i),
					 SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
					 shaderbufs_use_ce ? &ce_offset : NULL);

		si_init_descriptors(si_sampler_descriptors(sctx, i),
				    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
				    null_texture_descriptor,
				    samplers_use_ce ? &ce_offset : NULL);

		si_init_descriptors(si_image_descriptors(sctx, i),
				    SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
				    null_image_descriptor,
				    images_use_ce ? &ce_offset : NULL);
	}

	si_init_buffer_resources(&sctx->rw_buffers,
				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
				 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
				 &ce_offset);
	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
			    4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);

	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);

	if (sctx->b.chip_class >= GFX9)
		assert(ce_offset <= 4096);
	else
		assert(ce_offset <= 32768);

	/* Set pipe_context functions. */
	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
	sctx->b.b.set_shader_images = si_set_shader_images;
	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
	sctx->b.b.set_sampler_views = si_set_sampler_views;
	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
	sctx->b.invalidate_buffer = si_invalidate_buffer;

	/* Shader user data. */
	si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
		     si_emit_graphics_shader_userdata);

	/* Set default and immutable mappings. */
	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);

	if (sctx->b.chip_class >= GFX9) {
		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
	} else {
		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
				      R_00B430_SPI_SHADER_USER_DATA_HS_0);
		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
				      R_00B230_SPI_SHADER_USER_DATA_GS_0);
	}
	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}

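/* Upload all dirty graphics descriptor lists and mark their shader
 * pointers for re-emission. Returns false on an upload failure.
 */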
bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
{
	const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
	unsigned dirty = sctx->descriptors_dirty & mask;

	/* Assume nothing will go wrong: */
	sctx->shader_pointers_dirty |= dirty;

	while (dirty) {
		unsigned i = u_bit_scan(&dirty);

		if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
					   &sctx->shader_userdata.atom))
			return false;
	}

	sctx->descriptors_dirty &= ~mask;
	return true;
}

bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
	/* This does not update rw_buffers: compute shaders don't need them,
	 * and the compute input buffer uses the same SGPRs anyway.
	 */
	const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
						SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
	unsigned dirty = sctx->descriptors_dirty & mask;

	/* Assume nothing will go wrong: */
	sctx->shader_pointers_dirty |= dirty;

	while (dirty) {
		unsigned i = u_bit_scan(&dirty);

		if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
			return false;
	}

	sctx->descriptors_dirty &= ~mask;

	return true;
}

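/* Drop all references held by the descriptor machinery: bound buffers,
 * sampler and image views, and the descriptor buffers themselves.
 */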
void si_release_all_descriptors(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_release_buffer_resources(&sctx->const_buffers[i],
					    si_const_buffer_descriptors(sctx, i));
		si_release_buffer_resources(&sctx->shader_buffers[i],
					    si_shader_buffer_descriptors(sctx, i));
		si_release_sampler_views(&sctx->samplers[i].views);
		si_release_image_views(&sctx->images[i]);
	}
	si_release_buffer_resources(&sctx->rw_buffers,
				    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_release_descriptors(&sctx->descriptors[i]);
	si_release_descriptors(&sctx->vertex_buffers);
}

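/* Re-add all descriptor buffers and currently bound resources to the new
 * command stream's buffer list and mark every shader pointer dirty.
 */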
void si_all_descriptors_begin_new_cs(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
		si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
	}
	si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
	si_vertex_buffers_begin_new_cs(sctx);

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);

	si_shader_userdata_begin_new_cs(sctx);
}