radeonsi: initialize atom IDs
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Christian König <christian.koenig@amd.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_shader.h"
29 #include "sid.h"
30 #include "radeon/r600_cs.h"
31
32 #include "util/u_dual_blend.h"
33 #include "util/u_format.h"
34 #include "util/u_format_s3tc.h"
35 #include "util/u_memory.h"
36 #include "util/u_pstipple.h"
37
38 static void si_init_atom(struct si_context *sctx,
39 struct r600_atom *atom, struct r600_atom **list_elem,
40 void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
41 unsigned num_dw)
42 {
43 atom->emit = (void*)emit_func;
44 atom->num_dw = num_dw;
45 atom->dirty = false;
46 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
47 *list_elem = atom;
48 }
49
50 unsigned si_array_mode(unsigned mode)
51 {
52 switch (mode) {
53 case RADEON_SURF_MODE_LINEAR_ALIGNED:
54 return V_009910_ARRAY_LINEAR_ALIGNED;
55 case RADEON_SURF_MODE_1D:
56 return V_009910_ARRAY_1D_TILED_THIN1;
57 case RADEON_SURF_MODE_2D:
58 return V_009910_ARRAY_2D_TILED_THIN1;
59 default:
60 case RADEON_SURF_MODE_LINEAR:
61 return V_009910_ARRAY_LINEAR_GENERAL;
62 }
63 }
64
/* Return the ADDR_SURF_*_BANK (or raw NUM_BANKS) value for a texture.
 *
 * Prefers the kernel-provided tile mode arrays (CIK macrotile array on
 * CIK+, SI tile mode array on SI); falls back to the screen-wide tiling
 * info otherwise. */
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
	if (sscreen->b.chip_class >= CIK &&
	    sscreen->b.info.cik_macrotile_mode_array_valid) {
		unsigned index, tileb;

		/* Bytes in an 8x8 tile, clamped to the tile split size. */
		tileb = 8 * 8 * tex->surface.bpe;
		tileb = MIN2(tex->surface.tile_split, tileb);

		/* index = log2(tileb / 64): selects the macrotile mode entry. */
		for (index = 0; tileb > 64; index++) {
			tileb >>= 1;
		}
		assert(index < 16);

		/* The num-banks field occupies bits [7:6] of the entry. */
		return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
	}

	if (sscreen->b.chip_class == SI &&
	    sscreen->b.info.si_tile_mode_array_valid) {
		/* Don't use stencil_tiling_index, because num_banks is always
		 * read from the depth mode. */
		unsigned tile_mode_index = tex->surface.tiling_index[0];
		assert(tile_mode_index < 32);

		return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]);
	}

	/* The old way. */
	switch (sscreen->b.tiling_info.num_banks) {
	case 2:
		return V_02803C_ADDR_SURF_2_BANK;
	case 4:
		return V_02803C_ADDR_SURF_4_BANK;
	case 8:
	default:
		return V_02803C_ADDR_SURF_8_BANK;
	case 16:
		return V_02803C_ADDR_SURF_16_BANK;
	}
}
105
106 unsigned cik_tile_split(unsigned tile_split)
107 {
108 switch (tile_split) {
109 case 64:
110 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
111 break;
112 case 128:
113 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
114 break;
115 case 256:
116 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
117 break;
118 case 512:
119 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
120 break;
121 default:
122 case 1024:
123 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
124 break;
125 case 2048:
126 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
127 break;
128 case 4096:
129 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
130 break;
131 }
132 return tile_split;
133 }
134
135 unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
136 {
137 switch (macro_tile_aspect) {
138 default:
139 case 1:
140 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
141 break;
142 case 2:
143 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
144 break;
145 case 4:
146 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
147 break;
148 case 8:
149 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
150 break;
151 }
152 return macro_tile_aspect;
153 }
154
155 unsigned cik_bank_wh(unsigned bankwh)
156 {
157 switch (bankwh) {
158 default:
159 case 1:
160 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
161 break;
162 case 2:
163 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
164 break;
165 case 4:
166 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
167 break;
168 case 8:
169 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
170 break;
171 }
172 return bankwh;
173 }
174
/* Return the DB PIPE_CONFIG field for the given tile mode.
 *
 * Uses the kernel-provided tile mode array when available; otherwise
 * guesses from the pipe/backend counts. */
unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
{
	if (sscreen->b.info.si_tile_mode_array_valid) {
		uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];

		return G_009910_PIPE_CONFIG(gb_tile_mode);
	}

	/* This is probably broken for a lot of chips, but it's only used
	 * if the kernel cannot return the tile mode array for CIK. */
	switch (sscreen->b.info.r600_num_tile_pipes) {
	case 16:
		return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
	case 8:
		return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
	case 4:
	default:
		/* The 4-pipe config depends on the number of backends. */
		if (sscreen->b.info.r600_num_backends == 4)
			return V_02803C_X_ADDR_SURF_P4_16X16;
		else
			return V_02803C_X_ADDR_SURF_P4_8X16;
	case 2:
		return V_02803C_ADDR_SURF_P2;
	}
}
200
201 static unsigned si_map_swizzle(unsigned swizzle)
202 {
203 switch (swizzle) {
204 case UTIL_FORMAT_SWIZZLE_Y:
205 return V_008F0C_SQ_SEL_Y;
206 case UTIL_FORMAT_SWIZZLE_Z:
207 return V_008F0C_SQ_SEL_Z;
208 case UTIL_FORMAT_SWIZZLE_W:
209 return V_008F0C_SQ_SEL_W;
210 case UTIL_FORMAT_SWIZZLE_0:
211 return V_008F0C_SQ_SEL_0;
212 case UTIL_FORMAT_SWIZZLE_1:
213 return V_008F0C_SQ_SEL_1;
214 default: /* UTIL_FORMAT_SWIZZLE_X */
215 return V_008F0C_SQ_SEL_X;
216 }
217 }
218
219 static uint32_t S_FIXED(float value, uint32_t frac_bits)
220 {
221 return value * (1 << frac_bits);
222 }
223
/* Pack a float into 12.4 unsigned fixed point, clamped to [0, 0xffff].
 * (4096.0 is the largest representable value, 0xffff / 16.) */
static unsigned si_pack_float_12p4(float x)
{
	if (x <= 0)
		return 0;
	if (x >= 4096)
		return 0xffff;
	return x * 16;
}
230
231 /*
232 * Inferred framebuffer and blender state.
233 *
234 * One of the reasons this must be derived from the framebuffer state is that:
235 * - The blend state mask is 0xf most of the time.
236 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
237 * so COLOR1 is enabled pretty much all the time.
238 * So CB_TARGET_MASK is the only register that can disable COLOR1.
239 *
240 * Another reason is to avoid a hang with dual source blending.
241 */
/* Derive and emit CB_TARGET_MASK from the bound framebuffer and the
 * current blend state (see the comment above for why this register
 * must be inferred rather than set directly). */
void si_update_fb_blend_state(struct si_context *sctx)
{
	struct si_pm4_state *pm4;
	struct si_state_blend *blend = sctx->queued.named.blend;
	uint32_t mask = 0, i;

	/* No blend state bound yet: nothing to derive. */
	if (blend == NULL)
		return;

	pm4 = CALLOC_STRUCT(si_pm4_state);
	if (pm4 == NULL)
		return;

	/* Enable writes only for colorbuffers that are actually bound
	 * (4 mask bits per colorbuffer). */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
		if (sctx->framebuffer.state.cbufs[i])
			mask |= 0xf << (4*i);
	mask &= blend->cb_target_mask;

	/* Avoid a hang that happens when dual source blending is enabled
	 * but there is not enough color outputs. This is undefined behavior,
	 * so disable color writes completely.
	 *
	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
	 */
	if (blend->dual_src_blend &&
	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
		mask = 0;

	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
	si_pm4_set_state(sctx, fb_blend, pm4);
}
273
274 /*
275 * Blender functions
276 */
277
278 static uint32_t si_translate_blend_function(int blend_func)
279 {
280 switch (blend_func) {
281 case PIPE_BLEND_ADD:
282 return V_028780_COMB_DST_PLUS_SRC;
283 case PIPE_BLEND_SUBTRACT:
284 return V_028780_COMB_SRC_MINUS_DST;
285 case PIPE_BLEND_REVERSE_SUBTRACT:
286 return V_028780_COMB_DST_MINUS_SRC;
287 case PIPE_BLEND_MIN:
288 return V_028780_COMB_MIN_DST_SRC;
289 case PIPE_BLEND_MAX:
290 return V_028780_COMB_MAX_DST_SRC;
291 default:
292 R600_ERR("Unknown blend function %d\n", blend_func);
293 assert(0);
294 break;
295 }
296 return 0;
297 }
298
299 static uint32_t si_translate_blend_factor(int blend_fact)
300 {
301 switch (blend_fact) {
302 case PIPE_BLENDFACTOR_ONE:
303 return V_028780_BLEND_ONE;
304 case PIPE_BLENDFACTOR_SRC_COLOR:
305 return V_028780_BLEND_SRC_COLOR;
306 case PIPE_BLENDFACTOR_SRC_ALPHA:
307 return V_028780_BLEND_SRC_ALPHA;
308 case PIPE_BLENDFACTOR_DST_ALPHA:
309 return V_028780_BLEND_DST_ALPHA;
310 case PIPE_BLENDFACTOR_DST_COLOR:
311 return V_028780_BLEND_DST_COLOR;
312 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
313 return V_028780_BLEND_SRC_ALPHA_SATURATE;
314 case PIPE_BLENDFACTOR_CONST_COLOR:
315 return V_028780_BLEND_CONSTANT_COLOR;
316 case PIPE_BLENDFACTOR_CONST_ALPHA:
317 return V_028780_BLEND_CONSTANT_ALPHA;
318 case PIPE_BLENDFACTOR_ZERO:
319 return V_028780_BLEND_ZERO;
320 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
321 return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
322 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
323 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
324 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
325 return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
326 case PIPE_BLENDFACTOR_INV_DST_COLOR:
327 return V_028780_BLEND_ONE_MINUS_DST_COLOR;
328 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
329 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
330 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
331 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
332 case PIPE_BLENDFACTOR_SRC1_COLOR:
333 return V_028780_BLEND_SRC1_COLOR;
334 case PIPE_BLENDFACTOR_SRC1_ALPHA:
335 return V_028780_BLEND_SRC1_ALPHA;
336 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
337 return V_028780_BLEND_INV_SRC1_COLOR;
338 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
339 return V_028780_BLEND_INV_SRC1_ALPHA;
340 default:
341 R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
342 assert(0);
343 break;
344 }
345 return 0;
346 }
347
348 static void *si_create_blend_state_mode(struct pipe_context *ctx,
349 const struct pipe_blend_state *state,
350 unsigned mode)
351 {
352 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
353 struct si_pm4_state *pm4 = &blend->pm4;
354
355 uint32_t color_control = 0;
356
357 if (blend == NULL)
358 return NULL;
359
360 blend->alpha_to_one = state->alpha_to_one;
361 blend->dual_src_blend = util_blend_state_is_dual(state, 0);
362
363 if (state->logicop_enable) {
364 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
365 } else {
366 color_control |= S_028808_ROP3(0xcc);
367 }
368
369 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
370 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
371 S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
372 S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
373 S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
374 S_028B70_ALPHA_TO_MASK_OFFSET3(2));
375
376 blend->cb_target_mask = 0;
377 for (int i = 0; i < 8; i++) {
378 /* state->rt entries > 0 only written if independent blending */
379 const int j = state->independent_blend_enable ? i : 0;
380
381 unsigned eqRGB = state->rt[j].rgb_func;
382 unsigned srcRGB = state->rt[j].rgb_src_factor;
383 unsigned dstRGB = state->rt[j].rgb_dst_factor;
384 unsigned eqA = state->rt[j].alpha_func;
385 unsigned srcA = state->rt[j].alpha_src_factor;
386 unsigned dstA = state->rt[j].alpha_dst_factor;
387
388 unsigned blend_cntl = 0;
389
390 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
391 blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
392
393 if (!state->rt[j].blend_enable) {
394 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
395 continue;
396 }
397
398 blend_cntl |= S_028780_ENABLE(1);
399 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
400 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
401 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
402
403 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
404 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
405 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
406 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
407 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
408 }
409 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
410 }
411
412 if (blend->cb_target_mask) {
413 color_control |= S_028808_MODE(mode);
414 } else {
415 color_control |= S_028808_MODE(V_028808_CB_DISABLE);
416 }
417 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
418
419 return blend;
420 }
421
/* pipe_context::create_blend_state: normal CB mode. */
static void *si_create_blend_state(struct pipe_context *ctx,
				   const struct pipe_blend_state *state)
{
	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
}
427
/* pipe_context::bind_blend_state */
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
	/* CB_TARGET_MASK depends on both the blend state and the framebuffer. */
	si_update_fb_blend_state(sctx);
}
434
/* pipe_context::delete_blend_state */
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}
440
441 static void si_set_blend_color(struct pipe_context *ctx,
442 const struct pipe_blend_color *state)
443 {
444 struct si_context *sctx = (struct si_context *)ctx;
445 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
446
447 if (pm4 == NULL)
448 return;
449
450 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
451 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
452 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
453 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
454
455 si_pm4_set_state(sctx, blend_color, pm4);
456 }
457
458 /*
459 * Clipping, scissors and viewport
460 */
461
/* pipe_context::set_clip_state: program the 6 user clip planes into
 * PA_CL_UCP_* and also upload them as a driver-state constant buffer. */
static void si_set_clip_state(struct pipe_context *ctx,
			      const struct pipe_clip_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
	struct pipe_constant_buffer cb;

	if (pm4 == NULL)
		return;

	/* Each clip plane occupies 4 consecutive dwords (16 bytes). */
	for (int i = 0; i < 6; i++) {
		si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
			       fui(state->ucp[i][0]));
		si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
			       fui(state->ucp[i][1]));
		si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
			       fui(state->ucp[i][2]));
		si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
			       fui(state->ucp[i][3]));
	}

	/* Mirror the planes into the VS driver-state constant buffer
	 * (8 vec4s = 128 bytes; user memory, not a pipe resource). */
	cb.buffer = NULL;
	cb.user_buffer = state->ucp;
	cb.buffer_offset = 0;
	cb.buffer_size = 4*4*8;
	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
	pipe_resource_reference(&cb.buffer, NULL);

	si_pm4_set_state(sctx, clip, pm4);
}
492
493 #define SIX_BITS 0x3F
494
/* Emit PA_CL_VS_OUT_CNTL and PA_CL_CLIP_CNTL, derived from the current
 * VS outputs and the bound rasterizer state. */
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct tgsi_shader_info *info = si_get_vs_info(sctx);
	unsigned window_space =
	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	/* If the VS writes clipvertex, all 6 clip distances may be used. */
	unsigned clipdist_mask =
		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;

	r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
		S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
					    info->writes_edgeflag ||
					    info->writes_layer ||
					     info->writes_viewport_index) |
		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
		(sctx->queued.named.rasterizer->clip_plane_enable &
		 clipdist_mask));
	/* If no clip distances are written, the rasterizer's user clip
	 * planes drive the enable bits directly. */
	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
		sctx->queued.named.rasterizer->pa_cl_clip_cntl |
		(clipdist_mask ? 0 :
		 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
		S_028810_CLIP_DISABLE(window_space));
}
524
525 static void si_set_scissor_states(struct pipe_context *ctx,
526 unsigned start_slot,
527 unsigned num_scissors,
528 const struct pipe_scissor_state *state)
529 {
530 struct si_context *sctx = (struct si_context *)ctx;
531 int i;
532
533 for (i = 0; i < num_scissors; i++)
534 sctx->scissors.states[start_slot + i] = state[i];
535
536 sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
537 si_mark_atom_dirty(sctx, &sctx->scissors.atom);
538 }
539
/* Emit PA_SC_VPORT_SCISSOR_* for all dirty scissor slots. */
static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct pipe_scissor_state *states = sctx->scissors.states;
	unsigned mask = sctx->scissors.dirty_mask;

	/* The simple case: Only 1 viewport is active. */
	if (mask & 1 &&
	    !si_get_vs_info(sctx)->writes_viewport_index) {
		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
		radeon_emit(cs, S_028250_TL_X(states[0].minx) |
				S_028250_TL_Y(states[0].miny) |
				S_028250_WINDOW_OFFSET_DISABLE(1));
		radeon_emit(cs, S_028254_BR_X(states[0].maxx) |
				S_028254_BR_Y(states[0].maxy));
		sctx->scissors.dirty_mask &= ~1; /* clear one bit */
		return;
	}

	/* Emit each consecutive run of dirty slots with one reg-seq packet. */
	while (mask) {
		int start, count, i;

		u_bit_scan_consecutive_range(&mask, &start, &count);

		/* Each scissor slot occupies 2 dwords (TL + BR). */
		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
					       start * 4 * 2, count * 2);
		for (i = start; i < start+count; i++) {
			radeon_emit(cs, S_028250_TL_X(states[i].minx) |
					S_028250_TL_Y(states[i].miny) |
					S_028250_WINDOW_OFFSET_DISABLE(1));
			radeon_emit(cs, S_028254_BR_X(states[i].maxx) |
					S_028254_BR_Y(states[i].maxy));
		}
	}
	sctx->scissors.dirty_mask = 0;
}
576
577 static void si_set_viewport_states(struct pipe_context *ctx,
578 unsigned start_slot,
579 unsigned num_viewports,
580 const struct pipe_viewport_state *state)
581 {
582 struct si_context *sctx = (struct si_context *)ctx;
583 int i;
584
585 for (i = 0; i < num_viewports; i++)
586 sctx->viewports.states[start_slot + i] = state[i];
587
588 sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
589 si_mark_atom_dirty(sctx, &sctx->viewports.atom);
590 }
591
/* Emit PA_CL_VPORT_* scale/translate pairs for all dirty viewport slots. */
static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct pipe_viewport_state *states = sctx->viewports.states;
	unsigned mask = sctx->viewports.dirty_mask;

	/* The simple case: Only 1 viewport is active. */
	if (mask & 1 &&
	    !si_get_vs_info(sctx)->writes_viewport_index) {
		r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
		radeon_emit(cs, fui(states[0].scale[0]));
		radeon_emit(cs, fui(states[0].translate[0]));
		radeon_emit(cs, fui(states[0].scale[1]));
		radeon_emit(cs, fui(states[0].translate[1]));
		radeon_emit(cs, fui(states[0].scale[2]));
		radeon_emit(cs, fui(states[0].translate[2]));
		sctx->viewports.dirty_mask &= ~1; /* clear one bit */
		return;
	}

	/* Emit each consecutive run of dirty slots with one reg-seq packet. */
	while (mask) {
		int start, count, i;

		u_bit_scan_consecutive_range(&mask, &start, &count);

		/* Each viewport slot occupies 6 dwords (xyz scale/translate). */
		r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
					       start * 4 * 6, count * 6);
		for (i = start; i < start+count; i++) {
			radeon_emit(cs, fui(states[i].scale[0]));
			radeon_emit(cs, fui(states[i].translate[0]));
			radeon_emit(cs, fui(states[i].scale[1]));
			radeon_emit(cs, fui(states[i].translate[1]));
			radeon_emit(cs, fui(states[i].scale[2]));
			radeon_emit(cs, fui(states[i].translate[2]));
		}
	}
	sctx->viewports.dirty_mask = 0;
}
630
631 /*
632 * inferred state between framebuffer and rasterizer
633 */
634 static void si_update_fb_rs_state(struct si_context *sctx)
635 {
636 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
637 struct si_pm4_state *pm4;
638 float offset_units;
639
640 if (!rs || !sctx->framebuffer.state.zsbuf)
641 return;
642
643 offset_units = sctx->queued.named.rasterizer->offset_units;
644 switch (sctx->framebuffer.state.zsbuf->texture->format) {
645 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
646 case PIPE_FORMAT_X8Z24_UNORM:
647 case PIPE_FORMAT_Z24X8_UNORM:
648 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
649 offset_units *= 2.0f;
650 break;
651 case PIPE_FORMAT_Z32_FLOAT:
652 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
653 offset_units *= 1.0f;
654 break;
655 case PIPE_FORMAT_Z16_UNORM:
656 offset_units *= 4.0f;
657 break;
658 default:
659 return;
660 }
661
662 pm4 = CALLOC_STRUCT(si_pm4_state);
663
664 if (pm4 == NULL)
665 return;
666
667 /* FIXME some of those reg can be computed with cso */
668 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
669 fui(sctx->queued.named.rasterizer->offset_scale));
670 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
671 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
672 fui(sctx->queued.named.rasterizer->offset_scale));
673 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
674
675 si_pm4_set_state(sctx, fb_rs, pm4);
676 }
677
678 /*
679 * Rasterizer
680 */
681
682 static uint32_t si_translate_fill(uint32_t func)
683 {
684 switch(func) {
685 case PIPE_POLYGON_MODE_FILL:
686 return V_028814_X_DRAW_TRIANGLES;
687 case PIPE_POLYGON_MODE_LINE:
688 return V_028814_X_DRAW_LINES;
689 case PIPE_POLYGON_MODE_POINT:
690 return V_028814_X_DRAW_POINTS;
691 default:
692 assert(0);
693 return V_028814_X_DRAW_POINTS;
694 }
695 }
696
697 static void *si_create_rs_state(struct pipe_context *ctx,
698 const struct pipe_rasterizer_state *state)
699 {
700 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
701 struct si_pm4_state *pm4 = &rs->pm4;
702 unsigned tmp;
703 float psize_min, psize_max;
704
705 if (rs == NULL) {
706 return NULL;
707 }
708
709 rs->two_side = state->light_twoside;
710 rs->multisample_enable = state->multisample;
711 rs->clip_plane_enable = state->clip_plane_enable;
712 rs->line_stipple_enable = state->line_stipple_enable;
713 rs->poly_stipple_enable = state->poly_stipple_enable;
714 rs->line_smooth = state->line_smooth;
715 rs->poly_smooth = state->poly_smooth;
716
717 rs->flatshade = state->flatshade;
718 rs->sprite_coord_enable = state->sprite_coord_enable;
719 rs->pa_sc_line_stipple = state->line_stipple_enable ?
720 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
721 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
722 rs->pa_cl_clip_cntl =
723 S_028810_PS_UCP_MODE(3) |
724 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
725 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
726 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
727 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
728 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
729
730 /* offset */
731 rs->offset_units = state->offset_units;
732 rs->offset_scale = state->offset_scale * 16.0f;
733
734 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
735 S_0286D4_FLAT_SHADE_ENA(1) |
736 S_0286D4_PNT_SPRITE_ENA(1) |
737 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
738 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
739 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
740 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
741 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
742
743 /* point size 12.4 fixed point */
744 tmp = (unsigned)(state->point_size * 8.0);
745 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
746
747 if (state->point_size_per_vertex) {
748 psize_min = util_get_min_point_size(state);
749 psize_max = 8192;
750 } else {
751 /* Force the point size to be as if the vertex output was disabled. */
752 psize_min = state->point_size;
753 psize_max = state->point_size;
754 }
755 /* Divide by two, because 0.5 = 1 pixel. */
756 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
757 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
758 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
759
760 tmp = (unsigned)state->line_width * 8;
761 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
762 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
763 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
764 S_028A48_MSAA_ENABLE(state->multisample ||
765 state->poly_smooth ||
766 state->line_smooth) |
767 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor));
768
769 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
770 S_028BE4_PIX_CENTER(state->half_pixel_center) |
771 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
772
773 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
774 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
775 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
776 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
777 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
778 S_028814_FACE(!state->front_ccw) |
779 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
780 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
781 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
782 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
783 state->fill_back != PIPE_POLYGON_MODE_FILL) |
784 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
785 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
786 return rs;
787 }
788
/* pipe_context::bind_rasterizer_state */
static void si_bind_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_rasterizer *old_rs =
		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;

	if (state == NULL)
		return;

	/* Toggling MSAA affects DB render state when multisampling is on. */
	if (sctx->framebuffer.nr_samples > 1 &&
	    (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

	si_pm4_bind_state(sctx, rasterizer, rs);
	/* Polygon offset registers depend on both rasterizer and zsbuf. */
	si_update_fb_rs_state(sctx);

	/* Clip registers depend on the rasterizer's clip planes/clip cntl. */
	si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
808
/* pipe_context::delete_rasterizer_state */
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
}
814
/*
 * inferred state between dsa and stencil ref
 */
/* Combine the stencil reference values with the DSA value/write masks
 * into DB_STENCILREFMASK (front) and DB_STENCILREFMASK_BF (back). */
static void si_update_dsa_stencil_ref(struct si_context *sctx)
{
	struct si_pm4_state *pm4;
	struct pipe_stencil_ref *ref = &sctx->stencil_ref;
	struct si_state_dsa *dsa = sctx->queued.named.dsa;

	/* No DSA state bound yet: nothing to combine. */
	if (!dsa)
		return;

	pm4 = CALLOC_STRUCT(si_pm4_state);
	if (pm4 == NULL)
		return;

	si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
		       S_028430_STENCILTESTVAL(ref->ref_value[0]) |
		       S_028430_STENCILMASK(dsa->valuemask[0]) |
		       S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
		       S_028430_STENCILOPVAL(1));
	si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
		       S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
		       S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
		       S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
		       S_028434_STENCILOPVAL_BF(1));

	si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
}
844
/* pipe_context::set_stencil_ref: store the new reference values and
 * re-derive the combined ref/mask registers. */
static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
				    const struct pipe_stencil_ref *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	sctx->stencil_ref = *state;
	si_update_dsa_stencil_ref(sctx);
}
852
853
854 /*
855 * DSA
856 */
857
858 static uint32_t si_translate_stencil_op(int s_op)
859 {
860 switch (s_op) {
861 case PIPE_STENCIL_OP_KEEP:
862 return V_02842C_STENCIL_KEEP;
863 case PIPE_STENCIL_OP_ZERO:
864 return V_02842C_STENCIL_ZERO;
865 case PIPE_STENCIL_OP_REPLACE:
866 return V_02842C_STENCIL_REPLACE_TEST;
867 case PIPE_STENCIL_OP_INCR:
868 return V_02842C_STENCIL_ADD_CLAMP;
869 case PIPE_STENCIL_OP_DECR:
870 return V_02842C_STENCIL_SUB_CLAMP;
871 case PIPE_STENCIL_OP_INCR_WRAP:
872 return V_02842C_STENCIL_ADD_WRAP;
873 case PIPE_STENCIL_OP_DECR_WRAP:
874 return V_02842C_STENCIL_SUB_WRAP;
875 case PIPE_STENCIL_OP_INVERT:
876 return V_02842C_STENCIL_INVERT;
877 default:
878 R600_ERR("Unknown stencil op %d", s_op);
879 assert(0);
880 break;
881 }
882 return 0;
883 }
884
885 static void *si_create_dsa_state(struct pipe_context *ctx,
886 const struct pipe_depth_stencil_alpha_state *state)
887 {
888 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
889 struct si_pm4_state *pm4 = &dsa->pm4;
890 unsigned db_depth_control;
891 uint32_t db_stencil_control = 0;
892
893 if (dsa == NULL) {
894 return NULL;
895 }
896
897 dsa->valuemask[0] = state->stencil[0].valuemask;
898 dsa->valuemask[1] = state->stencil[1].valuemask;
899 dsa->writemask[0] = state->stencil[0].writemask;
900 dsa->writemask[1] = state->stencil[1].writemask;
901
902 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
903 S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
904 S_028800_ZFUNC(state->depth.func) |
905 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
906
907 /* stencil */
908 if (state->stencil[0].enabled) {
909 db_depth_control |= S_028800_STENCIL_ENABLE(1);
910 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
911 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
912 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
913 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
914
915 if (state->stencil[1].enabled) {
916 db_depth_control |= S_028800_BACKFACE_ENABLE(1);
917 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
918 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
919 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
920 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
921 }
922 }
923
924 /* alpha */
925 if (state->alpha.enabled) {
926 dsa->alpha_func = state->alpha.func;
927
928 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
929 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
930 } else {
931 dsa->alpha_func = PIPE_FUNC_ALWAYS;
932 }
933
934 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
935 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
936 if (state->depth.bounds_test) {
937 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
938 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
939 }
940
941 return dsa;
942 }
943
/* pipe_context::bind_depth_stencil_alpha_state */
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_dsa *dsa = state;

	if (state == NULL)
		return;

	si_pm4_bind_state(sctx, dsa, dsa);
	/* DB_STENCILREFMASK* combines DSA masks with the stencil ref. */
	si_update_dsa_stencil_ref(sctx);
}
955
/* pipe_context::delete_depth_stencil_alpha_state */
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
}
961
962 static void *si_create_db_flush_dsa(struct si_context *sctx)
963 {
964 struct pipe_depth_stencil_alpha_state dsa = {};
965
966 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
967 }
968
969 /* DB RENDER STATE */
970
/* r600_common set_occlusion_query_state hook.
 *
 * The occlusion-query fields of DB_COUNT_CONTROL are derived at emit time
 * from sctx->b.num_occlusion_queries (see si_emit_db_render_state), so
 * 'enable' is intentionally unused here; we only flag the atom for re-emit. */
static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
	struct si_context *sctx = (struct si_context*)ctx;

	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
977
/* Emit the db_render_state atom: DB_RENDER_CONTROL, DB_COUNT_CONTROL,
 * DB_RENDER_OVERRIDE2 and DB_SHADER_CONTROL, derived from the current
 * decompress/clear flags, the active occlusion-query count and the
 * framebuffer/rasterizer state.
 *
 * NOTE: DB_RENDER_CONTROL and DB_COUNT_CONTROL are consecutive registers;
 * the two radeon_emit() calls after the _seq header fill them in that
 * exact order — do not reorder. */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		/* Decompress-copy of depth/stencil into a texture. */
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_inplace_flush_enabled) {
		/* In-place flush: write Z/S back uncompressed. */
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(1) |
			    S_028000_STENCIL_COMPRESS_DISABLE(1));
	} else if (sctx->db_depth_clear) {
		radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
	} else {
		radeon_emit(cs, 0);
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0) {
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	if (sctx->db_depth_disable_expclear) {
		/* Needed while reading back an expanded-cleared depth buffer. */
		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
				       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
	} else {
		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
	}

	/* DB_SHADER_CONTROL: combine the PS-derived bits with context state. */
	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
			    sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled)
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	else
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}
1051
1052 /*
1053 * format translation
1054 */
/* Map a gallium color format to the CB_COLOR_INFO.FORMAT field value, or
 * V_028C70_COLOR_INVALID if the color buffer cannot store it.  Only plain
 * (uncompressed) layouts plus R11G11B10_FLOAT are handled; component
 * ordering is handled separately via r600_translate_colorswap(). */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);

	/* True if the four channel bit-widths match (x,y,z,w) exactly. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
         desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	/* Dispatch on channel count, then on channel sizes. */
	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}
1126
1127 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1128 {
1129 if (SI_BIG_ENDIAN) {
1130 switch(colorformat) {
1131 /* 8-bit buffers. */
1132 case V_028C70_COLOR_8:
1133 return V_028C70_ENDIAN_NONE;
1134
1135 /* 16-bit buffers. */
1136 case V_028C70_COLOR_5_6_5:
1137 case V_028C70_COLOR_1_5_5_5:
1138 case V_028C70_COLOR_4_4_4_4:
1139 case V_028C70_COLOR_16:
1140 case V_028C70_COLOR_8_8:
1141 return V_028C70_ENDIAN_8IN16;
1142
1143 /* 32-bit buffers. */
1144 case V_028C70_COLOR_8_8_8_8:
1145 case V_028C70_COLOR_2_10_10_10:
1146 case V_028C70_COLOR_8_24:
1147 case V_028C70_COLOR_24_8:
1148 case V_028C70_COLOR_16_16:
1149 return V_028C70_ENDIAN_8IN32;
1150
1151 /* 64-bit buffers. */
1152 case V_028C70_COLOR_16_16_16_16:
1153 return V_028C70_ENDIAN_8IN16;
1154
1155 case V_028C70_COLOR_32_32:
1156 return V_028C70_ENDIAN_8IN32;
1157
1158 /* 128-bit buffers. */
1159 case V_028C70_COLOR_32_32_32_32:
1160 return V_028C70_ENDIAN_8IN32;
1161 default:
1162 return V_028C70_ENDIAN_NONE; /* Unsupported. */
1163 }
1164 } else {
1165 return V_028C70_ENDIAN_NONE;
1166 }
1167 }
1168
1169 /* Returns the size in bits of the widest component of a CB format */
1170 static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1171 {
1172 switch(colorformat) {
1173 case V_028C70_COLOR_4_4_4_4:
1174 return 4;
1175
1176 case V_028C70_COLOR_1_5_5_5:
1177 case V_028C70_COLOR_5_5_5_1:
1178 return 5;
1179
1180 case V_028C70_COLOR_5_6_5:
1181 return 6;
1182
1183 case V_028C70_COLOR_8:
1184 case V_028C70_COLOR_8_8:
1185 case V_028C70_COLOR_8_8_8_8:
1186 return 8;
1187
1188 case V_028C70_COLOR_10_10_10_2:
1189 case V_028C70_COLOR_2_10_10_10:
1190 return 10;
1191
1192 case V_028C70_COLOR_10_11_11:
1193 case V_028C70_COLOR_11_11_10:
1194 return 11;
1195
1196 case V_028C70_COLOR_16:
1197 case V_028C70_COLOR_16_16:
1198 case V_028C70_COLOR_16_16_16_16:
1199 return 16;
1200
1201 case V_028C70_COLOR_8_24:
1202 case V_028C70_COLOR_24_8:
1203 return 24;
1204
1205 case V_028C70_COLOR_32:
1206 case V_028C70_COLOR_32_32:
1207 case V_028C70_COLOR_32_32_32_32:
1208 case V_028C70_COLOR_X24_8_32_FLOAT:
1209 return 32;
1210 }
1211
1212 assert(!"Unknown maximum component size");
1213 return 0;
1214 }
1215
1216 static uint32_t si_translate_dbformat(enum pipe_format format)
1217 {
1218 switch (format) {
1219 case PIPE_FORMAT_Z16_UNORM:
1220 return V_028040_Z_16;
1221 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1222 case PIPE_FORMAT_X8Z24_UNORM:
1223 case PIPE_FORMAT_Z24X8_UNORM:
1224 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1225 return V_028040_Z_24; /* deprecated on SI */
1226 case PIPE_FORMAT_Z32_FLOAT:
1227 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1228 return V_028040_Z_32_FLOAT;
1229 default:
1230 return V_028040_Z_INVALID;
1231 }
1232 }
1233
1234 /*
1235 * Texture translation
1236 */
1237
/* Map a gallium format to the IMG_DATA_FORMAT field of an SI image
 * resource descriptor (sampler views).
 *
 * \param screen          needed to query kernel DRM version for
 *                        compressed-format support.
 * \param format          the gallium format to translate.
 * \param desc            its util_format description.
 * \param first_non_void  index of the first non-void channel, or < 0 if
 *                        every channel is void.
 * \return an IMG_DATA_FORMAT value, or ~0 if there is no hw equivalent.
 *         Number format and swizzle are handled elsewhere. */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* Compressed formats require kernel support: radeon DRM >= 2.31 or
	 * any amdgpu DRM 3.x. */
	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
					  sscreen->b.info.drm_minor >= 31) ||
					 sscreen->b.info.drm_major == 3;
	boolean uniform = TRUE;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		/* Only 1- and 4-channel sRGB variants are handled below. */
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	/* RGTC (BC4/BC5) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* BPTC (BC6/BC7) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	/* Subsampled YUV-style packed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	/* S3TC (BC1-BC3) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		/* S3TC support can also be globally disabled at runtime. */
		if (!util_format_s3tc_enabled) {
			goto out_unknown;
		}

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	/* Non-plain packed float formats. */
	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	/* Formats with no non-void channel cannot be dispatched below. */
	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}
1457
1458 static unsigned si_tex_wrap(unsigned wrap)
1459 {
1460 switch (wrap) {
1461 default:
1462 case PIPE_TEX_WRAP_REPEAT:
1463 return V_008F30_SQ_TEX_WRAP;
1464 case PIPE_TEX_WRAP_CLAMP:
1465 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1466 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1467 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1468 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1469 return V_008F30_SQ_TEX_CLAMP_BORDER;
1470 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1471 return V_008F30_SQ_TEX_MIRROR;
1472 case PIPE_TEX_WRAP_MIRROR_CLAMP:
1473 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1474 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1475 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1476 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1477 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1478 }
1479 }
1480
1481 static unsigned si_tex_filter(unsigned filter)
1482 {
1483 switch (filter) {
1484 default:
1485 case PIPE_TEX_FILTER_NEAREST:
1486 return V_008F38_SQ_TEX_XY_FILTER_POINT;
1487 case PIPE_TEX_FILTER_LINEAR:
1488 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1489 }
1490 }
1491
1492 static unsigned si_tex_mipfilter(unsigned filter)
1493 {
1494 switch (filter) {
1495 case PIPE_TEX_MIPFILTER_NEAREST:
1496 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1497 case PIPE_TEX_MIPFILTER_LINEAR:
1498 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1499 default:
1500 case PIPE_TEX_MIPFILTER_NONE:
1501 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1502 }
1503 }
1504
1505 static unsigned si_tex_compare(unsigned compare)
1506 {
1507 switch (compare) {
1508 default:
1509 case PIPE_FUNC_NEVER:
1510 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1511 case PIPE_FUNC_LESS:
1512 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1513 case PIPE_FUNC_EQUAL:
1514 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1515 case PIPE_FUNC_LEQUAL:
1516 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1517 case PIPE_FUNC_GREATER:
1518 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1519 case PIPE_FUNC_NOTEQUAL:
1520 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1521 case PIPE_FUNC_GEQUAL:
1522 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1523 case PIPE_FUNC_ALWAYS:
1524 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1525 }
1526 }
1527
1528 static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1529 {
1530 switch (dim) {
1531 default:
1532 case PIPE_TEXTURE_1D:
1533 return V_008F1C_SQ_RSRC_IMG_1D;
1534 case PIPE_TEXTURE_1D_ARRAY:
1535 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1536 case PIPE_TEXTURE_2D:
1537 case PIPE_TEXTURE_RECT:
1538 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1539 V_008F1C_SQ_RSRC_IMG_2D;
1540 case PIPE_TEXTURE_2D_ARRAY:
1541 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1542 V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1543 case PIPE_TEXTURE_3D:
1544 return V_008F1C_SQ_RSRC_IMG_3D;
1545 case PIPE_TEXTURE_CUBE:
1546 case PIPE_TEXTURE_CUBE_ARRAY:
1547 return V_008F1C_SQ_RSRC_IMG_CUBE;
1548 }
1549 }
1550
1551 /*
1552 * Format support testing
1553 */
1554
1555 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1556 {
1557 return si_translate_texformat(screen, format, util_format_description(format),
1558 util_format_get_first_non_void_channel(format)) != ~0U;
1559 }
1560
1561 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1562 const struct util_format_description *desc,
1563 int first_non_void)
1564 {
1565 unsigned type = desc->channel[first_non_void].type;
1566 int i;
1567
1568 if (type == UTIL_FORMAT_TYPE_FIXED)
1569 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1570
1571 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1572 return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1573
1574 if (desc->nr_channels == 4 &&
1575 desc->channel[0].size == 10 &&
1576 desc->channel[1].size == 10 &&
1577 desc->channel[2].size == 10 &&
1578 desc->channel[3].size == 2)
1579 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1580
1581 /* See whether the components are of the same size. */
1582 for (i = 0; i < desc->nr_channels; i++) {
1583 if (desc->channel[first_non_void].size != desc->channel[i].size)
1584 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1585 }
1586
1587 switch (desc->channel[first_non_void].size) {
1588 case 8:
1589 switch (desc->nr_channels) {
1590 case 1:
1591 return V_008F0C_BUF_DATA_FORMAT_8;
1592 case 2:
1593 return V_008F0C_BUF_DATA_FORMAT_8_8;
1594 case 3:
1595 case 4:
1596 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1597 }
1598 break;
1599 case 16:
1600 switch (desc->nr_channels) {
1601 case 1:
1602 return V_008F0C_BUF_DATA_FORMAT_16;
1603 case 2:
1604 return V_008F0C_BUF_DATA_FORMAT_16_16;
1605 case 3:
1606 case 4:
1607 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1608 }
1609 break;
1610 case 32:
1611 /* From the Southern Islands ISA documentation about MTBUF:
1612 * 'Memory reads of data in memory that is 32 or 64 bits do not
1613 * undergo any format conversion.'
1614 */
1615 if (type != UTIL_FORMAT_TYPE_FLOAT &&
1616 !desc->channel[first_non_void].pure_integer)
1617 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1618
1619 switch (desc->nr_channels) {
1620 case 1:
1621 return V_008F0C_BUF_DATA_FORMAT_32;
1622 case 2:
1623 return V_008F0C_BUF_DATA_FORMAT_32_32;
1624 case 3:
1625 return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1626 case 4:
1627 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1628 }
1629 break;
1630 }
1631
1632 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1633 }
1634
1635 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1636 const struct util_format_description *desc,
1637 int first_non_void)
1638 {
1639 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1640 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1641
1642 switch (desc->channel[first_non_void].type) {
1643 case UTIL_FORMAT_TYPE_SIGNED:
1644 if (desc->channel[first_non_void].normalized)
1645 return V_008F0C_BUF_NUM_FORMAT_SNORM;
1646 else if (desc->channel[first_non_void].pure_integer)
1647 return V_008F0C_BUF_NUM_FORMAT_SINT;
1648 else
1649 return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1650 break;
1651 case UTIL_FORMAT_TYPE_UNSIGNED:
1652 if (desc->channel[first_non_void].normalized)
1653 return V_008F0C_BUF_NUM_FORMAT_UNORM;
1654 else if (desc->channel[first_non_void].pure_integer)
1655 return V_008F0C_BUF_NUM_FORMAT_UINT;
1656 else
1657 return V_008F0C_BUF_NUM_FORMAT_USCALED;
1658 break;
1659 case UTIL_FORMAT_TYPE_FLOAT:
1660 default:
1661 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1662 }
1663 }
1664
1665 static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1666 {
1667 const struct util_format_description *desc;
1668 int first_non_void;
1669 unsigned data_format;
1670
1671 desc = util_format_description(format);
1672 first_non_void = util_format_get_first_non_void_channel(format);
1673 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1674 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1675 }
1676
1677 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1678 {
1679 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1680 r600_translate_colorswap(format) != ~0U;
1681 }
1682
1683 static bool si_is_zs_format_supported(enum pipe_format format)
1684 {
1685 return si_translate_dbformat(format) != V_028040_Z_INVALID;
1686 }
1687
1688 boolean si_is_format_supported(struct pipe_screen *screen,
1689 enum pipe_format format,
1690 enum pipe_texture_target target,
1691 unsigned sample_count,
1692 unsigned usage)
1693 {
1694 unsigned retval = 0;
1695
1696 if (target >= PIPE_MAX_TEXTURE_TYPES) {
1697 R600_ERR("r600: unsupported texture type %d\n", target);
1698 return FALSE;
1699 }
1700
1701 if (!util_format_is_supported(format, usage))
1702 return FALSE;
1703
1704 if (sample_count > 1) {
1705 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
1706 return FALSE;
1707
1708 switch (sample_count) {
1709 case 2:
1710 case 4:
1711 case 8:
1712 break;
1713 default:
1714 return FALSE;
1715 }
1716 }
1717
1718 if (usage & PIPE_BIND_SAMPLER_VIEW) {
1719 if (target == PIPE_BUFFER) {
1720 if (si_is_vertex_format_supported(screen, format))
1721 retval |= PIPE_BIND_SAMPLER_VIEW;
1722 } else {
1723 if (si_is_sampler_format_supported(screen, format))
1724 retval |= PIPE_BIND_SAMPLER_VIEW;
1725 }
1726 }
1727
1728 if ((usage & (PIPE_BIND_RENDER_TARGET |
1729 PIPE_BIND_DISPLAY_TARGET |
1730 PIPE_BIND_SCANOUT |
1731 PIPE_BIND_SHARED |
1732 PIPE_BIND_BLENDABLE)) &&
1733 si_is_colorbuffer_format_supported(format)) {
1734 retval |= usage &
1735 (PIPE_BIND_RENDER_TARGET |
1736 PIPE_BIND_DISPLAY_TARGET |
1737 PIPE_BIND_SCANOUT |
1738 PIPE_BIND_SHARED);
1739 if (!util_format_is_pure_integer(format) &&
1740 !util_format_is_depth_or_stencil(format))
1741 retval |= usage & PIPE_BIND_BLENDABLE;
1742 }
1743
1744 if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1745 si_is_zs_format_supported(format)) {
1746 retval |= PIPE_BIND_DEPTH_STENCIL;
1747 }
1748
1749 if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1750 si_is_vertex_format_supported(screen, format)) {
1751 retval |= PIPE_BIND_VERTEX_BUFFER;
1752 }
1753
1754 if (usage & PIPE_BIND_TRANSFER_READ)
1755 retval |= PIPE_BIND_TRANSFER_READ;
1756 if (usage & PIPE_BIND_TRANSFER_WRITE)
1757 retval |= PIPE_BIND_TRANSFER_WRITE;
1758
1759 return retval == usage;
1760 }
1761
1762 unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1763 {
1764 unsigned tile_mode_index = 0;
1765
1766 if (stencil) {
1767 tile_mode_index = rtex->surface.stencil_tiling_index[level];
1768 } else {
1769 tile_mode_index = rtex->surface.tiling_index[level];
1770 }
1771 return tile_mode_index;
1772 }
1773
1774 /*
1775 * framebuffer handling
1776 */
1777
/* Precompute the CB_COLOR* register values stored in the r600_surface so
 * they can be emitted when the surface is bound as a color render target:
 * base address, pitch/slice, view range, format/number type/endian,
 * FMASK setup and the 16-bpc export classification.
 * Sets surf->color_initialized when done. */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	uint64_t offset = rtex->surface.level[level].offset;
	unsigned pitch, slice;
	unsigned color_info, color_attrib, color_pitch, color_view;
	unsigned tile_mode_index;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;
	unsigned max_comp_size;

	/* Layered rendering doesn't work with LINEAR_GENERAL.
	 * (LINEAR_ALIGNED and others work) */
	if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
		assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer);
		/* Fold the single selected layer into the base offset
		 * instead of using the SLICE_START/SLICE_MAX view. */
		offset += rtex->surface.level[level].slice_size *
			  surf->base.u.tex.first_layer;
		color_view = 0;
	} else {
		color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
			     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
	}

	/* TILE_MAX values are in units of 8 pixels (pitch) and 64 pixels
	 * (slice), minus one. */
	pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
	if (slice) {
		slice = slice - 1;
	}

	tile_mode_index = si_tile_mode_index(rtex, level, false);

	/* Derive the CB number type from the first non-void channel. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format);
	/* Staging textures are mapped by the CPU, so keep their memory
	 * layout byte-exact (no endian swap). */
	if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
		endian = V_028C70_ENDIAN_NONE;
	} else {
		endian = si_colorformat_endian_swap(format);
	}

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	color_pitch = S_028C64_TILE_MAX(pitch);

	/* Force alpha to 1 if the format has no alpha channel. */
	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);

	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
			if (sctx->b.chip_class >= CIK) {
				color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1);
			}
		}
	}

	offset += rtex->resource.gpu_address;

	/* CB_COLOR_BASE holds the address in 256-byte units (>> 8). */
	surf->cb_color_base = offset >> 8;
	surf->cb_color_pitch = color_pitch;
	surf->cb_color_slice = S_028C68_TILE_MAX(slice);
	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (sctx->b.chip_class >= VI)
		surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);

	if (rtex->fmask.size) {
		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		surf->cb_color_fmask = surf->cb_color_base;
		surf->cb_color_fmask_slice = surf->cb_color_slice;
		surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);

		if (sctx->b.chip_class == SI) {
			unsigned bankh = util_logbase2(rtex->surface.bankh);
			surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
		}

		if (sctx->b.chip_class >= CIK) {
			surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch);
		}
	}

	/* Determine pixel shader export format */
	max_comp_size = si_colorformat_max_comp_size(format);
	if (ntype == V_028C70_NUMBER_SRGB ||
	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
	     max_comp_size <= 10) ||
	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
		surf->export_16bpc = true;
	}

	surf->color_initialized = true;
}
1945
1946 static void si_init_depth_surface(struct si_context *sctx,
1947 struct r600_surface *surf)
1948 {
1949 struct si_screen *sscreen = sctx->screen;
1950 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1951 unsigned level = surf->base.u.tex.level;
1952 struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
1953 unsigned format, tile_mode_index, array_mode;
1954 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
1955 uint32_t z_info, s_info, db_depth_info;
1956 uint64_t z_offs, s_offs;
1957 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0;
1958
1959 switch (sctx->framebuffer.state.zsbuf->texture->format) {
1960 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1961 case PIPE_FORMAT_X8Z24_UNORM:
1962 case PIPE_FORMAT_Z24X8_UNORM:
1963 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1964 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1965 break;
1966 case PIPE_FORMAT_Z32_FLOAT:
1967 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1968 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1969 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1970 break;
1971 case PIPE_FORMAT_Z16_UNORM:
1972 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1973 break;
1974 default:
1975 assert(0);
1976 }
1977
1978 format = si_translate_dbformat(rtex->resource.b.b.format);
1979
1980 if (format == V_028040_Z_INVALID) {
1981 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
1982 }
1983 assert(format != V_028040_Z_INVALID);
1984
1985 s_offs = z_offs = rtex->resource.gpu_address;
1986 z_offs += rtex->surface.level[level].offset;
1987 s_offs += rtex->surface.stencil_level[level].offset;
1988
1989 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1990
1991 z_info = S_028040_FORMAT(format);
1992 if (rtex->resource.b.b.nr_samples > 1) {
1993 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
1994 }
1995
1996 if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1997 s_info = S_028044_FORMAT(V_028044_STENCIL_8);
1998 else
1999 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2000
2001 if (sctx->b.chip_class >= CIK) {
2002 switch (rtex->surface.level[level].mode) {
2003 case RADEON_SURF_MODE_2D:
2004 array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
2005 break;
2006 case RADEON_SURF_MODE_1D:
2007 case RADEON_SURF_MODE_LINEAR_ALIGNED:
2008 case RADEON_SURF_MODE_LINEAR:
2009 default:
2010 array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
2011 break;
2012 }
2013 tile_split = rtex->surface.tile_split;
2014 stile_split = rtex->surface.stencil_tile_split;
2015 macro_aspect = rtex->surface.mtilea;
2016 bankw = rtex->surface.bankw;
2017 bankh = rtex->surface.bankh;
2018 tile_split = cik_tile_split(tile_split);
2019 stile_split = cik_tile_split(stile_split);
2020 macro_aspect = cik_macro_tile_aspect(macro_aspect);
2021 bankw = cik_bank_wh(bankw);
2022 bankh = cik_bank_wh(bankh);
2023 nbanks = si_num_banks(sscreen, rtex);
2024 tile_mode_index = si_tile_mode_index(rtex, level, false);
2025 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
2026
2027 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
2028 S_02803C_PIPE_CONFIG(pipe_config) |
2029 S_02803C_BANK_WIDTH(bankw) |
2030 S_02803C_BANK_HEIGHT(bankh) |
2031 S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
2032 S_02803C_NUM_BANKS(nbanks);
2033 z_info |= S_028040_TILE_SPLIT(tile_split);
2034 s_info |= S_028044_TILE_SPLIT(stile_split);
2035 } else {
2036 tile_mode_index = si_tile_mode_index(rtex, level, false);
2037 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2038 tile_mode_index = si_tile_mode_index(rtex, level, true);
2039 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2040 }
2041
2042 /* HiZ aka depth buffer htile */
2043 /* use htile only for first level */
2044 if (rtex->htile_buffer && !level) {
2045 z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2046 S_028040_ALLOW_EXPCLEAR(1);
2047
2048 /* Use all of the htile_buffer for depth, because we don't
2049 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
2050 s_info |= S_028044_TILE_STENCIL_DISABLE(1);
2051
2052 uint64_t va = rtex->htile_buffer->gpu_address;
2053 db_htile_data_base = va >> 8;
2054 db_htile_surface = S_028ABC_FULL_CACHE(1);
2055 } else {
2056 db_htile_data_base = 0;
2057 db_htile_surface = 0;
2058 }
2059
2060 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
2061
2062 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
2063 S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
2064 surf->db_htile_data_base = db_htile_data_base;
2065 surf->db_depth_info = db_depth_info;
2066 surf->db_z_info = z_info;
2067 surf->db_stencil_info = s_info;
2068 surf->db_depth_base = z_offs >> 8;
2069 surf->db_stencil_base = s_offs >> 8;
2070 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
2071 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
2072 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
2073 levelinfo->nblk_y) / 64 - 1);
2074 surf->db_htile_surface = db_htile_surface;
2075 surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;
2076
2077 surf->depth_initialized = true;
2078 }
2079
/* Bind a new framebuffer.
 *
 * Flushes the relevant caches, lazily initializes the per-surface hardware
 * state of any color/depth attachment seen for the first time, recomputes
 * derived framebuffer state (16bpc export mask, compressed CB mask, sample
 * counts), updates the framebuffer atom's emit size, and re-dirties the
 * atoms whose contents depend on the framebuffer.
 */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	/* Snapshot values that decide below which atoms must be re-dirtied. */
	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	int i;

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Flush all CB and DB caches here because all buffers can be used
	 * for write by both TC (with shader image stores) and CB/DB.
	 */
	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
			 SI_CONTEXT_INV_TC_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;

	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Recompute derived state from scratch for the new attachment set. */
	sctx->framebuffer.export_16bpc = 0;
	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
				  util_format_is_pure_integer(state->cbufs[0]->format);

	/* DB render state depends on whether CB0 is an integer format. */
	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		/* Per-surface register values are computed lazily, once. */
		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		if (surf->export_16bpc) {
			sctx->framebuffer.export_16bpc |= 1 << i;
		}

		/* FMASK+CMASK means the CB output is compressed and must be
		 * decompressed before it can be sampled. */
		if (rtex->fmask.size && rtex->cmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}
	/* Set the 16BPC export for possible dual-src blending. */
	if (i == 1 && surf && surf->export_16bpc) {
		sctx->framebuffer.export_16bpc |= 1 << 1;
	}

	assert(!(sctx->framebuffer.export_16bpc & ~0xff));

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;

		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}

	si_update_fb_rs_state(sctx);
	si_update_fb_blend_state(sctx);

	/* Recompute the framebuffer atom's emit size; this must mirror what
	 * si_emit_framebuffer_state() actually writes. */
	sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->b.sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->b.sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->b.sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->b.sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->b.sample_locations_16x;
			break;
		default:
			assert(0);
		}
		/* Two floats (x,y) per sample. */
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
					 SI_DRIVER_STATE_CONST_BUF, &constbuf);

		/* Smoothing (only possible with nr_samples == 1) uses the same
		 * sample locations as the MSAA it simulates.
		 *
		 * Therefore, don't update the sample locations when
		 * transitioning from no AA to smoothing-equivalent AA, and
		 * vice versa.
		 */
		if ((sctx->framebuffer.nr_samples != 1 ||
		     old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
		    (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
		     old_nr_samples != 1))
			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
	}
}
2201
/* Emit all framebuffer-related registers into the GFX command stream:
 * one CB_COLOR* block per color attachment (plus BO relocations), the
 * DB depth/stencil registers (or the "invalid" markers when no zsbuf is
 * bound), and the window scissor bottom-right. The total dword count must
 * match the num_dw computation in si_set_framebuffer_state(). */
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	unsigned i, nr_cbufs = state->nr_cbufs;
	struct r600_texture *tex = NULL;
	struct r600_surface *cb = NULL;

	/* Colorbuffers. */
	for (i = 0; i < nr_cbufs; i++) {
		cb = (struct r600_surface*)state->cbufs[i];
		if (!cb) {
			/* Unbound slot: mark the color format invalid (3 dwords). */
			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		tex = (struct r600_texture *)cb->base.texture;
		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
				      &tex->resource, RADEON_USAGE_READWRITE,
				      tex->surface.nsamples > 1 ?
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
					      RADEON_PRIO_COLOR_BUFFER);

		/* CMASK may live in its own BO; add a relocation for it too. */
		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_COLOR_META);
		}

		/* VI adds the DCC base register at the end of the sequence. */
		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
					   sctx->b.chip_class >= VI ? 14 : 13);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */

		if (sctx->b.chip_class >= VI)
			radeon_emit(cs, 0);	/* R_028C94_CB_COLOR0_DCC_BASE */
	}
	/* set CB_COLOR1_INFO for possible dual-src blending */
	/* (reuses cb/tex from the single iteration of the loop above) */
	if (i == 1 && state->cbufs[0]) {
		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
				       cb->cb_color_info | tex->cb_color_info);
		i++;
	}
	/* Invalidate the remaining color slots. */
	for (; i < 8 ; i++) {
		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
	}

	/* ZS buffer. */
	if (state->zsbuf) {
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;

		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
				      &rtex->resource, RADEON_USAGE_READWRITE,
				      zb->base.texture->nr_samples > 1 ?
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
					      RADEON_PRIO_DEPTH_BUFFER);

		/* HTILE (hierarchical Z metadata) lives in a separate BO. */
		if (zb->db_htile_data_base) {
			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
					      RADEON_PRIO_DEPTH_META);
		}

		r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

		r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
				S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
				       zb->pa_su_poly_offset_db_fmt_cntl);
	} else {
		/* No depth buffer: mark both Z and stencil formats invalid. */
		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions. */
	/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
}
2308
2309 static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
2310 struct r600_atom *atom)
2311 {
2312 struct si_context *sctx = (struct si_context *)rctx;
2313 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2314 unsigned nr_samples = sctx->framebuffer.nr_samples;
2315
2316 cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
2317 SI_NUM_SMOOTH_AA_SAMPLES);
2318 }
2319
2320 const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */
2321
2322 static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
2323 {
2324 struct si_context *sctx = (struct si_context *)rctx;
2325 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2326
2327 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
2328 sctx->ps_iter_samples,
2329 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
2330 }
2331
2332 const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
2333
2334 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2335 {
2336 struct si_context *sctx = (struct si_context *)ctx;
2337
2338 if (sctx->ps_iter_samples == min_samples)
2339 return;
2340
2341 sctx->ps_iter_samples = min_samples;
2342
2343 if (sctx->framebuffer.nr_samples > 1)
2344 si_mark_atom_dirty(sctx, &sctx->msaa_config);
2345 }
2346
2347 /*
2348 * Samplers
2349 */
2350
2351 /**
2352 * Create a sampler view.
2353 *
2354 * @param ctx context
2355 * @param texture texture
2356 * @param state sampler view template
2357 * @param width0 width0 override (for compressed textures as int)
2358 * @param height0 height0 override (for compressed textures as int)
2359 * @param force_level set the base address to the level (for compressed textures)
2360 */
2361 struct pipe_sampler_view *
2362 si_create_sampler_view_custom(struct pipe_context *ctx,
2363 struct pipe_resource *texture,
2364 const struct pipe_sampler_view *state,
2365 unsigned width0, unsigned height0,
2366 unsigned force_level)
2367 {
2368 struct si_context *sctx = (struct si_context*)ctx;
2369 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
2370 struct r600_texture *tmp = (struct r600_texture*)texture;
2371 const struct util_format_description *desc;
2372 unsigned format, num_format, base_level, first_level, last_level;
2373 uint32_t pitch = 0;
2374 unsigned char state_swizzle[4], swizzle[4];
2375 unsigned height, depth, width;
2376 enum pipe_format pipe_format = state->format;
2377 struct radeon_surf_level *surflevel;
2378 int first_non_void;
2379 uint64_t va;
2380
2381 if (view == NULL)
2382 return NULL;
2383
2384 /* initialize base object */
2385 view->base = *state;
2386 view->base.texture = NULL;
2387 view->base.reference.count = 1;
2388 view->base.context = ctx;
2389
2390 /* NULL resource, obey swizzle (only ZERO and ONE make sense). */
2391 if (!texture) {
2392 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) |
2393 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) |
2394 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) |
2395 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) |
2396 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D);
2397 return &view->base;
2398 }
2399
2400 pipe_resource_reference(&view->base.texture, texture);
2401 view->resource = &tmp->resource;
2402
2403 /* Buffer resource. */
2404 if (texture->target == PIPE_BUFFER) {
2405 unsigned stride, num_records;
2406
2407 desc = util_format_description(state->format);
2408 first_non_void = util_format_get_first_non_void_channel(state->format);
2409 stride = desc->block.bits / 8;
2410 va = tmp->resource.gpu_address + state->u.buf.first_element*stride;
2411 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2412 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2413
2414 num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
2415 num_records = MIN2(num_records, texture->width0 / stride);
2416
2417 if (sctx->b.chip_class >= VI)
2418 num_records *= stride;
2419
2420 view->state[4] = va;
2421 view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
2422 S_008F04_STRIDE(stride);
2423 view->state[6] = num_records;
2424 view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2425 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2426 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2427 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2428 S_008F0C_NUM_FORMAT(num_format) |
2429 S_008F0C_DATA_FORMAT(format);
2430
2431 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
2432 return &view->base;
2433 }
2434
2435 state_swizzle[0] = state->swizzle_r;
2436 state_swizzle[1] = state->swizzle_g;
2437 state_swizzle[2] = state->swizzle_b;
2438 state_swizzle[3] = state->swizzle_a;
2439
2440 surflevel = tmp->surface.level;
2441
2442 /* Texturing with separate depth and stencil. */
2443 if (tmp->is_depth && !tmp->is_flushing_texture) {
2444 switch (pipe_format) {
2445 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2446 pipe_format = PIPE_FORMAT_Z32_FLOAT;
2447 break;
2448 case PIPE_FORMAT_X8Z24_UNORM:
2449 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2450 /* Z24 is always stored like this. */
2451 pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2452 break;
2453 case PIPE_FORMAT_X24S8_UINT:
2454 case PIPE_FORMAT_S8X24_UINT:
2455 case PIPE_FORMAT_X32_S8X24_UINT:
2456 pipe_format = PIPE_FORMAT_S8_UINT;
2457 surflevel = tmp->surface.stencil_level;
2458 break;
2459 default:;
2460 }
2461 }
2462
2463 desc = util_format_description(pipe_format);
2464
2465 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2466 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2467 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2468
2469 switch (pipe_format) {
2470 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2471 case PIPE_FORMAT_X24S8_UINT:
2472 case PIPE_FORMAT_X32_S8X24_UINT:
2473 case PIPE_FORMAT_X8Z24_UNORM:
2474 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2475 break;
2476 default:
2477 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2478 }
2479 } else {
2480 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2481 }
2482
2483 first_non_void = util_format_get_first_non_void_channel(pipe_format);
2484
2485 switch (pipe_format) {
2486 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2487 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2488 break;
2489 default:
2490 if (first_non_void < 0) {
2491 if (util_format_is_compressed(pipe_format)) {
2492 switch (pipe_format) {
2493 case PIPE_FORMAT_DXT1_SRGB:
2494 case PIPE_FORMAT_DXT1_SRGBA:
2495 case PIPE_FORMAT_DXT3_SRGBA:
2496 case PIPE_FORMAT_DXT5_SRGBA:
2497 case PIPE_FORMAT_BPTC_SRGBA:
2498 num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2499 break;
2500 case PIPE_FORMAT_RGTC1_SNORM:
2501 case PIPE_FORMAT_LATC1_SNORM:
2502 case PIPE_FORMAT_RGTC2_SNORM:
2503 case PIPE_FORMAT_LATC2_SNORM:
2504 /* implies float, so use SNORM/UNORM to determine
2505 whether data is signed or not */
2506 case PIPE_FORMAT_BPTC_RGB_FLOAT:
2507 num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2508 break;
2509 default:
2510 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2511 break;
2512 }
2513 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2514 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2515 } else {
2516 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2517 }
2518 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2519 num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2520 } else {
2521 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2522
2523 switch (desc->channel[first_non_void].type) {
2524 case UTIL_FORMAT_TYPE_FLOAT:
2525 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2526 break;
2527 case UTIL_FORMAT_TYPE_SIGNED:
2528 if (desc->channel[first_non_void].normalized)
2529 num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2530 else if (desc->channel[first_non_void].pure_integer)
2531 num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2532 else
2533 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2534 break;
2535 case UTIL_FORMAT_TYPE_UNSIGNED:
2536 if (desc->channel[first_non_void].normalized)
2537 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2538 else if (desc->channel[first_non_void].pure_integer)
2539 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2540 else
2541 num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2542 }
2543 }
2544 }
2545
2546 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
2547 if (format == ~0) {
2548 format = 0;
2549 }
2550
2551 base_level = 0;
2552 first_level = state->u.tex.first_level;
2553 last_level = state->u.tex.last_level;
2554 width = width0;
2555 height = height0;
2556 depth = texture->depth0;
2557
2558 if (force_level) {
2559 assert(force_level == first_level &&
2560 force_level == last_level);
2561 base_level = force_level;
2562 first_level = 0;
2563 last_level = 0;
2564 width = u_minify(width, force_level);
2565 height = u_minify(height, force_level);
2566 depth = u_minify(depth, force_level);
2567 }
2568
2569 pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
2570
2571 if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
2572 height = 1;
2573 depth = texture->array_size;
2574 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
2575 depth = texture->array_size;
2576 } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
2577 depth = texture->array_size / 6;
2578
2579 va = tmp->resource.gpu_address + surflevel[base_level].offset;
2580
2581 view->state[0] = va >> 8;
2582 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2583 S_008F14_DATA_FORMAT(format) |
2584 S_008F14_NUM_FORMAT(num_format));
2585 view->state[2] = (S_008F18_WIDTH(width - 1) |
2586 S_008F18_HEIGHT(height - 1));
2587 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2588 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2589 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2590 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2591 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
2592 0 : first_level) |
2593 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
2594 util_logbase2(texture->nr_samples) :
2595 last_level) |
2596 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
2597 S_008F1C_POW2_PAD(texture->last_level > 0) |
2598 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
2599 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2600 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2601 S_008F24_LAST_ARRAY(state->u.tex.last_layer));
2602 view->state[6] = 0;
2603 view->state[7] = 0;
2604
2605 /* Initialize the sampler view for FMASK. */
2606 if (tmp->fmask.size) {
2607 uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset;
2608 uint32_t fmask_format;
2609
2610 switch (texture->nr_samples) {
2611 case 2:
2612 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2613 break;
2614 case 4:
2615 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2616 break;
2617 case 8:
2618 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2619 break;
2620 default:
2621 assert(0);
2622 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2623 }
2624
2625 view->fmask_state[0] = va >> 8;
2626 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2627 S_008F14_DATA_FORMAT(fmask_format) |
2628 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2629 view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
2630 S_008F18_HEIGHT(height - 1);
2631 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2632 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2633 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2634 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2635 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
2636 S_008F1C_TYPE(si_tex_dim(texture->target, 0));
2637 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2638 S_008F20_PITCH(tmp->fmask.pitch - 1);
2639 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2640 S_008F24_LAST_ARRAY(state->u.tex.last_layer);
2641 view->fmask_state[6] = 0;
2642 view->fmask_state[7] = 0;
2643 }
2644
2645 return &view->base;
2646 }
2647
2648 static struct pipe_sampler_view *
2649 si_create_sampler_view(struct pipe_context *ctx,
2650 struct pipe_resource *texture,
2651 const struct pipe_sampler_view *state)
2652 {
2653 return si_create_sampler_view_custom(ctx, texture, state,
2654 texture ? texture->width0 : 0,
2655 texture ? texture->height0 : 0, 0);
2656 }
2657
2658 static void si_sampler_view_destroy(struct pipe_context *ctx,
2659 struct pipe_sampler_view *state)
2660 {
2661 struct si_sampler_view *view = (struct si_sampler_view *)state;
2662
2663 if (view->resource && view->resource->b.b.target == PIPE_BUFFER)
2664 LIST_DELINIT(&view->list);
2665
2666 pipe_resource_reference(&state->texture, NULL);
2667 FREE(view);
2668 }
2669
2670 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2671 {
2672 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2673 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2674 (linear_filter &&
2675 (wrap == PIPE_TEX_WRAP_CLAMP ||
2676 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2677 }
2678
2679 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2680 {
2681 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2682 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2683
2684 return (state->border_color.ui[0] || state->border_color.ui[1] ||
2685 state->border_color.ui[2] || state->border_color.ui[3]) &&
2686 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2687 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2688 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2689 }
2690
/* Translate a gallium sampler state into the 4-dword SI sampler resource
 * descriptor (rstate->val). The border color is stored separately and
 * uploaded later by si_set_border_colors() when the state is bound. */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
	/* Anisotropic filtering selects the aniso variant of the XY filters. */
	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
	unsigned border_color_type;

	if (rstate == NULL) {
		return NULL;
	}

	/* Use a register-based border color only when it would actually be
	 * visible; otherwise the built-in transparent black suffices. */
	if (sampler_state_needs_border_color(state))
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
	else
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;

	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  r600_tex_aniso_filter(state->max_anisotropy) << 9 |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
	/* LODs are 4.8 fixed point, clamped to the hardware range. */
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);

	/* Keep a copy of the color; the BORDER_COLOR_PTR field in val[3] is
	 * filled in when the table slot is allocated at bind time. */
	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
		memcpy(rstate->border_color, state->border_color.ui,
		       sizeof(rstate->border_color));
	}

	return rstate;
}
2729
/* Upload border colors and update the pointers in resource descriptors.
 * There can only be 4096 border colors per context.
 *
 * XXX: This is broken if the buffer gets reallocated.
 */
static void si_set_border_colors(struct si_context *sctx, unsigned count,
				 void **states)
{
	struct si_sampler_state **rstates = (struct si_sampler_state **)states;
	uint32_t *border_color_table = NULL;
	int i, j;

	for (i = 0; i < count; i++) {
		/* Only states that opted into a register border color need
		 * a table slot. */
		if (rstates[i] &&
		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
			/* (Re)allocate the table when it doesn't exist yet or
			 * the remaining slots can't hold the rest of the batch
			 * without overflowing the BORDER_COLOR_PTR field. */
			if (!sctx->border_color_table ||
			    ((sctx->border_color_offset + count - i) &
			     C_008F3C_BORDER_COLOR_PTR)) {
				r600_resource_reference(&sctx->border_color_table, NULL);
				sctx->border_color_offset = 0;

				/* 4096 entries of 4 dwords each. */
				sctx->border_color_table =
					si_resource_create_custom(&sctx->screen->b.b,
								  PIPE_USAGE_DYNAMIC,
								  4096 * 4 * 4);
			}

			/* Map lazily, at most once per call.
			 * NOTE(review): the map result is not NULL-checked,
			 * and if the table is reallocated by a later
			 * iteration this stale mapping is still written to —
			 * presumably covered by the XXX above; verify. */
			if (!border_color_table) {
				border_color_table =
					sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
							       sctx->b.rings.gfx.cs,
							       PIPE_TRANSFER_WRITE |
							       PIPE_TRANSFER_UNSYNCHRONIZED);
			}

			for (j = 0; j < 4; j++) {
				border_color_table[4 * sctx->border_color_offset + j] =
					util_le32_to_cpu(rstates[i]->border_color[j]);
			}

			/* Patch the descriptor to point at the new slot. */
			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
		}
	}

	/* If anything was uploaded, (re)program the table base address. */
	if (border_color_table) {
		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

		uint64_t va_offset = sctx->border_color_table->gpu_address;

		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
		si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
			      RADEON_PRIO_SHADER_DATA);
		si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
	}
}
2789
2790 static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
2791 unsigned start, unsigned count,
2792 void **states)
2793 {
2794 struct si_context *sctx = (struct si_context *)ctx;
2795
2796 if (!count || shader >= SI_NUM_SHADERS)
2797 return;
2798
2799 si_set_border_colors(sctx, count, states);
2800 si_set_sampler_descriptors(sctx, shader, start, count, states);
2801 }
2802
2803 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2804 {
2805 struct si_context *sctx = (struct si_context *)ctx;
2806 struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
2807 struct si_pm4_state *pm4 = &state->pm4;
2808 uint16_t mask = sample_mask;
2809
2810 if (state == NULL)
2811 return;
2812
2813 state->sample_mask = mask;
2814 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
2815 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
2816
2817 si_pm4_set_state(sctx, sample_mask, state);
2818 }
2819
/* Free a sampler state object; it owns no GPU resources, only the heap
 * allocation made in si_create_sampler_state(). */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	free(state);
}
2824
2825 /*
2826 * Vertex elements & buffers
2827 */
2828
2829 static void *si_create_vertex_elements(struct pipe_context *ctx,
2830 unsigned count,
2831 const struct pipe_vertex_element *elements)
2832 {
2833 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
2834 int i;
2835
2836 assert(count < SI_MAX_ATTRIBS);
2837 if (!v)
2838 return NULL;
2839
2840 v->count = count;
2841 for (i = 0; i < count; ++i) {
2842 const struct util_format_description *desc;
2843 unsigned data_format, num_format;
2844 int first_non_void;
2845
2846 desc = util_format_description(elements[i].src_format);
2847 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
2848 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2849 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2850
2851 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2852 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2853 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2854 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2855 S_008F0C_NUM_FORMAT(num_format) |
2856 S_008F0C_DATA_FORMAT(data_format);
2857 v->format_size[i] = desc->block.bits / 8;
2858 }
2859 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
2860
2861 return v;
2862 }
2863
2864 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
2865 {
2866 struct si_context *sctx = (struct si_context *)ctx;
2867 struct si_vertex_element *v = (struct si_vertex_element*)state;
2868
2869 sctx->vertex_elements = v;
2870 sctx->vertex_buffers_dirty = true;
2871 }
2872
2873 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
2874 {
2875 struct si_context *sctx = (struct si_context *)ctx;
2876
2877 if (sctx->vertex_elements == state)
2878 sctx->vertex_elements = NULL;
2879 FREE(state);
2880 }
2881
2882 static void si_set_vertex_buffers(struct pipe_context *ctx,
2883 unsigned start_slot, unsigned count,
2884 const struct pipe_vertex_buffer *buffers)
2885 {
2886 struct si_context *sctx = (struct si_context *)ctx;
2887 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
2888 int i;
2889
2890 assert(start_slot + count <= Elements(sctx->vertex_buffer));
2891
2892 if (buffers) {
2893 for (i = 0; i < count; i++) {
2894 const struct pipe_vertex_buffer *src = buffers + i;
2895 struct pipe_vertex_buffer *dsti = dst + i;
2896
2897 pipe_resource_reference(&dsti->buffer, src->buffer);
2898 dsti->buffer_offset = src->buffer_offset;
2899 dsti->stride = src->stride;
2900 r600_context_add_resource_size(ctx, src->buffer);
2901 }
2902 } else {
2903 for (i = 0; i < count; i++) {
2904 pipe_resource_reference(&dst[i].buffer, NULL);
2905 }
2906 }
2907 sctx->vertex_buffers_dirty = true;
2908 }
2909
2910 static void si_set_index_buffer(struct pipe_context *ctx,
2911 const struct pipe_index_buffer *ib)
2912 {
2913 struct si_context *sctx = (struct si_context *)ctx;
2914
2915 if (ib) {
2916 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
2917 memcpy(&sctx->index_buffer, ib, sizeof(*ib));
2918 r600_context_add_resource_size(ctx, ib->buffer);
2919 } else {
2920 pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
2921 }
2922 }
2923
2924 /*
2925 * Misc
2926 */
/* pipe_context::set_polygon_stipple hook.
 *
 * Uploads the 32x32 stipple pattern as a texture bound to the reserved
 * SI_POLY_STIPPLE_SAMPLER fragment-sampler slot, and lazily creates the
 * matching sampler state the first time stippling is set.
 */
static void si_set_polygon_stipple(struct pipe_context *ctx,
				   const struct pipe_poly_stipple *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_resource *tex;
	struct pipe_sampler_view *view;
	bool is_zero = true;
	bool is_one = true;
	int i;

	/* The hardware obeys 0 and 1 swizzles in the descriptor even if
	 * the resource is NULL/invalid. Take advantage of this fact and skip
	 * texture allocation if the stipple pattern is constant.
	 *
	 * This is an optimization for the common case when stippling isn't
	 * used but set_polygon_stipple is still called by st/mesa.
	 */
	for (i = 0; i < Elements(state->stipple); i++) {
		is_zero = is_zero && state->stipple[i] == 0;
		is_one = is_one && state->stipple[i] == 0xffffffff;
	}

	if (is_zero || is_one) {
		/* Constant pattern: a resource-less view whose constant
		 * swizzles produce the right alpha everywhere. */
		struct pipe_sampler_view templ = {{0}};

		templ.swizzle_r = PIPE_SWIZZLE_ZERO;
		templ.swizzle_g = PIPE_SWIZZLE_ZERO;
		templ.swizzle_b = PIPE_SWIZZLE_ZERO;
		/* The pattern should be inverted in the texture. */
		templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO;

		view = ctx->create_sampler_view(ctx, NULL, &templ);
	} else {
		/* Create a new texture. */
		tex = util_pstipple_create_stipple_texture(ctx, state->stipple);
		if (!tex)
			return;

		view = util_pstipple_create_sampler_view(ctx, tex);
		/* The view now holds the only reference we need. */
		pipe_resource_reference(&tex, NULL);
	}

	ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT,
			       SI_POLY_STIPPLE_SAMPLER, 1, &view);
	/* The bind above took its own reference; drop ours. */
	pipe_sampler_view_reference(&view, NULL);

	/* Bind the sampler state if needed. */
	if (!sctx->pstipple_sampler_state) {
		sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx);
		ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT,
					 SI_POLY_STIPPLE_SAMPLER, 1,
					 &sctx->pstipple_sampler_state);
	}
}
2981
2982 static void si_set_tess_state(struct pipe_context *ctx,
2983 const float default_outer_level[4],
2984 const float default_inner_level[2])
2985 {
2986 struct si_context *sctx = (struct si_context *)ctx;
2987 struct pipe_constant_buffer cb;
2988 float array[8];
2989
2990 memcpy(array, default_outer_level, sizeof(float) * 4);
2991 memcpy(array+4, default_inner_level, sizeof(float) * 2);
2992
2993 cb.buffer = NULL;
2994 cb.user_buffer = NULL;
2995 cb.buffer_size = sizeof(array);
2996
2997 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
2998 (void*)array, sizeof(array),
2999 &cb.buffer_offset);
3000
3001 ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL,
3002 SI_DRIVER_STATE_CONST_BUF, &cb);
3003 pipe_resource_reference(&cb.buffer, NULL);
3004 }
3005
3006 static void si_texture_barrier(struct pipe_context *ctx)
3007 {
3008 struct si_context *sctx = (struct si_context *)ctx;
3009
3010 sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
3011 SI_CONTEXT_INV_TC_L2 |
3012 SI_CONTEXT_FLUSH_AND_INV_CB;
3013 }
3014
3015 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3016 {
3017 struct pipe_blend_state blend;
3018
3019 memset(&blend, 0, sizeof(blend));
3020 blend.independent_blend_enable = true;
3021 blend.rt[0].colormask = 0xf;
3022 return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3023 }
3024
/* r600_common_context::need_gfx_cs_space hook: thin adapter that
 * forwards to si_need_cs_space with the derived context type. */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_need_cs_space(sctx, num_dw, include_draw_vbo);
}
3030
3031 static void si_init_config(struct si_context *sctx);
3032
/* Install all radeonsi state-related hooks into the pipe_context /
 * r600_common_context vtables, register the driver-internal atoms, and
 * build the one-time init command stream (si_init_config).
 */
void si_init_state_functions(struct si_context *sctx)
{
	/* Atoms: each gets an emit callback, a dword budget and an id
	 * derived from its slot in sctx->atoms (see si_init_atom). */
	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);

	/* Blend state. */
	sctx->b.b.create_blend_state = si_create_blend_state;
	sctx->b.b.bind_blend_state = si_bind_blend_state;
	sctx->b.b.delete_blend_state = si_delete_blend_state;
	sctx->b.b.set_blend_color = si_set_blend_color;

	/* Rasterizer state. */
	sctx->b.b.create_rasterizer_state = si_create_rs_state;
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;

	/* Depth/stencil/alpha state. */
	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	/* Internal states used for decompression/resolve blits and
	 * fast-clear elimination. */
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);

	sctx->b.b.set_clip_state = si_set_clip_state;
	sctx->b.b.set_scissor_states = si_set_scissor_states;
	sctx->b.b.set_viewport_states = si_set_viewport_states;
	sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;

	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
	sctx->b.b.get_sample_position = cayman_get_sample_position;

	/* Samplers and sampler views. */
	sctx->b.b.create_sampler_state = si_create_sampler_state;
	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;

	sctx->b.b.create_sampler_view = si_create_sampler_view;
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;

	sctx->b.b.set_sample_mask = si_set_sample_mask;

	/* Vertex elements and buffers. */
	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
	sctx->b.b.set_index_buffer = si_set_index_buffer;

	/* Misc. */
	sctx->b.b.texture_barrier = si_texture_barrier;
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
	sctx->b.b.set_min_samples = si_set_min_samples;
	sctx->b.b.set_tess_state = si_set_tess_state;

	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;

	sctx->b.b.draw_vbo = si_draw_vbo;

	/* SDMA copy implementation differs between SI and CIK+. */
	if (sctx->b.chip_class >= CIK) {
		sctx->b.dma_copy = cik_sdma_copy;
	} else {
		sctx->b.dma_copy = si_dma_copy;
	}

	si_init_config(sctx);
}
3100
/* Program PA_SC_RASTER_CONFIG(_1) on chips with harvested (disabled)
 * render backends.
 *
 * The per-family default raster_config values assume all RBs are
 * present; when some are fused off (rb_mask has holes), the SE/packer/RB
 * mapping fields must be patched so work is only routed to live RBs.
 * Each shader engine gets its own PA_SC_RASTER_CONFIG value, written by
 * steering GRBM_GFX_INDEX to that SE (SI/CIK only — the register is
 * privileged on VI).
 */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config,
				  unsigned raster_config_1)
{
	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one contiguous chunk of rb_per_se bits per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	assert(num_se == 1 || num_se == 2 || num_se == 4);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	/* If a whole SE pair has no live RBs, remap the pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= C_028354_SE_PAIR_MAP;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of a pair is fully harvested, remap SE_MAP. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= C_028350_SE_MAP;

			if (!se_mask[idx]) {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* If one packer of this SE has no live RBs, remap PKR_MAP. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Packer 0: remap RB_MAP_PKR0 if one of its two RBs
			 * is harvested. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* Packer 1: same treatment for RB_MAP_PKR1. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX is privileged on VI */
		if (sctx->b.chip_class <= CIK)
			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
				       SE_INDEX(se) | SH_BROADCAST_WRITES |
				       INSTANCE_BROADCAST_WRITES);
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX is privileged on VI */
	if (sctx->b.chip_class <= CIK)
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
			       INSTANCE_BROADCAST_WRITES);
}
3227
3228 static void si_init_config(struct si_context *sctx)
3229 {
3230 unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
3231 unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
3232 unsigned raster_config, raster_config_1;
3233 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
3234 int i;
3235
3236 if (pm4 == NULL)
3237 return;
3238
3239 si_cmd_context_control(pm4);
3240
3241 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
3242 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
3243
3244 /* FIXME calculate these values somehow ??? */
3245 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
3246 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3247 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3248
3249 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3250 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
3251 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3252
3253 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3254 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
3255 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3256 if (sctx->b.chip_class < CIK)
3257 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3258 S_008A14_CLIP_VTX_REORDER_ENA(1));
3259
3260 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3261 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3262
3263 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3264
3265 for (i = 0; i < 16; i++) {
3266 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
3267 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
3268 }
3269
3270 switch (sctx->screen->b.family) {
3271 case CHIP_TAHITI:
3272 case CHIP_PITCAIRN:
3273 raster_config = 0x2a00126a;
3274 raster_config_1 = 0x00000000;
3275 break;
3276 case CHIP_VERDE:
3277 raster_config = 0x0000124a;
3278 raster_config_1 = 0x00000000;
3279 break;
3280 case CHIP_OLAND:
3281 raster_config = 0x00000082;
3282 raster_config_1 = 0x00000000;
3283 break;
3284 case CHIP_HAINAN:
3285 raster_config = 0x00000000;
3286 raster_config_1 = 0x00000000;
3287 break;
3288 case CHIP_BONAIRE:
3289 raster_config = 0x16000012;
3290 raster_config_1 = 0x00000000;
3291 break;
3292 case CHIP_HAWAII:
3293 raster_config = 0x3a00161a;
3294 raster_config_1 = 0x0000002e;
3295 break;
3296 case CHIP_FIJI:
3297 /* Fiji should be same as Hawaii, but that causes corruption in some cases */
3298 raster_config = 0x16000012; /* 0x3a00161a */
3299 raster_config_1 = 0x0000002a; /* 0x0000002e */
3300 break;
3301 case CHIP_TONGA:
3302 raster_config = 0x16000012;
3303 raster_config_1 = 0x0000002a;
3304 break;
3305 case CHIP_ICELAND:
3306 raster_config = 0x00000002;
3307 raster_config_1 = 0x00000000;
3308 break;
3309 case CHIP_CARRIZO:
3310 raster_config = 0x00000002;
3311 raster_config_1 = 0x00000000;
3312 break;
3313 case CHIP_KAVERI:
3314 /* KV should be 0x00000002, but that causes problems with radeon */
3315 raster_config = 0x00000000; /* 0x00000002 */
3316 raster_config_1 = 0x00000000;
3317 break;
3318 case CHIP_KABINI:
3319 case CHIP_MULLINS:
3320 raster_config = 0x00000000;
3321 raster_config_1 = 0x00000000;
3322 break;
3323 default:
3324 fprintf(stderr,
3325 "radeonsi: Unknown GPU, using 0 for raster_config\n");
3326 raster_config = 0x00000000;
3327 raster_config_1 = 0x00000000;
3328 break;
3329 }
3330
3331 /* Always use the default config when all backends are enabled
3332 * (or when we failed to determine the enabled backends).
3333 */
3334 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
3335 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
3336 raster_config);
3337 if (sctx->b.chip_class >= CIK)
3338 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
3339 raster_config_1);
3340 } else {
3341 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
3342 }
3343
3344 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3345 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3346 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3347 S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3348 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3349 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3350 S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3351
3352 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3353 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3354 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
3355 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
3356 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
3357 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
3358 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
3359 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
3360 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
3361 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
3362 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3363 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3364 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3365
3366 /* There is a hang if stencil is used and fast stencil is enabled
3367 * regardless of whether HTILE is depth-only or not.
3368 */
3369 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
3370 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
3371 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
3372 S_02800C_FAST_STENCIL_DISABLE(1));
3373
3374 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3375 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3376 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
3377
3378 if (sctx->b.chip_class >= CIK) {
3379 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc));
3380 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
3381 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe));
3382 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
3383 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3384 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
3385 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3386 }
3387
3388 if (sctx->b.chip_class >= VI) {
3389 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
3390 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
3391 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
3392 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
3393 }
3394
3395 sctx->init_config = pm4;
3396 }