radeonsi: merge si_pipe_shader into si_shader
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Christian König <christian.koenig@amd.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_shader.h"
29 #include "sid.h"
30 #include "radeon/r600_cs.h"
31
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_scan.h"
34 #include "util/u_format.h"
35 #include "util/u_format_s3tc.h"
36 #include "util/u_framebuffer.h"
37 #include "util/u_helpers.h"
38 #include "util/u_memory.h"
39 #include "util/u_simple_shaders.h"
40
41 static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
42 void (*emit)(struct si_context *ctx, struct r600_atom *state),
43 unsigned num_dw)
44 {
45 atom->emit = (void*)emit;
46 atom->num_dw = num_dw;
47 atom->dirty = false;
48 *list_elem = atom;
49 }
50
51 uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
52 {
53 if (sscreen->b.chip_class == CIK &&
54 sscreen->b.info.cik_macrotile_mode_array_valid) {
55 unsigned index, tileb;
56
57 tileb = 8 * 8 * tex->surface.bpe;
58 tileb = MIN2(tex->surface.tile_split, tileb);
59
60 for (index = 0; tileb > 64; index++) {
61 tileb >>= 1;
62 }
63 assert(index < 16);
64
65 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
66 }
67
68 if (sscreen->b.chip_class == SI &&
69 sscreen->b.info.si_tile_mode_array_valid) {
70 /* Don't use stencil_tiling_index, because num_banks is always
71 * read from the depth mode. */
72 unsigned tile_mode_index = tex->surface.tiling_index[0];
73 assert(tile_mode_index < 32);
74
75 return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]);
76 }
77
78 /* The old way. */
79 switch (sscreen->b.tiling_info.num_banks) {
80 case 2:
81 return V_02803C_ADDR_SURF_2_BANK;
82 case 4:
83 return V_02803C_ADDR_SURF_4_BANK;
84 case 8:
85 default:
86 return V_02803C_ADDR_SURF_8_BANK;
87 case 16:
88 return V_02803C_ADDR_SURF_16_BANK;
89 }
90 }
91
92 unsigned cik_tile_split(unsigned tile_split)
93 {
94 switch (tile_split) {
95 case 64:
96 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
97 break;
98 case 128:
99 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
100 break;
101 case 256:
102 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
103 break;
104 case 512:
105 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
106 break;
107 default:
108 case 1024:
109 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
110 break;
111 case 2048:
112 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
113 break;
114 case 4096:
115 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
116 break;
117 }
118 return tile_split;
119 }
120
121 unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
122 {
123 switch (macro_tile_aspect) {
124 default:
125 case 1:
126 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
127 break;
128 case 2:
129 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
130 break;
131 case 4:
132 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
133 break;
134 case 8:
135 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
136 break;
137 }
138 return macro_tile_aspect;
139 }
140
141 unsigned cik_bank_wh(unsigned bankwh)
142 {
143 switch (bankwh) {
144 default:
145 case 1:
146 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
147 break;
148 case 2:
149 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
150 break;
151 case 4:
152 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
153 break;
154 case 8:
155 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
156 break;
157 }
158 return bankwh;
159 }
160
161 unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
162 {
163 if (sscreen->b.info.si_tile_mode_array_valid) {
164 uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];
165
166 return G_009910_PIPE_CONFIG(gb_tile_mode);
167 }
168
169 /* This is probably broken for a lot of chips, but it's only used
170 * if the kernel cannot return the tile mode array for CIK. */
171 switch (sscreen->b.info.r600_num_tile_pipes) {
172 case 16:
173 return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
174 case 8:
175 return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
176 case 4:
177 default:
178 if (sscreen->b.info.r600_num_backends == 4)
179 return V_02803C_X_ADDR_SURF_P4_16X16;
180 else
181 return V_02803C_X_ADDR_SURF_P4_8X16;
182 case 2:
183 return V_02803C_ADDR_SURF_P2;
184 }
185 }
186
187 static unsigned si_map_swizzle(unsigned swizzle)
188 {
189 switch (swizzle) {
190 case UTIL_FORMAT_SWIZZLE_Y:
191 return V_008F0C_SQ_SEL_Y;
192 case UTIL_FORMAT_SWIZZLE_Z:
193 return V_008F0C_SQ_SEL_Z;
194 case UTIL_FORMAT_SWIZZLE_W:
195 return V_008F0C_SQ_SEL_W;
196 case UTIL_FORMAT_SWIZZLE_0:
197 return V_008F0C_SQ_SEL_0;
198 case UTIL_FORMAT_SWIZZLE_1:
199 return V_008F0C_SQ_SEL_1;
200 default: /* UTIL_FORMAT_SWIZZLE_X */
201 return V_008F0C_SQ_SEL_X;
202 }
203 }
204
/* Convert a float to unsigned fixed point with 'frac_bits' fractional bits.
 * Uses an unsigned shift: "1 << 31" on a signed int is undefined behavior,
 * so shift 1u instead to stay defined for the full 0..31 range. */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	return value * (1u << frac_bits);
}
209
/* Pack a float into 12.4 fixed-point, clamping to the representable
 * range [0, 4096). */
static unsigned si_pack_float_12p4(float x)
{
	if (x <= 0)
		return 0;
	if (x >= 4096)
		return 0xffff;
	return x * 16;
}
216
217 /*
218 * inferred framebuffer and blender state
219 */
220 static void si_update_fb_blend_state(struct si_context *sctx)
221 {
222 struct si_pm4_state *pm4;
223 struct si_state_blend *blend = sctx->queued.named.blend;
224 uint32_t mask;
225
226 if (blend == NULL)
227 return;
228
229 pm4 = si_pm4_alloc_state(sctx);
230 if (pm4 == NULL)
231 return;
232
233 mask = (1ULL << ((unsigned)sctx->framebuffer.state.nr_cbufs * 4)) - 1;
234 mask &= blend->cb_target_mask;
235 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
236
237 si_pm4_set_state(sctx, fb_blend, pm4);
238 }
239
240 /*
241 * Blender functions
242 */
243
244 static uint32_t si_translate_blend_function(int blend_func)
245 {
246 switch (blend_func) {
247 case PIPE_BLEND_ADD:
248 return V_028780_COMB_DST_PLUS_SRC;
249 case PIPE_BLEND_SUBTRACT:
250 return V_028780_COMB_SRC_MINUS_DST;
251 case PIPE_BLEND_REVERSE_SUBTRACT:
252 return V_028780_COMB_DST_MINUS_SRC;
253 case PIPE_BLEND_MIN:
254 return V_028780_COMB_MIN_DST_SRC;
255 case PIPE_BLEND_MAX:
256 return V_028780_COMB_MAX_DST_SRC;
257 default:
258 R600_ERR("Unknown blend function %d\n", blend_func);
259 assert(0);
260 break;
261 }
262 return 0;
263 }
264
265 static uint32_t si_translate_blend_factor(int blend_fact)
266 {
267 switch (blend_fact) {
268 case PIPE_BLENDFACTOR_ONE:
269 return V_028780_BLEND_ONE;
270 case PIPE_BLENDFACTOR_SRC_COLOR:
271 return V_028780_BLEND_SRC_COLOR;
272 case PIPE_BLENDFACTOR_SRC_ALPHA:
273 return V_028780_BLEND_SRC_ALPHA;
274 case PIPE_BLENDFACTOR_DST_ALPHA:
275 return V_028780_BLEND_DST_ALPHA;
276 case PIPE_BLENDFACTOR_DST_COLOR:
277 return V_028780_BLEND_DST_COLOR;
278 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
279 return V_028780_BLEND_SRC_ALPHA_SATURATE;
280 case PIPE_BLENDFACTOR_CONST_COLOR:
281 return V_028780_BLEND_CONSTANT_COLOR;
282 case PIPE_BLENDFACTOR_CONST_ALPHA:
283 return V_028780_BLEND_CONSTANT_ALPHA;
284 case PIPE_BLENDFACTOR_ZERO:
285 return V_028780_BLEND_ZERO;
286 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
287 return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
288 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
289 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
290 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
291 return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
292 case PIPE_BLENDFACTOR_INV_DST_COLOR:
293 return V_028780_BLEND_ONE_MINUS_DST_COLOR;
294 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
295 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
296 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
297 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
298 case PIPE_BLENDFACTOR_SRC1_COLOR:
299 return V_028780_BLEND_SRC1_COLOR;
300 case PIPE_BLENDFACTOR_SRC1_ALPHA:
301 return V_028780_BLEND_SRC1_ALPHA;
302 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
303 return V_028780_BLEND_INV_SRC1_COLOR;
304 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
305 return V_028780_BLEND_INV_SRC1_ALPHA;
306 default:
307 R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
308 assert(0);
309 break;
310 }
311 return 0;
312 }
313
314 static void *si_create_blend_state_mode(struct pipe_context *ctx,
315 const struct pipe_blend_state *state,
316 unsigned mode)
317 {
318 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
319 struct si_pm4_state *pm4 = &blend->pm4;
320
321 uint32_t color_control = 0;
322
323 if (blend == NULL)
324 return NULL;
325
326 blend->alpha_to_one = state->alpha_to_one;
327
328 if (state->logicop_enable) {
329 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
330 } else {
331 color_control |= S_028808_ROP3(0xcc);
332 }
333
334 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
335 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
336 S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
337 S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
338 S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
339 S_028B70_ALPHA_TO_MASK_OFFSET3(2));
340
341 blend->cb_target_mask = 0;
342 for (int i = 0; i < 8; i++) {
343 /* state->rt entries > 0 only written if independent blending */
344 const int j = state->independent_blend_enable ? i : 0;
345
346 unsigned eqRGB = state->rt[j].rgb_func;
347 unsigned srcRGB = state->rt[j].rgb_src_factor;
348 unsigned dstRGB = state->rt[j].rgb_dst_factor;
349 unsigned eqA = state->rt[j].alpha_func;
350 unsigned srcA = state->rt[j].alpha_src_factor;
351 unsigned dstA = state->rt[j].alpha_dst_factor;
352
353 unsigned blend_cntl = 0;
354
355 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
356 blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
357
358 if (!state->rt[j].blend_enable) {
359 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
360 continue;
361 }
362
363 blend_cntl |= S_028780_ENABLE(1);
364 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
365 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
366 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
367
368 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
369 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
370 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
371 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
372 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
373 }
374 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
375 }
376
377 if (blend->cb_target_mask) {
378 color_control |= S_028808_MODE(mode);
379 } else {
380 color_control |= S_028808_MODE(V_028808_CB_DISABLE);
381 }
382 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
383
384 return blend;
385 }
386
387 static void *si_create_blend_state(struct pipe_context *ctx,
388 const struct pipe_blend_state *state)
389 {
390 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
391 }
392
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_blend *blend = (struct si_state_blend *)state;

	/* Bind the CSO, then refresh the framebuffer-dependent target mask. */
	si_pm4_bind_state(sctx, blend, blend);
	si_update_fb_blend_state(sctx);
}
399
400 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
401 {
402 struct si_context *sctx = (struct si_context *)ctx;
403 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
404 }
405
406 static void si_set_blend_color(struct pipe_context *ctx,
407 const struct pipe_blend_color *state)
408 {
409 struct si_context *sctx = (struct si_context *)ctx;
410 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
411
412 if (pm4 == NULL)
413 return;
414
415 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
416 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
417 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
418 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
419
420 si_pm4_set_state(sctx, blend_color, pm4);
421 }
422
423 /*
424 * Clipping, scissors and viewport
425 */
426
427 static void si_set_clip_state(struct pipe_context *ctx,
428 const struct pipe_clip_state *state)
429 {
430 struct si_context *sctx = (struct si_context *)ctx;
431 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
432 struct pipe_constant_buffer cb;
433
434 if (pm4 == NULL)
435 return;
436
437 for (int i = 0; i < 6; i++) {
438 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
439 fui(state->ucp[i][0]));
440 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
441 fui(state->ucp[i][1]));
442 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
443 fui(state->ucp[i][2]));
444 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
445 fui(state->ucp[i][3]));
446 }
447
448 cb.buffer = NULL;
449 cb.user_buffer = state->ucp;
450 cb.buffer_offset = 0;
451 cb.buffer_size = 4*4*8;
452 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
453 pipe_resource_reference(&cb.buffer, NULL);
454
455 si_pm4_set_state(sctx, clip, pm4);
456 }
457
458 static void si_set_scissor_states(struct pipe_context *ctx,
459 unsigned start_slot,
460 unsigned num_scissors,
461 const struct pipe_scissor_state *state)
462 {
463 struct si_context *sctx = (struct si_context *)ctx;
464 struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
465 struct si_pm4_state *pm4 = &scissor->pm4;
466
467 if (scissor == NULL)
468 return;
469
470 scissor->scissor = *state;
471 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
472 S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
473 S_028250_WINDOW_OFFSET_DISABLE(1));
474 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
475 S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
476
477 si_pm4_set_state(sctx, scissor, scissor);
478 }
479
480 static void si_set_viewport_states(struct pipe_context *ctx,
481 unsigned start_slot,
482 unsigned num_viewports,
483 const struct pipe_viewport_state *state)
484 {
485 struct si_context *sctx = (struct si_context *)ctx;
486 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
487 struct si_pm4_state *pm4 = &viewport->pm4;
488
489 if (viewport == NULL)
490 return;
491
492 viewport->viewport = *state;
493 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
494 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
495 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
496 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
497 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
498 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
499
500 si_pm4_set_state(sctx, viewport, viewport);
501 }
502
503 /*
504 * inferred state between framebuffer and rasterizer
505 */
506 static void si_update_fb_rs_state(struct si_context *sctx)
507 {
508 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
509 struct si_pm4_state *pm4;
510 float offset_units;
511
512 if (!rs || !sctx->framebuffer.state.zsbuf)
513 return;
514
515 offset_units = sctx->queued.named.rasterizer->offset_units;
516 switch (sctx->framebuffer.state.zsbuf->texture->format) {
517 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
518 case PIPE_FORMAT_X8Z24_UNORM:
519 case PIPE_FORMAT_Z24X8_UNORM:
520 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
521 offset_units *= 2.0f;
522 break;
523 case PIPE_FORMAT_Z32_FLOAT:
524 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
525 offset_units *= 1.0f;
526 break;
527 case PIPE_FORMAT_Z16_UNORM:
528 offset_units *= 4.0f;
529 break;
530 default:
531 return;
532 }
533
534 pm4 = si_pm4_alloc_state(sctx);
535
536 if (pm4 == NULL)
537 return;
538
539 /* FIXME some of those reg can be computed with cso */
540 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
541 fui(sctx->queued.named.rasterizer->offset_scale));
542 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
543 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
544 fui(sctx->queued.named.rasterizer->offset_scale));
545 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
546
547 si_pm4_set_state(sctx, fb_rs, pm4);
548 }
549
550 /*
551 * Rasterizer
552 */
553
554 static uint32_t si_translate_fill(uint32_t func)
555 {
556 switch(func) {
557 case PIPE_POLYGON_MODE_FILL:
558 return V_028814_X_DRAW_TRIANGLES;
559 case PIPE_POLYGON_MODE_LINE:
560 return V_028814_X_DRAW_LINES;
561 case PIPE_POLYGON_MODE_POINT:
562 return V_028814_X_DRAW_POINTS;
563 default:
564 assert(0);
565 return V_028814_X_DRAW_POINTS;
566 }
567 }
568
569 static void *si_create_rs_state(struct pipe_context *ctx,
570 const struct pipe_rasterizer_state *state)
571 {
572 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
573 struct si_pm4_state *pm4 = &rs->pm4;
574 unsigned tmp;
575 unsigned prov_vtx = 1, polygon_dual_mode;
576 float psize_min, psize_max;
577
578 if (rs == NULL) {
579 return NULL;
580 }
581
582 rs->two_side = state->light_twoside;
583 rs->multisample_enable = state->multisample;
584 rs->clip_plane_enable = state->clip_plane_enable;
585 rs->line_stipple_enable = state->line_stipple_enable;
586
587 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
588 state->fill_back != PIPE_POLYGON_MODE_FILL);
589
590 if (state->flatshade_first)
591 prov_vtx = 0;
592
593 rs->flatshade = state->flatshade;
594 rs->sprite_coord_enable = state->sprite_coord_enable;
595 rs->pa_sc_line_stipple = state->line_stipple_enable ?
596 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
597 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
598 rs->pa_su_sc_mode_cntl =
599 S_028814_PROVOKING_VTX_LAST(prov_vtx) |
600 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
601 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
602 S_028814_FACE(!state->front_ccw) |
603 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
604 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
605 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
606 S_028814_POLY_MODE(polygon_dual_mode) |
607 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
608 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back));
609 rs->pa_cl_clip_cntl =
610 S_028810_PS_UCP_MODE(3) |
611 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
612 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
613 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
614 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
615
616 /* offset */
617 rs->offset_units = state->offset_units;
618 rs->offset_scale = state->offset_scale * 12.0f;
619
620 tmp = S_0286D4_FLAT_SHADE_ENA(1);
621 if (state->sprite_coord_enable) {
622 tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
623 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
624 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
625 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
626 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
627 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
628 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
629 }
630 }
631 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
632
633 /* point size 12.4 fixed point */
634 tmp = (unsigned)(state->point_size * 8.0);
635 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
636
637 if (state->point_size_per_vertex) {
638 psize_min = util_get_min_point_size(state);
639 psize_max = 8192;
640 } else {
641 /* Force the point size to be as if the vertex output was disabled. */
642 psize_min = state->point_size;
643 psize_max = state->point_size;
644 }
645 /* Divide by two, because 0.5 = 1 pixel. */
646 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
647 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
648 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
649
650 tmp = (unsigned)state->line_width * 8;
651 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
652 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
653 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
654 S_028A48_MSAA_ENABLE(state->multisample) |
655 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor));
656
657 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
658 S_028BE4_PIX_CENTER(state->half_pixel_center) |
659 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
660
661 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
662
663 return rs;
664 }
665
666 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
667 {
668 struct si_context *sctx = (struct si_context *)ctx;
669 struct si_state_rasterizer *old_rs =
670 (struct si_state_rasterizer*)sctx->queued.named.rasterizer;
671 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
672
673 if (state == NULL)
674 return;
675
676 // TODO
677 sctx->sprite_coord_enable = rs->sprite_coord_enable;
678 sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
679 sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
680
681 if (sctx->framebuffer.nr_samples > 1 &&
682 (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
683 sctx->db_render_state.dirty = true;
684
685 si_pm4_bind_state(sctx, rasterizer, rs);
686 si_update_fb_rs_state(sctx);
687 }
688
689 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
690 {
691 struct si_context *sctx = (struct si_context *)ctx;
692 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
693 }
694
695 /*
 * inferred state between dsa and stencil ref
697 */
698 static void si_update_dsa_stencil_ref(struct si_context *sctx)
699 {
700 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
701 struct pipe_stencil_ref *ref = &sctx->stencil_ref;
702 struct si_state_dsa *dsa = sctx->queued.named.dsa;
703
704 if (pm4 == NULL)
705 return;
706
707 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
708 S_028430_STENCILTESTVAL(ref->ref_value[0]) |
709 S_028430_STENCILMASK(dsa->valuemask[0]) |
710 S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
711 S_028430_STENCILOPVAL(1));
712 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
713 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
714 S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
715 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
716 S_028434_STENCILOPVAL_BF(1));
717
718 si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
719 }
720
721 static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
722 const struct pipe_stencil_ref *state)
723 {
724 struct si_context *sctx = (struct si_context *)ctx;
725 sctx->stencil_ref = *state;
726 si_update_dsa_stencil_ref(sctx);
727 }
728
729
730 /*
731 * DSA
732 */
733
734 static uint32_t si_translate_stencil_op(int s_op)
735 {
736 switch (s_op) {
737 case PIPE_STENCIL_OP_KEEP:
738 return V_02842C_STENCIL_KEEP;
739 case PIPE_STENCIL_OP_ZERO:
740 return V_02842C_STENCIL_ZERO;
741 case PIPE_STENCIL_OP_REPLACE:
742 return V_02842C_STENCIL_REPLACE_TEST;
743 case PIPE_STENCIL_OP_INCR:
744 return V_02842C_STENCIL_ADD_CLAMP;
745 case PIPE_STENCIL_OP_DECR:
746 return V_02842C_STENCIL_SUB_CLAMP;
747 case PIPE_STENCIL_OP_INCR_WRAP:
748 return V_02842C_STENCIL_ADD_WRAP;
749 case PIPE_STENCIL_OP_DECR_WRAP:
750 return V_02842C_STENCIL_SUB_WRAP;
751 case PIPE_STENCIL_OP_INVERT:
752 return V_02842C_STENCIL_INVERT;
753 default:
754 R600_ERR("Unknown stencil op %d", s_op);
755 assert(0);
756 break;
757 }
758 return 0;
759 }
760
761 static void *si_create_dsa_state(struct pipe_context *ctx,
762 const struct pipe_depth_stencil_alpha_state *state)
763 {
764 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
765 struct si_pm4_state *pm4 = &dsa->pm4;
766 unsigned db_depth_control;
767 uint32_t db_stencil_control = 0;
768
769 if (dsa == NULL) {
770 return NULL;
771 }
772
773 dsa->valuemask[0] = state->stencil[0].valuemask;
774 dsa->valuemask[1] = state->stencil[1].valuemask;
775 dsa->writemask[0] = state->stencil[0].writemask;
776 dsa->writemask[1] = state->stencil[1].writemask;
777
778 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
779 S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
780 S_028800_ZFUNC(state->depth.func);
781
782 /* stencil */
783 if (state->stencil[0].enabled) {
784 db_depth_control |= S_028800_STENCIL_ENABLE(1);
785 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
786 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
787 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
788 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
789
790 if (state->stencil[1].enabled) {
791 db_depth_control |= S_028800_BACKFACE_ENABLE(1);
792 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
793 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
794 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
795 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
796 }
797 }
798
799 /* alpha */
800 if (state->alpha.enabled) {
801 dsa->alpha_func = state->alpha.func;
802 dsa->alpha_ref = state->alpha.ref_value;
803
804 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
805 SI_SGPR_ALPHA_REF * 4, fui(dsa->alpha_ref));
806 } else {
807 dsa->alpha_func = PIPE_FUNC_ALWAYS;
808 }
809
810 /* misc */
811 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
812 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
813
814 return dsa;
815 }
816
817 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
818 {
819 struct si_context *sctx = (struct si_context *)ctx;
820 struct si_state_dsa *dsa = state;
821
822 if (state == NULL)
823 return;
824
825 si_pm4_bind_state(sctx, dsa, dsa);
826 si_update_dsa_stencil_ref(sctx);
827 }
828
829 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
830 {
831 struct si_context *sctx = (struct si_context *)ctx;
832 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
833 }
834
835 static void *si_create_db_flush_dsa(struct si_context *sctx)
836 {
837 struct pipe_depth_stencil_alpha_state dsa = {};
838
839 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
840 }
841
842 /* DB RENDER STATE */
843
844 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
845 {
846 struct si_context *sctx = (struct si_context*)ctx;
847
848 sctx->db_render_state.dirty = true;
849 }
850
/* Emit the DB render state atom: DB_RENDER_CONTROL, DB_COUNT_CONTROL,
 * DB_RENDER_OVERRIDE2 and DB_SHADER_CONTROL, derived from the current
 * decompress/clear flags, occlusion query count and rasterizer state. */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	/* The next two radeon_emit() calls fill this 2-register sequence
	 * (DB_RENDER_CONTROL, then DB_COUNT_CONTROL) — the order and
	 * count must not change. */
	r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		/* DB->CB copy of depth/stencil (decompress blit). */
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_inplace_flush_enabled) {
		/* In-place decompression of depth/stencil. */
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(1) |
			    S_028000_STENCIL_COMPRESS_DISABLE(1));
	} else if (sctx->db_depth_clear) {
		radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
	} else {
		radeon_emit(cs, 0);
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0) {
		/* CIK has extra enable bits compared to SI. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	if (sctx->db_depth_disable_expclear) {
		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
				       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
	} else {
		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
	}

	/* Combine the per-shader control bits with context-dependent ones. */
	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
			    S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
			    sctx->ps_db_shader_control;

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}
919
920 /*
921 * format translation
922 */
/* Translate a gallium pixel format to the CB_COLOR*_INFO hardware color
 * format, keyed on channel count and per-channel bit sizes.  Returns
 * V_028C70_COLOR_INVALID for anything the CB can't render to. */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);

/* True if the four channel sizes match (x,y,z,w) exactly. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
         desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	/* Only plain (array/packed) layouts beyond this point. */
	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			/* NOTE(review): the 8,24 <-> 24_8 pairing looks
			 * swapped relative to the macro argument order —
			 * presumably intentional (hw names channels in the
			 * opposite order); confirm against sid.h. */
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}
994
995 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
996 {
997 if (SI_BIG_ENDIAN) {
998 switch(colorformat) {
999 /* 8-bit buffers. */
1000 case V_028C70_COLOR_8:
1001 return V_028C70_ENDIAN_NONE;
1002
1003 /* 16-bit buffers. */
1004 case V_028C70_COLOR_5_6_5:
1005 case V_028C70_COLOR_1_5_5_5:
1006 case V_028C70_COLOR_4_4_4_4:
1007 case V_028C70_COLOR_16:
1008 case V_028C70_COLOR_8_8:
1009 return V_028C70_ENDIAN_8IN16;
1010
1011 /* 32-bit buffers. */
1012 case V_028C70_COLOR_8_8_8_8:
1013 case V_028C70_COLOR_2_10_10_10:
1014 case V_028C70_COLOR_8_24:
1015 case V_028C70_COLOR_24_8:
1016 case V_028C70_COLOR_16_16:
1017 return V_028C70_ENDIAN_8IN32;
1018
1019 /* 64-bit buffers. */
1020 case V_028C70_COLOR_16_16_16_16:
1021 return V_028C70_ENDIAN_8IN16;
1022
1023 case V_028C70_COLOR_32_32:
1024 return V_028C70_ENDIAN_8IN32;
1025
1026 /* 128-bit buffers. */
1027 case V_028C70_COLOR_32_32_32_32:
1028 return V_028C70_ENDIAN_8IN32;
1029 default:
1030 return V_028C70_ENDIAN_NONE; /* Unsupported. */
1031 }
1032 } else {
1033 return V_028C70_ENDIAN_NONE;
1034 }
1035 }
1036
1037 /* Returns the size in bits of the widest component of a CB format */
1038 static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1039 {
1040 switch(colorformat) {
1041 case V_028C70_COLOR_4_4_4_4:
1042 return 4;
1043
1044 case V_028C70_COLOR_1_5_5_5:
1045 case V_028C70_COLOR_5_5_5_1:
1046 return 5;
1047
1048 case V_028C70_COLOR_5_6_5:
1049 return 6;
1050
1051 case V_028C70_COLOR_8:
1052 case V_028C70_COLOR_8_8:
1053 case V_028C70_COLOR_8_8_8_8:
1054 return 8;
1055
1056 case V_028C70_COLOR_10_10_10_2:
1057 case V_028C70_COLOR_2_10_10_10:
1058 return 10;
1059
1060 case V_028C70_COLOR_10_11_11:
1061 case V_028C70_COLOR_11_11_10:
1062 return 11;
1063
1064 case V_028C70_COLOR_16:
1065 case V_028C70_COLOR_16_16:
1066 case V_028C70_COLOR_16_16_16_16:
1067 return 16;
1068
1069 case V_028C70_COLOR_8_24:
1070 case V_028C70_COLOR_24_8:
1071 return 24;
1072
1073 case V_028C70_COLOR_32:
1074 case V_028C70_COLOR_32_32:
1075 case V_028C70_COLOR_32_32_32_32:
1076 case V_028C70_COLOR_X24_8_32_FLOAT:
1077 return 32;
1078 }
1079
1080 assert(!"Unknown maximum component size");
1081 return 0;
1082 }
1083
1084 static uint32_t si_translate_dbformat(enum pipe_format format)
1085 {
1086 switch (format) {
1087 case PIPE_FORMAT_Z16_UNORM:
1088 return V_028040_Z_16;
1089 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1090 case PIPE_FORMAT_X8Z24_UNORM:
1091 case PIPE_FORMAT_Z24X8_UNORM:
1092 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1093 return V_028040_Z_24; /* deprecated on SI */
1094 case PIPE_FORMAT_Z32_FLOAT:
1095 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1096 return V_028040_Z_32_FLOAT;
1097 default:
1098 return V_028040_Z_INVALID;
1099 }
1100 }
1101
1102 /*
1103 * Texture translation
1104 */
1105
/* Translate a gallium pipe_format into the IMG_DATA_FORMAT field of an SI
 * image resource descriptor. Returns ~0 if the format cannot be sampled. */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* Compressed formats (RGTC/BPTC/S3TC below) are only enabled with a
	 * new enough kernel: DRM minor >= 31. */
	bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
	/* Whether all channels have the same bit size (computed below). */
	boolean uniform = TRUE;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		/* sRGB is only accepted for 1- and 4-channel formats here. */
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	/* RGTC (BC4/BC5) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_s3tc)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* BPTC (BC6H/BC7) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_s3tc)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	/* Subsampled (packed YUV-style) RGB formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	/* S3TC (BC1/BC2/BC3) compressed formats; additionally gated on the
	 * runtime availability of the software S3TC support. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {

		if (!enable_s3tc)
			goto out_unknown;

		if (!util_format_s3tc_enabled) {
			goto out_unknown;
		}

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	/* Packed float formats (not plain, so handled by name). */
	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	/* All-void formats (or bad callers) end up here. */
	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}
1324
1325 static unsigned si_tex_wrap(unsigned wrap)
1326 {
1327 switch (wrap) {
1328 default:
1329 case PIPE_TEX_WRAP_REPEAT:
1330 return V_008F30_SQ_TEX_WRAP;
1331 case PIPE_TEX_WRAP_CLAMP:
1332 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1333 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1334 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1335 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1336 return V_008F30_SQ_TEX_CLAMP_BORDER;
1337 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1338 return V_008F30_SQ_TEX_MIRROR;
1339 case PIPE_TEX_WRAP_MIRROR_CLAMP:
1340 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1341 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1342 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1343 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1344 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1345 }
1346 }
1347
1348 static unsigned si_tex_filter(unsigned filter)
1349 {
1350 switch (filter) {
1351 default:
1352 case PIPE_TEX_FILTER_NEAREST:
1353 return V_008F38_SQ_TEX_XY_FILTER_POINT;
1354 case PIPE_TEX_FILTER_LINEAR:
1355 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1356 }
1357 }
1358
1359 static unsigned si_tex_mipfilter(unsigned filter)
1360 {
1361 switch (filter) {
1362 case PIPE_TEX_MIPFILTER_NEAREST:
1363 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1364 case PIPE_TEX_MIPFILTER_LINEAR:
1365 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1366 default:
1367 case PIPE_TEX_MIPFILTER_NONE:
1368 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1369 }
1370 }
1371
1372 static unsigned si_tex_compare(unsigned compare)
1373 {
1374 switch (compare) {
1375 default:
1376 case PIPE_FUNC_NEVER:
1377 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1378 case PIPE_FUNC_LESS:
1379 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1380 case PIPE_FUNC_EQUAL:
1381 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1382 case PIPE_FUNC_LEQUAL:
1383 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1384 case PIPE_FUNC_GREATER:
1385 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1386 case PIPE_FUNC_NOTEQUAL:
1387 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1388 case PIPE_FUNC_GEQUAL:
1389 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1390 case PIPE_FUNC_ALWAYS:
1391 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1392 }
1393 }
1394
1395 static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1396 {
1397 switch (dim) {
1398 default:
1399 case PIPE_TEXTURE_1D:
1400 return V_008F1C_SQ_RSRC_IMG_1D;
1401 case PIPE_TEXTURE_1D_ARRAY:
1402 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1403 case PIPE_TEXTURE_2D:
1404 case PIPE_TEXTURE_RECT:
1405 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1406 V_008F1C_SQ_RSRC_IMG_2D;
1407 case PIPE_TEXTURE_2D_ARRAY:
1408 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1409 V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1410 case PIPE_TEXTURE_3D:
1411 return V_008F1C_SQ_RSRC_IMG_3D;
1412 case PIPE_TEXTURE_CUBE:
1413 case PIPE_TEXTURE_CUBE_ARRAY:
1414 return V_008F1C_SQ_RSRC_IMG_CUBE;
1415 }
1416 }
1417
1418 /*
1419 * Format support testing
1420 */
1421
1422 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1423 {
1424 return si_translate_texformat(screen, format, util_format_description(format),
1425 util_format_get_first_non_void_channel(format)) != ~0U;
1426 }
1427
1428 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1429 const struct util_format_description *desc,
1430 int first_non_void)
1431 {
1432 unsigned type = desc->channel[first_non_void].type;
1433 int i;
1434
1435 if (type == UTIL_FORMAT_TYPE_FIXED)
1436 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1437
1438 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1439 return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1440
1441 if (desc->nr_channels == 4 &&
1442 desc->channel[0].size == 10 &&
1443 desc->channel[1].size == 10 &&
1444 desc->channel[2].size == 10 &&
1445 desc->channel[3].size == 2)
1446 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1447
1448 /* See whether the components are of the same size. */
1449 for (i = 0; i < desc->nr_channels; i++) {
1450 if (desc->channel[first_non_void].size != desc->channel[i].size)
1451 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1452 }
1453
1454 switch (desc->channel[first_non_void].size) {
1455 case 8:
1456 switch (desc->nr_channels) {
1457 case 1:
1458 return V_008F0C_BUF_DATA_FORMAT_8;
1459 case 2:
1460 return V_008F0C_BUF_DATA_FORMAT_8_8;
1461 case 3:
1462 case 4:
1463 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1464 }
1465 break;
1466 case 16:
1467 switch (desc->nr_channels) {
1468 case 1:
1469 return V_008F0C_BUF_DATA_FORMAT_16;
1470 case 2:
1471 return V_008F0C_BUF_DATA_FORMAT_16_16;
1472 case 3:
1473 case 4:
1474 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1475 }
1476 break;
1477 case 32:
1478 /* From the Southern Islands ISA documentation about MTBUF:
1479 * 'Memory reads of data in memory that is 32 or 64 bits do not
1480 * undergo any format conversion.'
1481 */
1482 if (type != UTIL_FORMAT_TYPE_FLOAT &&
1483 !desc->channel[first_non_void].pure_integer)
1484 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1485
1486 switch (desc->nr_channels) {
1487 case 1:
1488 return V_008F0C_BUF_DATA_FORMAT_32;
1489 case 2:
1490 return V_008F0C_BUF_DATA_FORMAT_32_32;
1491 case 3:
1492 return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1493 case 4:
1494 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1495 }
1496 break;
1497 }
1498
1499 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1500 }
1501
1502 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1503 const struct util_format_description *desc,
1504 int first_non_void)
1505 {
1506 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1507 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1508
1509 switch (desc->channel[first_non_void].type) {
1510 case UTIL_FORMAT_TYPE_SIGNED:
1511 if (desc->channel[first_non_void].normalized)
1512 return V_008F0C_BUF_NUM_FORMAT_SNORM;
1513 else if (desc->channel[first_non_void].pure_integer)
1514 return V_008F0C_BUF_NUM_FORMAT_SINT;
1515 else
1516 return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1517 break;
1518 case UTIL_FORMAT_TYPE_UNSIGNED:
1519 if (desc->channel[first_non_void].normalized)
1520 return V_008F0C_BUF_NUM_FORMAT_UNORM;
1521 else if (desc->channel[first_non_void].pure_integer)
1522 return V_008F0C_BUF_NUM_FORMAT_UINT;
1523 else
1524 return V_008F0C_BUF_NUM_FORMAT_USCALED;
1525 break;
1526 case UTIL_FORMAT_TYPE_FLOAT:
1527 default:
1528 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1529 }
1530 }
1531
1532 static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1533 {
1534 const struct util_format_description *desc;
1535 int first_non_void;
1536 unsigned data_format;
1537
1538 desc = util_format_description(format);
1539 first_non_void = util_format_get_first_non_void_channel(format);
1540 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1541 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1542 }
1543
1544 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1545 {
1546 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1547 r600_translate_colorswap(format) != ~0U;
1548 }
1549
1550 static bool si_is_zs_format_supported(enum pipe_format format)
1551 {
1552 return si_translate_dbformat(format) != V_028040_Z_INVALID;
1553 }
1554
1555 boolean si_is_format_supported(struct pipe_screen *screen,
1556 enum pipe_format format,
1557 enum pipe_texture_target target,
1558 unsigned sample_count,
1559 unsigned usage)
1560 {
1561 struct si_screen *sscreen = (struct si_screen *)screen;
1562 unsigned retval = 0;
1563
1564 if (target >= PIPE_MAX_TEXTURE_TYPES) {
1565 R600_ERR("r600: unsupported texture type %d\n", target);
1566 return FALSE;
1567 }
1568
1569 if (!util_format_is_supported(format, usage))
1570 return FALSE;
1571
1572 if (sample_count > 1) {
1573 /* 2D tiling on CIK is supported since DRM 2.35.0 */
1574 if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
1575 return FALSE;
1576
1577 switch (sample_count) {
1578 case 2:
1579 case 4:
1580 case 8:
1581 break;
1582 default:
1583 return FALSE;
1584 }
1585 }
1586
1587 if (usage & PIPE_BIND_SAMPLER_VIEW) {
1588 if (target == PIPE_BUFFER) {
1589 if (si_is_vertex_format_supported(screen, format))
1590 retval |= PIPE_BIND_SAMPLER_VIEW;
1591 } else {
1592 if (si_is_sampler_format_supported(screen, format))
1593 retval |= PIPE_BIND_SAMPLER_VIEW;
1594 }
1595 }
1596
1597 if ((usage & (PIPE_BIND_RENDER_TARGET |
1598 PIPE_BIND_DISPLAY_TARGET |
1599 PIPE_BIND_SCANOUT |
1600 PIPE_BIND_SHARED |
1601 PIPE_BIND_BLENDABLE)) &&
1602 si_is_colorbuffer_format_supported(format)) {
1603 retval |= usage &
1604 (PIPE_BIND_RENDER_TARGET |
1605 PIPE_BIND_DISPLAY_TARGET |
1606 PIPE_BIND_SCANOUT |
1607 PIPE_BIND_SHARED);
1608 if (!util_format_is_pure_integer(format) &&
1609 !util_format_is_depth_or_stencil(format))
1610 retval |= usage & PIPE_BIND_BLENDABLE;
1611 }
1612
1613 if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1614 si_is_zs_format_supported(format)) {
1615 retval |= PIPE_BIND_DEPTH_STENCIL;
1616 }
1617
1618 if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1619 si_is_vertex_format_supported(screen, format)) {
1620 retval |= PIPE_BIND_VERTEX_BUFFER;
1621 }
1622
1623 if (usage & PIPE_BIND_TRANSFER_READ)
1624 retval |= PIPE_BIND_TRANSFER_READ;
1625 if (usage & PIPE_BIND_TRANSFER_WRITE)
1626 retval |= PIPE_BIND_TRANSFER_WRITE;
1627
1628 return retval == usage;
1629 }
1630
1631 unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1632 {
1633 unsigned tile_mode_index = 0;
1634
1635 if (stencil) {
1636 tile_mode_index = rtex->surface.stencil_tiling_index[level];
1637 } else {
1638 tile_mode_index = rtex->surface.tiling_index[level];
1639 }
1640 return tile_mode_index;
1641 }
1642
1643 /*
1644 * framebuffer handling
1645 */
1646
/* Fill in the CB_COLOR* register values for a color surface. Computed once
 * per surface and cached; surf->color_initialized marks completion. */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	uint64_t offset = rtex->surface.level[level].offset;
	unsigned pitch, slice;
	unsigned color_info, color_attrib, color_pitch, color_view;
	unsigned tile_mode_index;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;
	unsigned max_comp_size;

	/* Layered rendering doesn't work with LINEAR_GENERAL.
	 * (LINEAR_ALIGNED and others work) */
	if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
		/* Fold the layer offset into the base address instead of
		 * using a slice view. */
		assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer);
		offset += rtex->surface.level[level].slice_size *
			  surf->base.u.tex.first_layer;
		color_view = 0;
	} else {
		color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
			     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
	}

	/* Pitch in units of 8 pixels and slice in units of 64 pixels,
	 * both minus one as the registers expect. */
	pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
	if (slice) {
		slice = slice - 1;
	}

	tile_mode_index = si_tile_mode_index(rtex, level, false);

	/* Derive the number type from the first non-void channel. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format);
	/* Staging resources are CPU-visible copies; no byte swapping. */
	if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
		endian = V_028C70_ENDIAN_NONE;
	} else {
		endian = si_colorformat_endian_swap(format);
	}

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	color_pitch = S_028C64_TILE_MAX(pitch);

	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);

	/* MSAA surfaces additionally need sample/fragment counts and,
	 * when FMASK exists, compression-related fields. */
	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
			if (sctx->b.chip_class >= CIK) {
				color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1);
			}
		}
	}

	offset += rtex->resource.gpu_address;

	/* Register values expect a 256-byte-aligned address shifted by 8. */
	surf->cb_color_base = offset >> 8;
	surf->cb_color_pitch = color_pitch;
	surf->cb_color_slice = S_028C68_TILE_MAX(slice);
	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (rtex->fmask.size) {
		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		surf->cb_color_fmask = surf->cb_color_base;
		surf->cb_color_fmask_slice = surf->cb_color_slice;
		surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);

		if (sctx->b.chip_class == SI) {
			unsigned bankh = util_logbase2(rtex->surface.bankh);
			surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
		}

		if (sctx->b.chip_class >= CIK) {
			surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch);
		}
	}

	/* Determine pixel shader export format */
	max_comp_size = si_colorformat_max_comp_size(format);
	/* 16-bit-per-channel exports are sufficient for SRGB, small-component
	 * NORM formats, and half-float-sized FLOAT formats. */
	if (ntype == V_028C70_NUMBER_SRGB ||
	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
	     max_comp_size <= 10) ||
	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
		surf->export_16bpc = true;
	}

	surf->color_initialized = true;
}
1811
/* Fill in the DB_* register values for a depth/stencil surface. Computed
 * once per surface and cached; surf->depth_initialized marks completion. */
static void si_init_depth_surface(struct si_context *sctx,
				  struct r600_surface *surf)
{
	struct si_screen *sscreen = sctx->screen;
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct radeon_surface_level *levelinfo = &rtex->surface.level[level];
	unsigned format, tile_mode_index, array_mode;
	unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
	uint32_t z_info, s_info, db_depth_info;
	uint64_t z_offs, s_offs;
	uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0;

	/* Select the polygon-offset format control matching the Z format.
	 * NOTE(review): this keys off the currently-bound zsbuf's format
	 * rather than surf's own texture format — presumably they are the
	 * same surface at this point; verify against the caller. */
	switch (sctx->framebuffer.state.zsbuf->texture->format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		break;
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
						S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		break;
	case PIPE_FORMAT_Z16_UNORM:
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		break;
	default:
		assert(0);
	}

	format = si_translate_dbformat(rtex->resource.b.b.format);

	if (format == V_028040_Z_INVALID) {
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
	}
	assert(format != V_028040_Z_INVALID);

	/* Depth and stencil live in the same resource at different offsets. */
	s_offs = z_offs = rtex->resource.gpu_address;
	z_offs += rtex->surface.level[level].offset;
	s_offs += rtex->surface.stencil_level[level].offset;

	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);

	z_info = S_028040_FORMAT(format);
	if (rtex->resource.b.b.nr_samples > 1) {
		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (sctx->b.chip_class >= CIK) {
		/* CIK takes explicit tiling parameters instead of a tile
		 * mode index; convert each field to its register encoding. */
		switch (rtex->surface.level[level].mode) {
		case RADEON_SURF_MODE_2D:
			array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
			break;
		case RADEON_SURF_MODE_1D:
		case RADEON_SURF_MODE_LINEAR_ALIGNED:
		case RADEON_SURF_MODE_LINEAR:
		default:
			array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
			break;
		}
		tile_split = rtex->surface.tile_split;
		stile_split = rtex->surface.stencil_tile_split;
		macro_aspect = rtex->surface.mtilea;
		bankw = rtex->surface.bankw;
		bankh = rtex->surface.bankh;
		tile_split = cik_tile_split(tile_split);
		stile_split = cik_tile_split(stile_split);
		macro_aspect = cik_macro_tile_aspect(macro_aspect);
		bankw = cik_bank_wh(bankw);
		bankh = cik_bank_wh(bankh);
		nbanks = si_num_banks(sscreen, rtex);
		tile_mode_index = si_tile_mode_index(rtex, level, false);
		pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);

		db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
				 S_02803C_PIPE_CONFIG(pipe_config) |
				 S_02803C_BANK_WIDTH(bankw) |
				 S_02803C_BANK_HEIGHT(bankh) |
				 S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
				 S_02803C_NUM_BANKS(nbanks);
		z_info |= S_028040_TILE_SPLIT(tile_split);
		s_info |= S_028044_TILE_SPLIT(stile_split);
	} else {
		/* SI uses tile mode indices; depth and stencil have
		 * separate indices. */
		tile_mode_index = si_tile_mode_index(rtex, level, false);
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(rtex, level, true);
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HiZ aka depth buffer htile */
	/* use htile only for first level */
	if (rtex->htile_buffer && !level) {
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			  S_028040_ALLOW_EXPCLEAR(1);

		/* This is optimal for the clear value of 1.0 and using
		 * the LESS and LEQUAL test functions. Set this to 0
		 * for the opposite case. This can only be changed when
		 * clearing. */
		z_info |= S_028040_ZRANGE_PRECISION(1);

		/* Use all of the htile_buffer for depth, because we don't
		 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
		s_info |= S_028044_TILE_STENCIL_DISABLE(1);

		uint64_t va = rtex->htile_buffer->gpu_address;
		db_htile_data_base = va >> 8;
		db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		db_htile_data_base = 0;
		db_htile_surface = 0;
	}

	/* The pitch/height/slice fields below are expressed in units of
	 * 8 blocks, so the block counts must be multiples of 8. */
	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = db_htile_data_base;
	surf->db_depth_info = db_depth_info;
	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;
	surf->db_depth_base = z_offs >> 8;
	surf->db_stencil_base = s_offs >> 8;
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
							levelinfo->nblk_y) / 64 - 1);
	surf->db_htile_surface = db_htile_surface;
	surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;

	surf->depth_initialized = true;
}
1951
/* pipe_context::set_framebuffer_state: bind a new set of color/depth
 * surfaces, lazily initializing their register values, and mark all
 * dependent atoms dirty. */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	int i;

	/* Flush and invalidate CB/DB caches for the surfaces being unbound
	 * before switching to the new state. */
	if (sctx->framebuffer.state.nr_cbufs) {
		sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
				 R600_CONTEXT_FLUSH_AND_INV_CB_META;
	}
	if (sctx->framebuffer.state.zsbuf) {
		sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
				 R600_CONTEXT_FLUSH_AND_INV_DB_META;
	}

	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Recompute the derived per-framebuffer flags from scratch. */
	sctx->framebuffer.export_16bpc = 0;
	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
				  util_format_is_pure_integer(state->cbufs[0]->format);

	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
		sctx->db_render_state.dirty = true;

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		/* Surfaces are initialized lazily on first bind. */
		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		if (surf->export_16bpc) {
			sctx->framebuffer.export_16bpc |= 1 << i;
		}

		/* FMASK+CMASK means the CB is compressed and will need
		 * decompression before texturing. */
		if (rtex->fmask.size && rtex->cmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}
	}
	/* Set the 16BPC export for possible dual-src blending. */
	if (i == 1 && surf && surf->export_16bpc) {
		sctx->framebuffer.export_16bpc |= 1 << 1;
	}

	assert(!(sctx->framebuffer.export_16bpc & ~0xff));

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;

		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}
	}

	si_update_fb_rs_state(sctx);
	si_update_fb_blend_state(sctx);

	/* Resize the framebuffer atom to match the new surface count. */
	sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
	sctx->framebuffer.atom.dirty = true;

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		sctx->msaa_config.dirty = true;
		sctx->db_render_state.dirty = true;

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->b.sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->b.sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->b.sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->b.sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->b.sample_locations_16x;
			break;
		default:
			assert(0);
		}
		/* Two floats (x, y) per sample. */
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
					 SI_DRIVER_STATE_CONST_BUF, &constbuf);
	}
}
2056
/* Emit the framebuffer state atom to the GFX command stream.
 *
 * The dword counts emitted here must stay in sync with the num_dw
 * accounting done in si_set_framebuffer_state().
 */
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	unsigned i, nr_cbufs = state->nr_cbufs;
	struct r600_texture *tex = NULL;
	struct r600_surface *cb = NULL;

	/* Colorbuffers. */
	for (i = 0; i < nr_cbufs; i++) {
		cb = (struct r600_surface*)state->cbufs[i];
		if (!cb) {
			/* Empty slot inside the bound range: disable it. */
			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		tex = (struct r600_texture *)cb->base.texture;
		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
				      &tex->resource, RADEON_USAGE_READWRITE,
				      tex->surface.nsamples > 1 ?
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
					      RADEON_PRIO_COLOR_BUFFER);

		/* CMASK may live in its own buffer and then needs its own reloc. */
		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_COLOR_META);
		}

		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, 0);			/* R_028C78 unused */
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
	}
	/* set CB_COLOR1_INFO for possible dual-src blending */
	/* cb/tex still refer to colorbuffer 0 from the loop above. */
	if (i == 1 && state->cbufs[0]) {
		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
				       cb->cb_color_info | tex->cb_color_info);
		i++;
	}
	/* Disable the remaining CB slots. */
	for (; i < 8 ; i++) {
		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
	}

	/* ZS buffer. */
	if (state->zsbuf) {
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;

		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
				      &rtex->resource, RADEON_USAGE_READWRITE,
				      zb->base.texture->nr_samples > 1 ?
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
					      RADEON_PRIO_DEPTH_BUFFER);

		/* HTILE metadata lives in a separate buffer. */
		if (zb->db_htile_data_base) {
			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
					      RADEON_PRIO_DEPTH_META);
		}

		r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

		r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cs, zb->db_z_info);		/* R_028040_DB_Z_INFO */
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
				       zb->pa_su_poly_offset_db_fmt_cntl);
	} else {
		/* No ZS buffer bound: mark both Z and stencil as invalid. */
		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions. */
	/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));

	cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples);
}
2160
/* Emit the MSAA configuration registers derived from the current sample
 * count and the per-sample-shading (min-samples) setting. */
static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
{
	struct si_context *sctx = (struct si_context *)rctx;
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;

	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
				sctx->ps_iter_samples);
}

const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
2171
2172 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2173 {
2174 struct si_context *sctx = (struct si_context *)ctx;
2175
2176 if (sctx->ps_iter_samples == min_samples)
2177 return;
2178
2179 sctx->ps_iter_samples = min_samples;
2180
2181 if (sctx->framebuffer.nr_samples > 1)
2182 sctx->msaa_config.dirty = true;
2183 }
2184
2185 /*
2186 * shaders
2187 */
2188
/* Compute the key for the hw shader variant.
 *
 * Variants are looked up by memcmp() on this key, so it must be fully
 * zero-initialized before any fields are set. */
static INLINE void si_shader_selector_key(struct pipe_context *ctx,
					  struct si_pipe_shader_selector *sel,
					  union si_shader_key *key)
{
	struct si_context *sctx = (struct si_context *)ctx;
	memset(key, 0, sizeof(*key));

	if ((sel->type == PIPE_SHADER_VERTEX || sel->type == PIPE_SHADER_GEOMETRY) &&
	    sctx->queued.named.rasterizer) {
		/* Bit 1 = user clip planes 4-7 enabled, bit 0 = planes 0-3. */
		if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf0)
			key->vs.ucps_enabled |= 0x2;
		if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf)
			key->vs.ucps_enabled |= 0x1;
	}

	if (sel->type == PIPE_SHADER_VERTEX) {
		unsigned i;
		/* No vertex elements bound yet; leave the key zeroed. */
		if (!sctx->vertex_elements)
			return;

		for (i = 0; i < sctx->vertex_elements->count; ++i)
			key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor;

		/* With a GS bound, the VS is compiled as an ES stage. */
		key->vs.as_es = sctx->gs_shader != NULL;
	} else if (sel->type == PIPE_SHADER_FRAGMENT) {
		if (sel->fs_write_all)
			key->ps.nr_cbufs = sctx->framebuffer.state.nr_cbufs;
		key->ps.export_16bpc = sctx->framebuffer.export_16bpc;

		if (sctx->queued.named.rasterizer) {
			key->ps.color_two_side = sctx->queued.named.rasterizer->two_side;
			key->ps.flatshade = sctx->queued.named.rasterizer->flatshade;
			/* Interpolate at sample position when every sample is shaded. */
			key->ps.interp_at_sample = sctx->framebuffer.nr_samples > 1 &&
						   sctx->ps_iter_samples == sctx->framebuffer.nr_samples;

			if (sctx->queued.named.blend) {
				key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
						       sctx->queued.named.rasterizer->multisample_enable &&
						       !sctx->framebuffer.cb0_is_integer;
			}
		}
		if (sctx->queued.named.dsa) {
			key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;

			/* Alpha-test should be disabled if colorbuffer 0 is integer. */
			if (sctx->framebuffer.cb0_is_integer)
				key->ps.alpha_func = PIPE_FUNC_ALWAYS;
		} else {
			key->ps.alpha_func = PIPE_FUNC_ALWAYS;
		}
	}
}
2242
2243 /* Select the hw shader variant depending on the current state. */
2244 int si_shader_select(struct pipe_context *ctx,
2245 struct si_pipe_shader_selector *sel)
2246 {
2247 union si_shader_key key;
2248 struct si_shader * shader = NULL;
2249 int r;
2250
2251 si_shader_selector_key(ctx, sel, &key);
2252
2253 /* Check if we don't need to change anything.
2254 * This path is also used for most shaders that don't need multiple
2255 * variants, it will cost just a computation of the key and this
2256 * test. */
2257 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
2258 return 0;
2259 }
2260
2261 /* lookup if we have other variants in the list */
2262 if (sel->num_shaders > 1) {
2263 struct si_shader *p = sel->current, *c = p->next_variant;
2264
2265 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
2266 p = c;
2267 c = c->next_variant;
2268 }
2269
2270 if (c) {
2271 p->next_variant = c->next_variant;
2272 shader = c;
2273 }
2274 }
2275
2276 if (shader) {
2277 shader->next_variant = sel->current;
2278 sel->current = shader;
2279 } else {
2280 shader = CALLOC(1, sizeof(struct si_shader));
2281 shader->selector = sel;
2282 shader->key = key;
2283
2284 shader->next_variant = sel->current;
2285 sel->current = shader;
2286 r = si_pipe_shader_create(ctx, shader);
2287 if (unlikely(r)) {
2288 R600_ERR("Failed to build shader variant (type=%u) %d\n",
2289 sel->type, r);
2290 sel->current = NULL;
2291 FREE(shader);
2292 return r;
2293 }
2294 sel->num_shaders++;
2295 }
2296
2297 return 0;
2298 }
2299
2300 static void *si_create_shader_state(struct pipe_context *ctx,
2301 const struct pipe_shader_state *state,
2302 unsigned pipe_shader_type)
2303 {
2304 struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
2305 int r;
2306
2307 sel->type = pipe_shader_type;
2308 sel->tokens = tgsi_dup_tokens(state->tokens);
2309 sel->so = state->stream_output;
2310
2311 if (pipe_shader_type == PIPE_SHADER_FRAGMENT) {
2312 struct tgsi_shader_info info;
2313
2314 tgsi_scan_shader(state->tokens, &info);
2315 sel->fs_write_all = info.color0_writes_all_cbufs;
2316 }
2317
2318 r = si_shader_select(ctx, sel);
2319 if (r) {
2320 free(sel);
2321 return NULL;
2322 }
2323
2324 return sel;
2325 }
2326
/* pipe_context::create_fs_state hook. */
static void *si_create_fs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
}
2332
/* pipe_context::create_gs_state hook. */
static void *si_create_gs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
}
2338
/* pipe_context::create_vs_state hook. */
static void *si_create_vs_state(struct pipe_context *ctx,
				const struct pipe_shader_state *state)
{
	return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
2344
2345 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
2346 {
2347 struct si_context *sctx = (struct si_context *)ctx;
2348 struct si_pipe_shader_selector *sel = state;
2349
2350 if (sctx->vs_shader == sel)
2351 return;
2352
2353 if (!sel || !sel->current)
2354 return;
2355
2356 sctx->vs_shader = sel;
2357 }
2358
2359 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
2360 {
2361 struct si_context *sctx = (struct si_context *)ctx;
2362 struct si_pipe_shader_selector *sel = state;
2363
2364 if (sctx->gs_shader == sel)
2365 return;
2366
2367 sctx->gs_shader = sel;
2368 }
2369
2370 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
2371 {
2372 struct si_context *sctx = (struct si_context *)ctx;
2373 struct si_pipe_shader_selector *sel = state;
2374
2375 /* skip if supplied shader is one already in use */
2376 if (sctx->ps_shader == sel)
2377 return;
2378
2379 /* use dummy shader if supplied shader is corrupt */
2380 if (!sel || !sel->current) {
2381 if (!sctx->dummy_pixel_shader) {
2382 sctx->dummy_pixel_shader =
2383 util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
2384 TGSI_SEMANTIC_GENERIC,
2385 TGSI_INTERPOLATE_CONSTANT);
2386 }
2387
2388 sel = sctx->dummy_pixel_shader;
2389 }
2390
2391 sctx->ps_shader = sel;
2392 }
2393
/* Destroy a shader selector and every compiled variant in its list.
 * Each variant's pm4 state is removed from the queue matching the stage
 * it was compiled for (GS, PS, ES or VS). */
static void si_delete_shader_selector(struct pipe_context *ctx,
				      struct si_pipe_shader_selector *sel)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_shader *p = sel->current, *c;

	while (p) {
		c = p->next_variant;
		if (sel->type == PIPE_SHADER_GEOMETRY)
			si_pm4_delete_state(sctx, gs, p->pm4);
		else if (sel->type == PIPE_SHADER_FRAGMENT)
			si_pm4_delete_state(sctx, ps, p->pm4);
		else if (p->key.vs.as_es)
			/* VS variant compiled as an ES stage (feeding a GS). */
			si_pm4_delete_state(sctx, es, p->pm4);
		else
			si_pm4_delete_state(sctx, vs, p->pm4);
		si_pipe_shader_destroy(ctx, p);
		free(p);
		p = c;
	}

	free(sel->tokens);
	free(sel);
}
2418
2419 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
2420 {
2421 struct si_context *sctx = (struct si_context *)ctx;
2422 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2423
2424 if (sctx->vs_shader == sel) {
2425 sctx->vs_shader = NULL;
2426 }
2427
2428 si_delete_shader_selector(ctx, sel);
2429 }
2430
2431 static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
2432 {
2433 struct si_context *sctx = (struct si_context *)ctx;
2434 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2435
2436 if (sctx->gs_shader == sel) {
2437 sctx->gs_shader = NULL;
2438 }
2439
2440 si_delete_shader_selector(ctx, sel);
2441 }
2442
2443 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
2444 {
2445 struct si_context *sctx = (struct si_context *)ctx;
2446 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2447
2448 if (sctx->ps_shader == sel) {
2449 sctx->ps_shader = NULL;
2450 }
2451
2452 si_delete_shader_selector(ctx, sel);
2453 }
2454
2455 /*
2456 * Samplers
2457 */
2458
/* Create a sampler view: build the 8-dword texture resource descriptor
 * (view->state) and, for MSAA color textures, the matching FMASK
 * descriptor (view->fmask_state).
 *
 * Buffer resources take an early path that builds a 4-dword buffer
 * descriptor instead. */
static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
							struct pipe_resource *texture,
							const struct pipe_sampler_view *state)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
	struct r600_texture *tmp = (struct r600_texture*)texture;
	const struct util_format_description *desc;
	unsigned format, num_format;
	uint32_t pitch = 0;
	unsigned char state_swizzle[4], swizzle[4];
	unsigned height, depth, width;
	enum pipe_format pipe_format = state->format;
	struct radeon_surface_level *surflevel;
	int first_non_void;
	uint64_t va;

	if (view == NULL)
		return NULL;

	/* initialize base object */
	view->base = *state;
	view->base.texture = NULL;
	pipe_resource_reference(&view->base.texture, texture);
	view->base.reference.count = 1;
	view->base.context = ctx;
	view->resource = &tmp->resource;

	/* Buffer resource. */
	if (texture->target == PIPE_BUFFER) {
		unsigned stride;

		desc = util_format_description(state->format);
		first_non_void = util_format_get_first_non_void_channel(state->format);
		stride = desc->block.bits / 8;
		va = tmp->resource.gpu_address + state->u.buf.first_element*stride;
		format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);

		view->state[0] = va;
		view->state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
				 S_008F04_STRIDE(stride);
		view->state[2] = state->u.buf.last_element + 1 - state->u.buf.first_element;
		view->state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
				 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
				 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
				 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
				 S_008F0C_NUM_FORMAT(num_format) |
				 S_008F0C_DATA_FORMAT(format);

		/* Track buffer views so their descriptors can be updated if
		 * the underlying buffer storage moves. */
		LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
		return &view->base;
	}

	state_swizzle[0] = state->swizzle_r;
	state_swizzle[1] = state->swizzle_g;
	state_swizzle[2] = state->swizzle_b;
	state_swizzle[3] = state->swizzle_a;

	surflevel = tmp->surface.level;

	/* Texturing with separate depth and stencil. */
	if (tmp->is_depth && !tmp->is_flushing_texture) {
		switch (pipe_format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			pipe_format = PIPE_FORMAT_Z32_FLOAT;
			break;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			/* Z24 is always stored like this. */
			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
			break;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
			/* Sample from the separate stencil miptree. */
			pipe_format = PIPE_FORMAT_S8_UINT;
			surflevel = tmp->surface.stencil_level;
			break;
		default:;
		}
	}

	desc = util_format_description(pipe_format);

	/* Depth/stencil formats sample a single component; replicate it
	 * (X or Y depending on where the wanted component is stored)
	 * before applying the view's swizzle. */
	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};

		switch (pipe_format) {
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_X8Z24_UNORM:
			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
			break;
		default:
			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
		}
	} else {
		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
	}

	first_non_void = util_format_get_first_non_void_channel(pipe_format);

	/* Pick the hardware numeric format. */
	switch (pipe_format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
		break;
	default:
		if (first_non_void < 0) {
			/* All channels are void: compressed or subsampled. */
			if (util_format_is_compressed(pipe_format)) {
				switch (pipe_format) {
				case PIPE_FORMAT_DXT1_SRGB:
				case PIPE_FORMAT_DXT1_SRGBA:
				case PIPE_FORMAT_DXT3_SRGBA:
				case PIPE_FORMAT_DXT5_SRGBA:
				case PIPE_FORMAT_BPTC_SRGBA:
					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
					break;
				case PIPE_FORMAT_RGTC1_SNORM:
				case PIPE_FORMAT_LATC1_SNORM:
				case PIPE_FORMAT_RGTC2_SNORM:
				case PIPE_FORMAT_LATC2_SNORM:
				/* implies float, so use SNORM/UNORM to determine
				   whether data is signed or not */
				case PIPE_FORMAT_BPTC_RGB_FLOAT:
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
					break;
				default:
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
					break;
				}
			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
			} else {
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
			}
		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
		} else {
			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;

			switch (desc->channel[first_non_void].type) {
			case UTIL_FORMAT_TYPE_FLOAT:
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
				break;
			case UTIL_FORMAT_TYPE_SIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
				break;
			case UTIL_FORMAT_TYPE_UNSIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
			}
		}
	}

	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
	if (format == ~0) {
		format = 0;
	}

	/* not supported any more */
	//endian = si_colorformat_endian_swap(format);

	width = surflevel[0].npix_x;
	height = surflevel[0].npix_y;
	depth = surflevel[0].npix_z;
	pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);

	/* For array targets, the descriptor's DEPTH field carries the layer
	 * count instead of the Z extent. */
	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
	        height = 1;
		depth = texture->array_size;
	} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
		depth = texture->array_size;
	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
		depth = texture->array_size / 6;

	va = tmp->resource.gpu_address + surflevel[0].offset;
	va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;

	view->state[0] = va >> 8;
	view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
			  S_008F14_DATA_FORMAT(format) |
			  S_008F14_NUM_FORMAT(num_format));
	view->state[2] = (S_008F18_WIDTH(width - 1) |
			  S_008F18_HEIGHT(height - 1));
	view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
			  S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
			  S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
			  S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
			  S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
						      0 : state->u.tex.first_level - tmp->mipmap_shift) |
			  S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
						      util_logbase2(texture->nr_samples) :
						      state->u.tex.last_level - tmp->mipmap_shift) |
			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
			  S_008F1C_POW2_PAD(texture->last_level > 0) |
			  S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
	view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
	view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
			  S_008F24_LAST_ARRAY(state->u.tex.last_layer));
	view->state[6] = 0;
	view->state[7] = 0;

	/* Initialize the sampler view for FMASK. */
	if (tmp->fmask.size) {
		uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset;
		uint32_t fmask_format;

		switch (texture->nr_samples) {
		case 2:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
			break;
		default:
			assert(0);
			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
		}

		view->fmask_state[0] = va >> 8;
		view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				       S_008F14_DATA_FORMAT(fmask_format) |
				       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
		view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
				       S_008F18_HEIGHT(height - 1);
		view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				       S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				       S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				       S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				       S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
				       S_008F1C_TYPE(si_tex_dim(texture->target, 0));
		view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
				       S_008F20_PITCH(tmp->fmask.pitch - 1);
		view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
				       S_008F24_LAST_ARRAY(state->u.tex.last_layer);
		view->fmask_state[6] = 0;
		view->fmask_state[7] = 0;
	}

	return &view->base;
}
2714
2715 static void si_sampler_view_destroy(struct pipe_context *ctx,
2716 struct pipe_sampler_view *state)
2717 {
2718 struct si_pipe_sampler_view *view = (struct si_pipe_sampler_view *)state;
2719
2720 if (view->resource->b.b.target == PIPE_BUFFER)
2721 LIST_DELINIT(&view->list);
2722
2723 pipe_resource_reference(&state->texture, NULL);
2724 FREE(view);
2725 }
2726
2727 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2728 {
2729 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2730 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2731 (linear_filter &&
2732 (wrap == PIPE_TEX_WRAP_CLAMP ||
2733 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2734 }
2735
2736 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2737 {
2738 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2739 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2740
2741 return (state->border_color.ui[0] || state->border_color.ui[1] ||
2742 state->border_color.ui[2] || state->border_color.ui[3]) &&
2743 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2744 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2745 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2746 }
2747
/* Create a sampler state CSO: pack the four S_008F30..S_008F3C sampler
 * descriptor dwords and stash the border color for later upload by
 * si_set_border_colors(). */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state);
	/* Anisotropic filtering is requested via bit 2 of the XY filter fields. */
	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
	unsigned border_color_type;

	if (rstate == NULL) {
		return NULL;
	}

	/* Use a table entry only when the border color is actually visible;
	 * otherwise the built-in transparent black is sufficient. */
	if (sampler_state_needs_border_color(state))
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
	else
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;

	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  r600_tex_aniso_filter(state->max_anisotropy) << 9 |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
	/* LOD values are unsigned 4.8 fixed point, clamped to [0, 15]. */
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);

	/* The table index is patched in later by si_set_border_colors(). */
	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
		memcpy(rstate->border_color, state->border_color.ui,
		       sizeof(rstate->border_color));
	}

	return rstate;
}
2786
/* Upload border colors and update the pointers in resource descriptors.
 * There can only be 4096 border colors per context.
 *
 * XXX: This is broken if the buffer gets reallocated.
 */
static void si_set_border_colors(struct si_context *sctx, unsigned count,
				 void **states)
{
	struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
	uint32_t *border_color_table = NULL;
	int i, j;

	for (i = 0; i < count; i++) {
		/* Only sampler states that requested a register border color
		 * need a table slot. */
		if (rstates[i] &&
		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
			/* (Re)allocate the table when there is none yet or the
			 * remaining slots would overflow the 12-bit pointer. */
			if (!sctx->border_color_table ||
			    ((sctx->border_color_offset + count - i) &
			     C_008F3C_BORDER_COLOR_PTR)) {
				r600_resource_reference(&sctx->border_color_table, NULL);
				sctx->border_color_offset = 0;

				/* 4096 entries of 4 dwords each. */
				sctx->border_color_table =
					si_resource_create_custom(&sctx->screen->b.b,
								  PIPE_USAGE_DYNAMIC,
								  4096 * 4 * 4);
			}

			/* Map lazily, only once per call. */
			/* NOTE(review): buffer_map result is not checked;
			 * a failed map would crash below — confirm upstream. */
			if (!border_color_table) {
				border_color_table =
					sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
							      sctx->b.rings.gfx.cs,
							      PIPE_TRANSFER_WRITE |
							      PIPE_TRANSFER_UNSYNCHRONIZED);
			}

			for (j = 0; j < 4; j++) {
				border_color_table[4 * sctx->border_color_offset + j] =
					util_le32_to_cpu(rstates[i]->border_color[j]);
			}

			/* Patch the slot index into the sampler descriptor. */
			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
		}
	}

	/* If anything was uploaded, point the hardware at the table. */
	if (border_color_table) {
		struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);

		uint64_t va_offset = sctx->border_color_table->gpu_address;

		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
		si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
			      RADEON_PRIO_SHADER_DATA);
		si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
	}
}
2846
2847 static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
2848 unsigned start, unsigned count,
2849 void **states)
2850 {
2851 struct si_context *sctx = (struct si_context *)ctx;
2852
2853 if (!count || shader >= SI_NUM_SHADERS)
2854 return;
2855
2856 si_set_border_colors(sctx, count, states);
2857 si_set_sampler_descriptors(sctx, shader, start, count, states);
2858 }
2859
2860 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2861 {
2862 struct si_context *sctx = (struct si_context *)ctx;
2863 struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
2864 struct si_pm4_state *pm4 = &state->pm4;
2865 uint16_t mask = sample_mask;
2866
2867 if (state == NULL)
2868 return;
2869
2870 state->sample_mask = mask;
2871 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
2872 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
2873
2874 si_pm4_set_state(sctx, sample_mask, state);
2875 }
2876
/* Free a sampler state CSO (allocated in si_create_sampler_state). */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	free(state);
}
2881
2882 /*
2883 * Vertex elements & buffers
2884 */
2885
/* Create a vertex-elements CSO: precompute the format-dependent word 3
 * (dst swizzle + num/data format) of each vertex buffer descriptor and
 * the per-element fetch size, so binding vertex buffers is cheap. */
static void *si_create_vertex_elements(struct pipe_context *ctx,
				       unsigned count,
				       const struct pipe_vertex_element *elements)
{
	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
	int i;

	assert(count < PIPE_MAX_ATTRIBS);
	if (!v)
		return NULL;

	v->count = count;
	for (i = 0; i < count; ++i) {
		const struct util_format_description *desc;
		unsigned data_format, num_format;
		int first_non_void;

		desc = util_format_description(elements[i].src_format);
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);

		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
				   S_008F0C_NUM_FORMAT(num_format) |
				   S_008F0C_DATA_FORMAT(data_format);
		/* Bytes fetched per vertex for this element. */
		v->format_size[i] = desc->block.bits / 8;
	}
	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);

	return v;
}
2920
2921 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
2922 {
2923 struct si_context *sctx = (struct si_context *)ctx;
2924 struct si_vertex_element *v = (struct si_vertex_element*)state;
2925
2926 sctx->vertex_elements = v;
2927 sctx->vertex_buffers_dirty = true;
2928 }
2929
2930 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
2931 {
2932 struct si_context *sctx = (struct si_context *)ctx;
2933
2934 if (sctx->vertex_elements == state)
2935 sctx->vertex_elements = NULL;
2936 FREE(state);
2937 }
2938
2939 static void si_set_vertex_buffers(struct pipe_context *ctx,
2940 unsigned start_slot, unsigned count,
2941 const struct pipe_vertex_buffer *buffers)
2942 {
2943 struct si_context *sctx = (struct si_context *)ctx;
2944 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
2945 int i;
2946
2947 assert(start_slot + count <= Elements(sctx->vertex_buffer));
2948
2949 if (buffers) {
2950 for (i = 0; i < count; i++) {
2951 const struct pipe_vertex_buffer *src = buffers + i;
2952 struct pipe_vertex_buffer *dsti = dst + i;
2953
2954 pipe_resource_reference(&dsti->buffer, src->buffer);
2955 dsti->buffer_offset = src->buffer_offset;
2956 dsti->stride = src->stride;
2957 }
2958 } else {
2959 for (i = 0; i < count; i++) {
2960 pipe_resource_reference(&dst[i].buffer, NULL);
2961 }
2962 }
2963 sctx->vertex_buffers_dirty = true;
2964 }
2965
2966 static void si_set_index_buffer(struct pipe_context *ctx,
2967 const struct pipe_index_buffer *ib)
2968 {
2969 struct si_context *sctx = (struct si_context *)ctx;
2970
2971 if (ib) {
2972 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
2973 memcpy(&sctx->index_buffer, ib, sizeof(*ib));
2974 } else {
2975 pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
2976 }
2977 }
2978
2979 /*
2980 * Misc
2981 */
/* pipe_context::set_polygon_stipple hook.
 *
 * Intentionally a no-op here: the stipple pattern is accepted and
 * discarded. NOTE(review): presumably polygon stipple is either handled
 * elsewhere (e.g. a shader-based workaround) or not yet supported on
 * this driver at this point -- confirm before relying on it. */
static void si_set_polygon_stipple(struct pipe_context *ctx,
				   const struct pipe_poly_stipple *state)
{
}
2986
2987 static void si_texture_barrier(struct pipe_context *ctx)
2988 {
2989 struct si_context *sctx = (struct si_context *)ctx;
2990
2991 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
2992 R600_CONTEXT_FLUSH_AND_INV_CB;
2993 }
2994
2995 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
2996 {
2997 struct pipe_blend_state blend;
2998
2999 memset(&blend, 0, sizeof(blend));
3000 blend.independent_blend_enable = true;
3001 blend.rt[0].colormask = 0xf;
3002 return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3003 }
3004
/* r600_common_context::need_gfx_cs_space adapter: forward to the
 * radeonsi command-stream space check. */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_need_cs_space(sctx, num_dw, include_draw_vbo);
}
3010
/* Wire up all pipe_context state hooks implemented by this file and
 * create the driver-internal custom DSA/blend states used by flushes,
 * resolves and decompress blits. Called once at context creation. */
void si_init_state_functions(struct si_context *sctx)
{
	/* Atoms emitted on demand each draw; num_dw 0 means it is computed
	 * at emit time for the framebuffer atom. */
	si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
	si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);

	/* Blend state */
	sctx->b.b.create_blend_state = si_create_blend_state;
	sctx->b.b.bind_blend_state = si_bind_blend_state;
	sctx->b.b.delete_blend_state = si_delete_blend_state;
	sctx->b.b.set_blend_color = si_set_blend_color;

	/* Rasterizer state */
	sctx->b.b.create_rasterizer_state = si_create_rs_state;
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;

	/* Depth/stencil/alpha state */
	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	/* Internal states used for DB flushes and CB resolve/decompress/
	 * fast-clear elimination blits. */
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);

	/* Clip / scissor / viewport / stencil ref */
	sctx->b.b.set_clip_state = si_set_clip_state;
	sctx->b.b.set_scissor_states = si_set_scissor_states;
	sctx->b.b.set_viewport_states = si_set_viewport_states;
	sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;

	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
	sctx->b.b.get_sample_position = cayman_get_sample_position;

	/* Shader states (VS/PS) */
	sctx->b.b.create_vs_state = si_create_vs_state;
	sctx->b.b.create_fs_state = si_create_fs_state;
	sctx->b.b.bind_vs_state = si_bind_vs_shader;
	sctx->b.b.bind_fs_state = si_bind_ps_shader;
	sctx->b.b.delete_vs_state = si_delete_vs_shader;
	sctx->b.b.delete_fs_state = si_delete_ps_shader;

	/* Geometry shader states */
	sctx->b.b.create_gs_state = si_create_gs_state;
	sctx->b.b.bind_gs_state = si_bind_gs_shader;
	sctx->b.b.delete_gs_state = si_delete_gs_shader;

	/* Samplers and sampler views */
	sctx->b.b.create_sampler_state = si_create_sampler_state;
	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;

	sctx->b.b.create_sampler_view = si_create_sampler_view;
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;

	sctx->b.b.set_sample_mask = si_set_sample_mask;

	/* Vertex elements and buffers */
	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
	sctx->b.b.set_index_buffer = si_set_index_buffer;

	/* Misc */
	sctx->b.b.texture_barrier = si_texture_barrier;
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
	sctx->b.b.set_min_samples = si_set_min_samples;

	/* Hooks into the shared r600 context layer */
	sctx->b.dma_copy = si_dma_copy;
	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;

	sctx->b.b.draw_vbo = si_draw_vbo;
}
3078
3079 void si_init_config(struct si_context *sctx)
3080 {
3081 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
3082
3083 if (pm4 == NULL)
3084 return;
3085
3086 si_cmd_context_control(pm4);
3087
3088 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
3089 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
3090 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
3091 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
3092 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
3093 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
3094 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
3095 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
3096 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
3097 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
3098 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
3099 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);
3100
3101 /* FIXME calculate these values somehow ??? */
3102 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
3103 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3104 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3105
3106 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
3107 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3108 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
3109 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3110
3111 si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
3112 si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
3113 si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
3114 si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
3115
3116 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3117 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
3118 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3119 if (sctx->b.chip_class < CIK)
3120 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3121 S_008A14_CLIP_VTX_REORDER_ENA(1));
3122
3123 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3124 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3125
3126 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3127
3128 if (sctx->b.chip_class >= CIK) {
3129 switch (sctx->screen->b.family) {
3130 case CHIP_BONAIRE:
3131 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
3132 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3133 break;
3134 case CHIP_HAWAII:
3135 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
3136 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
3137 break;
3138 case CHIP_KAVERI:
3139 /* XXX todo */
3140 case CHIP_KABINI:
3141 /* XXX todo */
3142 case CHIP_MULLINS:
3143 /* XXX todo */
3144 default:
3145 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3146 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3147 break;
3148 }
3149 } else {
3150 switch (sctx->screen->b.family) {
3151 case CHIP_TAHITI:
3152 case CHIP_PITCAIRN:
3153 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
3154 break;
3155 case CHIP_VERDE:
3156 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
3157 break;
3158 case CHIP_OLAND:
3159 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
3160 break;
3161 case CHIP_HAINAN:
3162 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3163 break;
3164 default:
3165 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3166 break;
3167 }
3168 }
3169
3170 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3171 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3172 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3173 S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3174 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3175 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3176 S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3177
3178 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3179 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3180 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000);
3181 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000);
3182 si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
3183 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000);
3184 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
3185 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
3186 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
3187 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000);
3188 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
3189 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
3190 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
3191 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3192 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3193 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3194
3195 /* There is a hang if stencil is used and fast stencil is enabled
3196 * regardless of whether HTILE is depth-only or not.
3197 */
3198 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
3199 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
3200 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
3201 S_02800C_FAST_STENCIL_DISABLE(1));
3202
3203 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3204 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3205 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
3206
3207 if (sctx->b.chip_class >= CIK) {
3208 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3209 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
3210 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3211 }
3212
3213 si_pm4_set_state(sctx, init, pm4);
3214 }