radeonsi: compute color surface registers only once
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Christian König <christian.koenig@amd.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_shader.h"
29 #include "sid.h"
30 #include "../radeon/r600_cs.h"
31
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_scan.h"
34 #include "util/u_format.h"
35 #include "util/u_format_s3tc.h"
36 #include "util/u_framebuffer.h"
37 #include "util/u_helpers.h"
38 #include "util/u_memory.h"
39
40 static uint32_t cik_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split)
41 {
42 if (sscreen->b.info.cik_macrotile_mode_array_valid) {
43 unsigned index, tileb;
44
45 tileb = 8 * 8 * bpe;
46 tileb = MIN2(tile_split, tileb);
47
48 for (index = 0; tileb > 64; index++) {
49 tileb >>= 1;
50 }
51
52 assert(index < 16);
53
54 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
55 }
56
57 /* The old way. */
58 switch (sscreen->b.tiling_info.num_banks) {
59 case 2:
60 return V_02803C_ADDR_SURF_2_BANK;
61 case 4:
62 return V_02803C_ADDR_SURF_4_BANK;
63 case 8:
64 default:
65 return V_02803C_ADDR_SURF_8_BANK;
66 case 16:
67 return V_02803C_ADDR_SURF_16_BANK;
68 }
69 }
70
71 static unsigned cik_tile_split(unsigned tile_split)
72 {
73 switch (tile_split) {
74 case 64:
75 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
76 break;
77 case 128:
78 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
79 break;
80 case 256:
81 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
82 break;
83 case 512:
84 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
85 break;
86 default:
87 case 1024:
88 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
89 break;
90 case 2048:
91 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
92 break;
93 case 4096:
94 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
95 break;
96 }
97 return tile_split;
98 }
99
100 static unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
101 {
102 switch (macro_tile_aspect) {
103 default:
104 case 1:
105 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
106 break;
107 case 2:
108 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
109 break;
110 case 4:
111 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
112 break;
113 case 8:
114 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
115 break;
116 }
117 return macro_tile_aspect;
118 }
119
120 static unsigned cik_bank_wh(unsigned bankwh)
121 {
122 switch (bankwh) {
123 default:
124 case 1:
125 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
126 break;
127 case 2:
128 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
129 break;
130 case 4:
131 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
132 break;
133 case 8:
134 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
135 break;
136 }
137 return bankwh;
138 }
139
140 static unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
141 {
142 if (sscreen->b.info.si_tile_mode_array_valid) {
143 uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];
144
145 return G_009910_PIPE_CONFIG(gb_tile_mode);
146 }
147
148 /* This is probably broken for a lot of chips, but it's only used
149 * if the kernel cannot return the tile mode array for CIK. */
150 switch (sscreen->b.info.r600_num_tile_pipes) {
151 case 16:
152 return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
153 case 8:
154 return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
155 case 4:
156 default:
157 if (sscreen->b.info.r600_num_backends == 4)
158 return V_02803C_X_ADDR_SURF_P4_16X16;
159 else
160 return V_02803C_X_ADDR_SURF_P4_8X16;
161 case 2:
162 return V_02803C_ADDR_SURF_P2;
163 }
164 }
165
166 static unsigned si_map_swizzle(unsigned swizzle)
167 {
168 switch (swizzle) {
169 case UTIL_FORMAT_SWIZZLE_Y:
170 return V_008F0C_SQ_SEL_Y;
171 case UTIL_FORMAT_SWIZZLE_Z:
172 return V_008F0C_SQ_SEL_Z;
173 case UTIL_FORMAT_SWIZZLE_W:
174 return V_008F0C_SQ_SEL_W;
175 case UTIL_FORMAT_SWIZZLE_0:
176 return V_008F0C_SQ_SEL_0;
177 case UTIL_FORMAT_SWIZZLE_1:
178 return V_008F0C_SQ_SEL_1;
179 default: /* UTIL_FORMAT_SWIZZLE_X */
180 return V_008F0C_SQ_SEL_X;
181 }
182 }
183
/*
 * Scale a float by 2^frac_bits and convert the product to uint32_t,
 * i.e. produce an unsigned fixed-point value with frac_bits fractional
 * bits. The conversion truncates toward zero.
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	/* Shift an unsigned 1 so that frac_bits == 31 does not overflow
	 * a signed int. */
	return value * (1u << frac_bits);
}
188
/*
 * Pack a float as 12.4 fixed-point.
 *
 * Values <= 0 (and NaN) yield 0, values >= 4096 saturate to 0xffff and
 * everything in between is multiplied by 16 and truncated.
 */
static unsigned si_pack_float_12p4(float x)
{
	/* Written as a negated comparison so that NaN is clamped here
	 * instead of reaching the float-to-unsigned conversion below. */
	if (!(x > 0))
		return 0;

	if (x >= 4096)
		return 0xffff;

	return x * 16;
}
195
196 /*
197 * inferred framebuffer and blender state
198 */
199 static void si_update_fb_blend_state(struct si_context *sctx)
200 {
201 struct si_pm4_state *pm4;
202 struct si_state_blend *blend = sctx->queued.named.blend;
203 uint32_t mask;
204
205 if (blend == NULL)
206 return;
207
208 pm4 = si_pm4_alloc_state(sctx);
209 if (pm4 == NULL)
210 return;
211
212 mask = (1ULL << ((unsigned)sctx->framebuffer.nr_cbufs * 4)) - 1;
213 mask &= blend->cb_target_mask;
214 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
215
216 si_pm4_set_state(sctx, fb_blend, pm4);
217 }
218
219 /*
220 * Blender functions
221 */
222
223 static uint32_t si_translate_blend_function(int blend_func)
224 {
225 switch (blend_func) {
226 case PIPE_BLEND_ADD:
227 return V_028780_COMB_DST_PLUS_SRC;
228 case PIPE_BLEND_SUBTRACT:
229 return V_028780_COMB_SRC_MINUS_DST;
230 case PIPE_BLEND_REVERSE_SUBTRACT:
231 return V_028780_COMB_DST_MINUS_SRC;
232 case PIPE_BLEND_MIN:
233 return V_028780_COMB_MIN_DST_SRC;
234 case PIPE_BLEND_MAX:
235 return V_028780_COMB_MAX_DST_SRC;
236 default:
237 R600_ERR("Unknown blend function %d\n", blend_func);
238 assert(0);
239 break;
240 }
241 return 0;
242 }
243
244 static uint32_t si_translate_blend_factor(int blend_fact)
245 {
246 switch (blend_fact) {
247 case PIPE_BLENDFACTOR_ONE:
248 return V_028780_BLEND_ONE;
249 case PIPE_BLENDFACTOR_SRC_COLOR:
250 return V_028780_BLEND_SRC_COLOR;
251 case PIPE_BLENDFACTOR_SRC_ALPHA:
252 return V_028780_BLEND_SRC_ALPHA;
253 case PIPE_BLENDFACTOR_DST_ALPHA:
254 return V_028780_BLEND_DST_ALPHA;
255 case PIPE_BLENDFACTOR_DST_COLOR:
256 return V_028780_BLEND_DST_COLOR;
257 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
258 return V_028780_BLEND_SRC_ALPHA_SATURATE;
259 case PIPE_BLENDFACTOR_CONST_COLOR:
260 return V_028780_BLEND_CONSTANT_COLOR;
261 case PIPE_BLENDFACTOR_CONST_ALPHA:
262 return V_028780_BLEND_CONSTANT_ALPHA;
263 case PIPE_BLENDFACTOR_ZERO:
264 return V_028780_BLEND_ZERO;
265 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
266 return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
267 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
268 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
269 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
270 return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
271 case PIPE_BLENDFACTOR_INV_DST_COLOR:
272 return V_028780_BLEND_ONE_MINUS_DST_COLOR;
273 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
274 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
275 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
276 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
277 case PIPE_BLENDFACTOR_SRC1_COLOR:
278 return V_028780_BLEND_SRC1_COLOR;
279 case PIPE_BLENDFACTOR_SRC1_ALPHA:
280 return V_028780_BLEND_SRC1_ALPHA;
281 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
282 return V_028780_BLEND_INV_SRC1_COLOR;
283 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
284 return V_028780_BLEND_INV_SRC1_ALPHA;
285 default:
286 R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
287 assert(0);
288 break;
289 }
290 return 0;
291 }
292
293 static void *si_create_blend_state_mode(struct pipe_context *ctx,
294 const struct pipe_blend_state *state,
295 unsigned mode)
296 {
297 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
298 struct si_pm4_state *pm4 = &blend->pm4;
299
300 uint32_t color_control = 0;
301
302 if (blend == NULL)
303 return NULL;
304
305 blend->alpha_to_one = state->alpha_to_one;
306
307 if (state->logicop_enable) {
308 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
309 } else {
310 color_control |= S_028808_ROP3(0xcc);
311 }
312
313 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
314 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
315 S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
316 S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
317 S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
318 S_028B70_ALPHA_TO_MASK_OFFSET3(2));
319
320 blend->cb_target_mask = 0;
321 for (int i = 0; i < 8; i++) {
322 /* state->rt entries > 0 only written if independent blending */
323 const int j = state->independent_blend_enable ? i : 0;
324
325 unsigned eqRGB = state->rt[j].rgb_func;
326 unsigned srcRGB = state->rt[j].rgb_src_factor;
327 unsigned dstRGB = state->rt[j].rgb_dst_factor;
328 unsigned eqA = state->rt[j].alpha_func;
329 unsigned srcA = state->rt[j].alpha_src_factor;
330 unsigned dstA = state->rt[j].alpha_dst_factor;
331
332 unsigned blend_cntl = 0;
333
334 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
335 blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
336
337 if (!state->rt[j].blend_enable) {
338 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
339 continue;
340 }
341
342 blend_cntl |= S_028780_ENABLE(1);
343 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
344 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
345 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
346
347 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
348 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
349 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
350 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
351 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
352 }
353 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
354 }
355
356 if (blend->cb_target_mask) {
357 color_control |= S_028808_MODE(mode);
358 } else {
359 color_control |= S_028808_MODE(V_028808_CB_DISABLE);
360 }
361 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
362
363 return blend;
364 }
365
366 static void *si_create_blend_state(struct pipe_context *ctx,
367 const struct pipe_blend_state *state)
368 {
369 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
370 }
371
372 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
373 {
374 struct si_context *sctx = (struct si_context *)ctx;
375 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
376 si_update_fb_blend_state(sctx);
377 }
378
379 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
380 {
381 struct si_context *sctx = (struct si_context *)ctx;
382 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
383 }
384
385 static void si_set_blend_color(struct pipe_context *ctx,
386 const struct pipe_blend_color *state)
387 {
388 struct si_context *sctx = (struct si_context *)ctx;
389 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
390
391 if (pm4 == NULL)
392 return;
393
394 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
395 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
396 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
397 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
398
399 si_pm4_set_state(sctx, blend_color, pm4);
400 }
401
402 /*
403 * Clipping, scissors and viewport
404 */
405
406 static void si_set_clip_state(struct pipe_context *ctx,
407 const struct pipe_clip_state *state)
408 {
409 struct si_context *sctx = (struct si_context *)ctx;
410 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
411 struct pipe_constant_buffer cb;
412
413 if (pm4 == NULL)
414 return;
415
416 for (int i = 0; i < 6; i++) {
417 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
418 fui(state->ucp[i][0]));
419 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
420 fui(state->ucp[i][1]));
421 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
422 fui(state->ucp[i][2]));
423 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
424 fui(state->ucp[i][3]));
425 }
426
427 cb.buffer = NULL;
428 cb.user_buffer = state->ucp;
429 cb.buffer_offset = 0;
430 cb.buffer_size = 4*4*8;
431 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, NUM_PIPE_CONST_BUFFERS, &cb);
432 pipe_resource_reference(&cb.buffer, NULL);
433
434 si_pm4_set_state(sctx, clip, pm4);
435 }
436
437 static void si_set_scissor_states(struct pipe_context *ctx,
438 unsigned start_slot,
439 unsigned num_scissors,
440 const struct pipe_scissor_state *state)
441 {
442 struct si_context *sctx = (struct si_context *)ctx;
443 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
444
445 if (pm4 == NULL)
446 return;
447
448 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
449 S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
450 S_028250_WINDOW_OFFSET_DISABLE(1));
451 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
452 S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
453
454 si_pm4_set_state(sctx, scissor, pm4);
455 }
456
457 static void si_set_viewport_states(struct pipe_context *ctx,
458 unsigned start_slot,
459 unsigned num_viewports,
460 const struct pipe_viewport_state *state)
461 {
462 struct si_context *sctx = (struct si_context *)ctx;
463 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
464 struct si_pm4_state *pm4 = &viewport->pm4;
465
466 if (viewport == NULL)
467 return;
468
469 viewport->viewport = *state;
470 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
471 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
472 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
473 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
474 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
475 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
476
477 si_pm4_set_state(sctx, viewport, viewport);
478 }
479
480 /*
481 * inferred state between framebuffer and rasterizer
482 */
483 static void si_update_fb_rs_state(struct si_context *sctx)
484 {
485 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
486 struct si_pm4_state *pm4;
487 float offset_units;
488
489 if (!rs || !sctx->framebuffer.zsbuf)
490 return;
491
492 offset_units = sctx->queued.named.rasterizer->offset_units;
493 switch (sctx->framebuffer.zsbuf->texture->format) {
494 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
495 case PIPE_FORMAT_X8Z24_UNORM:
496 case PIPE_FORMAT_Z24X8_UNORM:
497 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
498 offset_units *= 2.0f;
499 break;
500 case PIPE_FORMAT_Z32_FLOAT:
501 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
502 offset_units *= 1.0f;
503 break;
504 case PIPE_FORMAT_Z16_UNORM:
505 offset_units *= 4.0f;
506 break;
507 default:
508 return;
509 }
510
511 pm4 = si_pm4_alloc_state(sctx);
512
513 if (pm4 == NULL)
514 return;
515
516 /* FIXME some of those reg can be computed with cso */
517 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
518 fui(sctx->queued.named.rasterizer->offset_scale));
519 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
520 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
521 fui(sctx->queued.named.rasterizer->offset_scale));
522 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
523
524 si_pm4_set_state(sctx, fb_rs, pm4);
525 }
526
527 /*
528 * Rasterizer
529 */
530
531 static uint32_t si_translate_fill(uint32_t func)
532 {
533 switch(func) {
534 case PIPE_POLYGON_MODE_FILL:
535 return V_028814_X_DRAW_TRIANGLES;
536 case PIPE_POLYGON_MODE_LINE:
537 return V_028814_X_DRAW_LINES;
538 case PIPE_POLYGON_MODE_POINT:
539 return V_028814_X_DRAW_POINTS;
540 default:
541 assert(0);
542 return V_028814_X_DRAW_POINTS;
543 }
544 }
545
546 static void *si_create_rs_state(struct pipe_context *ctx,
547 const struct pipe_rasterizer_state *state)
548 {
549 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
550 struct si_pm4_state *pm4 = &rs->pm4;
551 unsigned tmp;
552 unsigned prov_vtx = 1, polygon_dual_mode;
553 float psize_min, psize_max;
554
555 if (rs == NULL) {
556 return NULL;
557 }
558
559 rs->two_side = state->light_twoside;
560 rs->multisample_enable = state->multisample;
561 rs->clip_plane_enable = state->clip_plane_enable;
562 rs->line_stipple_enable = state->line_stipple_enable;
563
564 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
565 state->fill_back != PIPE_POLYGON_MODE_FILL);
566
567 if (state->flatshade_first)
568 prov_vtx = 0;
569
570 rs->flatshade = state->flatshade;
571 rs->sprite_coord_enable = state->sprite_coord_enable;
572 rs->pa_sc_line_stipple = state->line_stipple_enable ?
573 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
574 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
575 rs->pa_su_sc_mode_cntl =
576 S_028814_PROVOKING_VTX_LAST(prov_vtx) |
577 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
578 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
579 S_028814_FACE(!state->front_ccw) |
580 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
581 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
582 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
583 S_028814_POLY_MODE(polygon_dual_mode) |
584 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
585 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back));
586 rs->pa_cl_clip_cntl =
587 S_028810_PS_UCP_MODE(3) |
588 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
589 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
590 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
591 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
592
593 /* offset */
594 rs->offset_units = state->offset_units;
595 rs->offset_scale = state->offset_scale * 12.0f;
596
597 tmp = S_0286D4_FLAT_SHADE_ENA(1);
598 if (state->sprite_coord_enable) {
599 tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
600 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
601 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
602 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
603 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
604 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
605 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
606 }
607 }
608 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
609
610 /* point size 12.4 fixed point */
611 tmp = (unsigned)(state->point_size * 8.0);
612 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
613
614 if (state->point_size_per_vertex) {
615 psize_min = util_get_min_point_size(state);
616 psize_max = 8192;
617 } else {
618 /* Force the point size to be as if the vertex output was disabled. */
619 psize_min = state->point_size;
620 psize_max = state->point_size;
621 }
622 /* Divide by two, because 0.5 = 1 pixel. */
623 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
624 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
625 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
626
627 tmp = (unsigned)state->line_width * 8;
628 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
629 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
630 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
631 S_028A48_MSAA_ENABLE(state->multisample) |
632 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor));
633
634 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
635 S_028BE4_PIX_CENTER(state->half_pixel_center) |
636 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
637
638 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
639
640 return rs;
641 }
642
643 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
644 {
645 struct si_context *sctx = (struct si_context *)ctx;
646 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
647
648 if (state == NULL)
649 return;
650
651 // TODO
652 sctx->sprite_coord_enable = rs->sprite_coord_enable;
653 sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
654 sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
655
656 si_pm4_bind_state(sctx, rasterizer, rs);
657 si_update_fb_rs_state(sctx);
658 }
659
660 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
661 {
662 struct si_context *sctx = (struct si_context *)ctx;
663 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
664 }
665
666 /*
667 * infeered state between dsa and stencil ref
668 */
669 static void si_update_dsa_stencil_ref(struct si_context *sctx)
670 {
671 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
672 struct pipe_stencil_ref *ref = &sctx->stencil_ref;
673 struct si_state_dsa *dsa = sctx->queued.named.dsa;
674
675 if (pm4 == NULL)
676 return;
677
678 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
679 S_028430_STENCILTESTVAL(ref->ref_value[0]) |
680 S_028430_STENCILMASK(dsa->valuemask[0]) |
681 S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
682 S_028430_STENCILOPVAL(1));
683 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
684 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
685 S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
686 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
687 S_028434_STENCILOPVAL_BF(1));
688
689 si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
690 }
691
692 static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
693 const struct pipe_stencil_ref *state)
694 {
695 struct si_context *sctx = (struct si_context *)ctx;
696 sctx->stencil_ref = *state;
697 si_update_dsa_stencil_ref(sctx);
698 }
699
700
701 /*
702 * DSA
703 */
704
705 static uint32_t si_translate_stencil_op(int s_op)
706 {
707 switch (s_op) {
708 case PIPE_STENCIL_OP_KEEP:
709 return V_02842C_STENCIL_KEEP;
710 case PIPE_STENCIL_OP_ZERO:
711 return V_02842C_STENCIL_ZERO;
712 case PIPE_STENCIL_OP_REPLACE:
713 return V_02842C_STENCIL_REPLACE_TEST;
714 case PIPE_STENCIL_OP_INCR:
715 return V_02842C_STENCIL_ADD_CLAMP;
716 case PIPE_STENCIL_OP_DECR:
717 return V_02842C_STENCIL_SUB_CLAMP;
718 case PIPE_STENCIL_OP_INCR_WRAP:
719 return V_02842C_STENCIL_ADD_WRAP;
720 case PIPE_STENCIL_OP_DECR_WRAP:
721 return V_02842C_STENCIL_SUB_WRAP;
722 case PIPE_STENCIL_OP_INVERT:
723 return V_02842C_STENCIL_INVERT;
724 default:
725 R600_ERR("Unknown stencil op %d", s_op);
726 assert(0);
727 break;
728 }
729 return 0;
730 }
731
732 static void *si_create_dsa_state(struct pipe_context *ctx,
733 const struct pipe_depth_stencil_alpha_state *state)
734 {
735 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
736 struct si_pm4_state *pm4 = &dsa->pm4;
737 unsigned db_depth_control;
738 unsigned db_render_control;
739 uint32_t db_stencil_control = 0;
740
741 if (dsa == NULL) {
742 return NULL;
743 }
744
745 dsa->valuemask[0] = state->stencil[0].valuemask;
746 dsa->valuemask[1] = state->stencil[1].valuemask;
747 dsa->writemask[0] = state->stencil[0].writemask;
748 dsa->writemask[1] = state->stencil[1].writemask;
749
750 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
751 S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
752 S_028800_ZFUNC(state->depth.func);
753
754 /* stencil */
755 if (state->stencil[0].enabled) {
756 db_depth_control |= S_028800_STENCIL_ENABLE(1);
757 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
758 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
759 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
760 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
761
762 if (state->stencil[1].enabled) {
763 db_depth_control |= S_028800_BACKFACE_ENABLE(1);
764 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
765 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
766 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
767 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
768 }
769 }
770
771 /* alpha */
772 if (state->alpha.enabled) {
773 dsa->alpha_func = state->alpha.func;
774 dsa->alpha_ref = state->alpha.ref_value;
775
776 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
777 SI_SGPR_ALPHA_REF * 4, fui(dsa->alpha_ref));
778 } else {
779 dsa->alpha_func = PIPE_FUNC_ALWAYS;
780 }
781
782 /* misc */
783 db_render_control = 0;
784 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
785 si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control);
786 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
787
788 return dsa;
789 }
790
/*
 * Bind a depth/stencil/alpha state object and refresh the stencil
 * reference registers that depend on it. A NULL state is ignored.
 */
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_dsa *dsa = state;

	if (!state)
		return;

	si_pm4_bind_state(sctx, dsa, dsa);
	si_update_dsa_stencil_ref(sctx);
}
802
803 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
804 {
805 struct si_context *sctx = (struct si_context *)ctx;
806 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
807 }
808
809 static void *si_create_db_flush_dsa(struct si_context *sctx, bool copy_depth,
810 bool copy_stencil, int sample)
811 {
812 struct pipe_depth_stencil_alpha_state dsa;
813 struct si_state_dsa *state;
814
815 memset(&dsa, 0, sizeof(dsa));
816
817 state = sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
818 if (copy_depth || copy_stencil) {
819 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
820 S_028000_DEPTH_COPY(copy_depth) |
821 S_028000_STENCIL_COPY(copy_stencil) |
822 S_028000_COPY_CENTROID(1) |
823 S_028000_COPY_SAMPLE(sample));
824 } else {
825 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
826 S_028000_DEPTH_COMPRESS_DISABLE(1) |
827 S_028000_STENCIL_COMPRESS_DISABLE(1));
828 }
829
830 return state;
831 }
832
833 /*
834 * format translation
835 */
836 static uint32_t si_translate_colorformat(enum pipe_format format)
837 {
838 const struct util_format_description *desc = util_format_description(format);
839
840 #define HAS_SIZE(x,y,z,w) \
841 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
842 desc->channel[2].size == (z) && desc->channel[3].size == (w))
843
844 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
845 return V_028C70_COLOR_10_11_11;
846
847 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
848 return V_028C70_COLOR_INVALID;
849
850 switch (desc->nr_channels) {
851 case 1:
852 switch (desc->channel[0].size) {
853 case 8:
854 return V_028C70_COLOR_8;
855 case 16:
856 return V_028C70_COLOR_16;
857 case 32:
858 return V_028C70_COLOR_32;
859 }
860 break;
861 case 2:
862 if (desc->channel[0].size == desc->channel[1].size) {
863 switch (desc->channel[0].size) {
864 case 8:
865 return V_028C70_COLOR_8_8;
866 case 16:
867 return V_028C70_COLOR_16_16;
868 case 32:
869 return V_028C70_COLOR_32_32;
870 }
871 } else if (HAS_SIZE(8,24,0,0)) {
872 return V_028C70_COLOR_24_8;
873 } else if (HAS_SIZE(24,8,0,0)) {
874 return V_028C70_COLOR_8_24;
875 }
876 break;
877 case 3:
878 if (HAS_SIZE(5,6,5,0)) {
879 return V_028C70_COLOR_5_6_5;
880 } else if (HAS_SIZE(32,8,24,0)) {
881 return V_028C70_COLOR_X24_8_32_FLOAT;
882 }
883 break;
884 case 4:
885 if (desc->channel[0].size == desc->channel[1].size &&
886 desc->channel[0].size == desc->channel[2].size &&
887 desc->channel[0].size == desc->channel[3].size) {
888 switch (desc->channel[0].size) {
889 case 4:
890 return V_028C70_COLOR_4_4_4_4;
891 case 8:
892 return V_028C70_COLOR_8_8_8_8;
893 case 16:
894 return V_028C70_COLOR_16_16_16_16;
895 case 32:
896 return V_028C70_COLOR_32_32_32_32;
897 }
898 } else if (HAS_SIZE(5,5,5,1)) {
899 return V_028C70_COLOR_1_5_5_5;
900 } else if (HAS_SIZE(10,10,10,2)) {
901 return V_028C70_COLOR_2_10_10_10;
902 }
903 break;
904 }
905 return V_028C70_COLOR_INVALID;
906 }
907
908 static uint32_t si_translate_colorswap(enum pipe_format format)
909 {
910 const struct util_format_description *desc = util_format_description(format);
911
912 #define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == UTIL_FORMAT_SWIZZLE_##swz)
913
914 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
915 return V_028C70_SWAP_STD;
916
917 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
918 return ~0;
919
920 switch (desc->nr_channels) {
921 case 1:
922 if (HAS_SWIZZLE(0,X))
923 return V_028C70_SWAP_STD; /* X___ */
924 else if (HAS_SWIZZLE(3,X))
925 return V_028C70_SWAP_ALT_REV; /* ___X */
926 break;
927 case 2:
928 if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
929 (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
930 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
931 return V_028C70_SWAP_STD; /* XY__ */
932 else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
933 (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
934 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
935 return V_028C70_SWAP_STD_REV; /* YX__ */
936 else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
937 return V_028C70_SWAP_ALT; /* X__Y */
938 break;
939 case 3:
940 if (HAS_SWIZZLE(0,X))
941 return V_028C70_SWAP_STD; /* XYZ */
942 else if (HAS_SWIZZLE(0,Z))
943 return V_028C70_SWAP_STD_REV; /* ZYX */
944 break;
945 case 4:
946 /* check the middle channels, the 1st and 4th channel can be NONE */
947 if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
948 return V_028C70_SWAP_STD; /* XYZW */
949 else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
950 return V_028C70_SWAP_STD_REV; /* WZYX */
951 else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X))
952 return V_028C70_SWAP_ALT; /* ZYXW */
953 else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y))
954 return V_028C70_SWAP_ALT_REV; /* WXYZ */
955 break;
956 }
957 return ~0U;
958 }
959
960 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
961 {
962 if (SI_BIG_ENDIAN) {
963 switch(colorformat) {
964 /* 8-bit buffers. */
965 case V_028C70_COLOR_8:
966 return V_028C70_ENDIAN_NONE;
967
968 /* 16-bit buffers. */
969 case V_028C70_COLOR_5_6_5:
970 case V_028C70_COLOR_1_5_5_5:
971 case V_028C70_COLOR_4_4_4_4:
972 case V_028C70_COLOR_16:
973 case V_028C70_COLOR_8_8:
974 return V_028C70_ENDIAN_8IN16;
975
976 /* 32-bit buffers. */
977 case V_028C70_COLOR_8_8_8_8:
978 case V_028C70_COLOR_2_10_10_10:
979 case V_028C70_COLOR_8_24:
980 case V_028C70_COLOR_24_8:
981 case V_028C70_COLOR_16_16:
982 return V_028C70_ENDIAN_8IN32;
983
984 /* 64-bit buffers. */
985 case V_028C70_COLOR_16_16_16_16:
986 return V_028C70_ENDIAN_8IN16;
987
988 case V_028C70_COLOR_32_32:
989 return V_028C70_ENDIAN_8IN32;
990
991 /* 128-bit buffers. */
992 case V_028C70_COLOR_32_32_32_32:
993 return V_028C70_ENDIAN_8IN32;
994 default:
995 return V_028C70_ENDIAN_NONE; /* Unsupported. */
996 }
997 } else {
998 return V_028C70_ENDIAN_NONE;
999 }
1000 }
1001
1002 /* Returns the size in bits of the widest component of a CB format */
1003 static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1004 {
1005 switch(colorformat) {
1006 case V_028C70_COLOR_4_4_4_4:
1007 return 4;
1008
1009 case V_028C70_COLOR_1_5_5_5:
1010 case V_028C70_COLOR_5_5_5_1:
1011 return 5;
1012
1013 case V_028C70_COLOR_5_6_5:
1014 return 6;
1015
1016 case V_028C70_COLOR_8:
1017 case V_028C70_COLOR_8_8:
1018 case V_028C70_COLOR_8_8_8_8:
1019 return 8;
1020
1021 case V_028C70_COLOR_10_10_10_2:
1022 case V_028C70_COLOR_2_10_10_10:
1023 return 10;
1024
1025 case V_028C70_COLOR_10_11_11:
1026 case V_028C70_COLOR_11_11_10:
1027 return 11;
1028
1029 case V_028C70_COLOR_16:
1030 case V_028C70_COLOR_16_16:
1031 case V_028C70_COLOR_16_16_16_16:
1032 return 16;
1033
1034 case V_028C70_COLOR_8_24:
1035 case V_028C70_COLOR_24_8:
1036 return 24;
1037
1038 case V_028C70_COLOR_32:
1039 case V_028C70_COLOR_32_32:
1040 case V_028C70_COLOR_32_32_32_32:
1041 case V_028C70_COLOR_X24_8_32_FLOAT:
1042 return 32;
1043 }
1044
1045 assert(!"Unknown maximum component size");
1046 return 0;
1047 }
1048
1049 static uint32_t si_translate_dbformat(enum pipe_format format)
1050 {
1051 switch (format) {
1052 case PIPE_FORMAT_Z16_UNORM:
1053 return V_028040_Z_16;
1054 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1055 case PIPE_FORMAT_X8Z24_UNORM:
1056 case PIPE_FORMAT_Z24X8_UNORM:
1057 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1058 return V_028040_Z_24; /* deprecated on SI */
1059 case PIPE_FORMAT_Z32_FLOAT:
1060 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1061 return V_028040_Z_32_FLOAT;
1062 default:
1063 return V_028040_Z_INVALID;
1064 }
1065 }
1066
1067 /*
1068 * Texture translation
1069 */
1070
1071 static uint32_t si_translate_texformat(struct pipe_screen *screen,
1072 enum pipe_format format,
1073 const struct util_format_description *desc,
1074 int first_non_void)
1075 {
1076 struct si_screen *sscreen = (struct si_screen*)screen;
1077 bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
1078 boolean uniform = TRUE;
1079 int i;
1080
1081 /* Colorspace (return non-RGB formats directly). */
1082 switch (desc->colorspace) {
1083 /* Depth stencil formats */
1084 case UTIL_FORMAT_COLORSPACE_ZS:
1085 switch (format) {
1086 case PIPE_FORMAT_Z16_UNORM:
1087 return V_008F14_IMG_DATA_FORMAT_16;
1088 case PIPE_FORMAT_X24S8_UINT:
1089 case PIPE_FORMAT_Z24X8_UNORM:
1090 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1091 return V_008F14_IMG_DATA_FORMAT_8_24;
1092 case PIPE_FORMAT_X8Z24_UNORM:
1093 case PIPE_FORMAT_S8X24_UINT:
1094 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1095 return V_008F14_IMG_DATA_FORMAT_24_8;
1096 case PIPE_FORMAT_S8_UINT:
1097 return V_008F14_IMG_DATA_FORMAT_8;
1098 case PIPE_FORMAT_Z32_FLOAT:
1099 return V_008F14_IMG_DATA_FORMAT_32;
1100 case PIPE_FORMAT_X32_S8X24_UINT:
1101 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1102 return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1103 default:
1104 goto out_unknown;
1105 }
1106
1107 case UTIL_FORMAT_COLORSPACE_YUV:
1108 goto out_unknown; /* TODO */
1109
1110 case UTIL_FORMAT_COLORSPACE_SRGB:
1111 if (desc->nr_channels != 4 && desc->nr_channels != 1)
1112 goto out_unknown;
1113 break;
1114
1115 default:
1116 break;
1117 }
1118
1119 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1120 if (!enable_s3tc)
1121 goto out_unknown;
1122
1123 switch (format) {
1124 case PIPE_FORMAT_RGTC1_SNORM:
1125 case PIPE_FORMAT_LATC1_SNORM:
1126 case PIPE_FORMAT_RGTC1_UNORM:
1127 case PIPE_FORMAT_LATC1_UNORM:
1128 return V_008F14_IMG_DATA_FORMAT_BC4;
1129 case PIPE_FORMAT_RGTC2_SNORM:
1130 case PIPE_FORMAT_LATC2_SNORM:
1131 case PIPE_FORMAT_RGTC2_UNORM:
1132 case PIPE_FORMAT_LATC2_UNORM:
1133 return V_008F14_IMG_DATA_FORMAT_BC5;
1134 default:
1135 goto out_unknown;
1136 }
1137 }
1138
1139 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1140
1141 if (!enable_s3tc)
1142 goto out_unknown;
1143
1144 if (!util_format_s3tc_enabled) {
1145 goto out_unknown;
1146 }
1147
1148 switch (format) {
1149 case PIPE_FORMAT_DXT1_RGB:
1150 case PIPE_FORMAT_DXT1_RGBA:
1151 case PIPE_FORMAT_DXT1_SRGB:
1152 case PIPE_FORMAT_DXT1_SRGBA:
1153 return V_008F14_IMG_DATA_FORMAT_BC1;
1154 case PIPE_FORMAT_DXT3_RGBA:
1155 case PIPE_FORMAT_DXT3_SRGBA:
1156 return V_008F14_IMG_DATA_FORMAT_BC2;
1157 case PIPE_FORMAT_DXT5_RGBA:
1158 case PIPE_FORMAT_DXT5_SRGBA:
1159 return V_008F14_IMG_DATA_FORMAT_BC3;
1160 default:
1161 goto out_unknown;
1162 }
1163 }
1164
1165 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1166 return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1167 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1168 return V_008F14_IMG_DATA_FORMAT_10_11_11;
1169 }
1170
1171 /* R8G8Bx_SNORM - TODO CxV8U8 */
1172
1173 /* See whether the components are of the same size. */
1174 for (i = 1; i < desc->nr_channels; i++) {
1175 uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1176 }
1177
1178 /* Non-uniform formats. */
1179 if (!uniform) {
1180 switch(desc->nr_channels) {
1181 case 3:
1182 if (desc->channel[0].size == 5 &&
1183 desc->channel[1].size == 6 &&
1184 desc->channel[2].size == 5) {
1185 return V_008F14_IMG_DATA_FORMAT_5_6_5;
1186 }
1187 goto out_unknown;
1188 case 4:
1189 if (desc->channel[0].size == 5 &&
1190 desc->channel[1].size == 5 &&
1191 desc->channel[2].size == 5 &&
1192 desc->channel[3].size == 1) {
1193 return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1194 }
1195 if (desc->channel[0].size == 10 &&
1196 desc->channel[1].size == 10 &&
1197 desc->channel[2].size == 10 &&
1198 desc->channel[3].size == 2) {
1199 return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1200 }
1201 goto out_unknown;
1202 }
1203 goto out_unknown;
1204 }
1205
1206 if (first_non_void < 0 || first_non_void > 3)
1207 goto out_unknown;
1208
1209 /* uniform formats */
1210 switch (desc->channel[first_non_void].size) {
1211 case 4:
1212 switch (desc->nr_channels) {
1213 #if 0 /* Not supported for render targets */
1214 case 2:
1215 return V_008F14_IMG_DATA_FORMAT_4_4;
1216 #endif
1217 case 4:
1218 return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1219 }
1220 break;
1221 case 8:
1222 switch (desc->nr_channels) {
1223 case 1:
1224 return V_008F14_IMG_DATA_FORMAT_8;
1225 case 2:
1226 return V_008F14_IMG_DATA_FORMAT_8_8;
1227 case 4:
1228 return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1229 }
1230 break;
1231 case 16:
1232 switch (desc->nr_channels) {
1233 case 1:
1234 return V_008F14_IMG_DATA_FORMAT_16;
1235 case 2:
1236 return V_008F14_IMG_DATA_FORMAT_16_16;
1237 case 4:
1238 return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1239 }
1240 break;
1241 case 32:
1242 switch (desc->nr_channels) {
1243 case 1:
1244 return V_008F14_IMG_DATA_FORMAT_32;
1245 case 2:
1246 return V_008F14_IMG_DATA_FORMAT_32_32;
1247 #if 0 /* Not supported for render targets */
1248 case 3:
1249 return V_008F14_IMG_DATA_FORMAT_32_32_32;
1250 #endif
1251 case 4:
1252 return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1253 }
1254 }
1255
1256 out_unknown:
1257 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1258 return ~0;
1259 }
1260
1261 static unsigned si_tex_wrap(unsigned wrap)
1262 {
1263 switch (wrap) {
1264 default:
1265 case PIPE_TEX_WRAP_REPEAT:
1266 return V_008F30_SQ_TEX_WRAP;
1267 case PIPE_TEX_WRAP_CLAMP:
1268 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1269 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1270 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1271 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1272 return V_008F30_SQ_TEX_CLAMP_BORDER;
1273 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1274 return V_008F30_SQ_TEX_MIRROR;
1275 case PIPE_TEX_WRAP_MIRROR_CLAMP:
1276 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1277 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1278 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1279 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1280 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1281 }
1282 }
1283
1284 static unsigned si_tex_filter(unsigned filter)
1285 {
1286 switch (filter) {
1287 default:
1288 case PIPE_TEX_FILTER_NEAREST:
1289 return V_008F38_SQ_TEX_XY_FILTER_POINT;
1290 case PIPE_TEX_FILTER_LINEAR:
1291 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1292 }
1293 }
1294
1295 static unsigned si_tex_mipfilter(unsigned filter)
1296 {
1297 switch (filter) {
1298 case PIPE_TEX_MIPFILTER_NEAREST:
1299 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1300 case PIPE_TEX_MIPFILTER_LINEAR:
1301 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1302 default:
1303 case PIPE_TEX_MIPFILTER_NONE:
1304 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1305 }
1306 }
1307
1308 static unsigned si_tex_compare(unsigned compare)
1309 {
1310 switch (compare) {
1311 default:
1312 case PIPE_FUNC_NEVER:
1313 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1314 case PIPE_FUNC_LESS:
1315 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1316 case PIPE_FUNC_EQUAL:
1317 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1318 case PIPE_FUNC_LEQUAL:
1319 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1320 case PIPE_FUNC_GREATER:
1321 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1322 case PIPE_FUNC_NOTEQUAL:
1323 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1324 case PIPE_FUNC_GEQUAL:
1325 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1326 case PIPE_FUNC_ALWAYS:
1327 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1328 }
1329 }
1330
1331 static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1332 {
1333 switch (dim) {
1334 default:
1335 case PIPE_TEXTURE_1D:
1336 return V_008F1C_SQ_RSRC_IMG_1D;
1337 case PIPE_TEXTURE_1D_ARRAY:
1338 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1339 case PIPE_TEXTURE_2D:
1340 case PIPE_TEXTURE_RECT:
1341 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1342 V_008F1C_SQ_RSRC_IMG_2D;
1343 case PIPE_TEXTURE_2D_ARRAY:
1344 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1345 V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1346 case PIPE_TEXTURE_3D:
1347 return V_008F1C_SQ_RSRC_IMG_3D;
1348 case PIPE_TEXTURE_CUBE:
1349 return V_008F1C_SQ_RSRC_IMG_CUBE;
1350 }
1351 }
1352
1353 /*
1354 * Format support testing
1355 */
1356
1357 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1358 {
1359 return si_translate_texformat(screen, format, util_format_description(format),
1360 util_format_get_first_non_void_channel(format)) != ~0U;
1361 }
1362
1363 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1364 const struct util_format_description *desc,
1365 int first_non_void)
1366 {
1367 unsigned type = desc->channel[first_non_void].type;
1368 int i;
1369
1370 if (type == UTIL_FORMAT_TYPE_FIXED)
1371 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1372
1373 if (desc->nr_channels == 4 &&
1374 desc->channel[0].size == 10 &&
1375 desc->channel[1].size == 10 &&
1376 desc->channel[2].size == 10 &&
1377 desc->channel[3].size == 2)
1378 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1379
1380 /* See whether the components are of the same size. */
1381 for (i = 0; i < desc->nr_channels; i++) {
1382 if (desc->channel[first_non_void].size != desc->channel[i].size)
1383 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1384 }
1385
1386 switch (desc->channel[first_non_void].size) {
1387 case 8:
1388 switch (desc->nr_channels) {
1389 case 1:
1390 return V_008F0C_BUF_DATA_FORMAT_8;
1391 case 2:
1392 return V_008F0C_BUF_DATA_FORMAT_8_8;
1393 case 3:
1394 case 4:
1395 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1396 }
1397 break;
1398 case 16:
1399 switch (desc->nr_channels) {
1400 case 1:
1401 return V_008F0C_BUF_DATA_FORMAT_16;
1402 case 2:
1403 return V_008F0C_BUF_DATA_FORMAT_16_16;
1404 case 3:
1405 case 4:
1406 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1407 }
1408 break;
1409 case 32:
1410 /* From the Southern Islands ISA documentation about MTBUF:
1411 * 'Memory reads of data in memory that is 32 or 64 bits do not
1412 * undergo any format conversion.'
1413 */
1414 if (type != UTIL_FORMAT_TYPE_FLOAT &&
1415 !desc->channel[first_non_void].pure_integer)
1416 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1417
1418 switch (desc->nr_channels) {
1419 case 1:
1420 return V_008F0C_BUF_DATA_FORMAT_32;
1421 case 2:
1422 return V_008F0C_BUF_DATA_FORMAT_32_32;
1423 case 3:
1424 return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1425 case 4:
1426 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1427 }
1428 break;
1429 }
1430
1431 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1432 }
1433
1434 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1435 const struct util_format_description *desc,
1436 int first_non_void)
1437 {
1438 switch (desc->channel[first_non_void].type) {
1439 case UTIL_FORMAT_TYPE_SIGNED:
1440 if (desc->channel[first_non_void].normalized)
1441 return V_008F0C_BUF_NUM_FORMAT_SNORM;
1442 else if (desc->channel[first_non_void].pure_integer)
1443 return V_008F0C_BUF_NUM_FORMAT_SINT;
1444 else
1445 return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1446 break;
1447 case UTIL_FORMAT_TYPE_UNSIGNED:
1448 if (desc->channel[first_non_void].normalized)
1449 return V_008F0C_BUF_NUM_FORMAT_UNORM;
1450 else if (desc->channel[first_non_void].pure_integer)
1451 return V_008F0C_BUF_NUM_FORMAT_UINT;
1452 else
1453 return V_008F0C_BUF_NUM_FORMAT_USCALED;
1454 break;
1455 case UTIL_FORMAT_TYPE_FLOAT:
1456 default:
1457 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1458 }
1459 }
1460
1461 static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1462 {
1463 const struct util_format_description *desc;
1464 int first_non_void;
1465 unsigned data_format;
1466
1467 desc = util_format_description(format);
1468 first_non_void = util_format_get_first_non_void_channel(format);
1469 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1470 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1471 }
1472
1473 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1474 {
1475 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1476 si_translate_colorswap(format) != ~0U;
1477 }
1478
1479 static bool si_is_zs_format_supported(enum pipe_format format)
1480 {
1481 return si_translate_dbformat(format) != V_028040_Z_INVALID;
1482 }
1483
1484 boolean si_is_format_supported(struct pipe_screen *screen,
1485 enum pipe_format format,
1486 enum pipe_texture_target target,
1487 unsigned sample_count,
1488 unsigned usage)
1489 {
1490 struct si_screen *sscreen = (struct si_screen *)screen;
1491 unsigned retval = 0;
1492
1493 if (target >= PIPE_MAX_TEXTURE_TYPES) {
1494 R600_ERR("r600: unsupported texture type %d\n", target);
1495 return FALSE;
1496 }
1497
1498 if (!util_format_is_supported(format, usage))
1499 return FALSE;
1500
1501 if (sample_count > 1) {
1502 if (HAVE_LLVM < 0x0304)
1503 return FALSE;
1504
1505 /* 2D tiling on CIK is supported since DRM 2.35.0 */
1506 if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
1507 return FALSE;
1508
1509 switch (sample_count) {
1510 case 2:
1511 case 4:
1512 case 8:
1513 break;
1514 default:
1515 return FALSE;
1516 }
1517 }
1518
1519 if (usage & PIPE_BIND_SAMPLER_VIEW) {
1520 if (target == PIPE_BUFFER) {
1521 if (si_is_vertex_format_supported(screen, format))
1522 retval |= PIPE_BIND_SAMPLER_VIEW;
1523 } else {
1524 if (si_is_sampler_format_supported(screen, format))
1525 retval |= PIPE_BIND_SAMPLER_VIEW;
1526 }
1527 }
1528
1529 if ((usage & (PIPE_BIND_RENDER_TARGET |
1530 PIPE_BIND_DISPLAY_TARGET |
1531 PIPE_BIND_SCANOUT |
1532 PIPE_BIND_SHARED)) &&
1533 si_is_colorbuffer_format_supported(format)) {
1534 retval |= usage &
1535 (PIPE_BIND_RENDER_TARGET |
1536 PIPE_BIND_DISPLAY_TARGET |
1537 PIPE_BIND_SCANOUT |
1538 PIPE_BIND_SHARED);
1539 }
1540
1541 if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1542 si_is_zs_format_supported(format)) {
1543 retval |= PIPE_BIND_DEPTH_STENCIL;
1544 }
1545
1546 if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1547 si_is_vertex_format_supported(screen, format)) {
1548 retval |= PIPE_BIND_VERTEX_BUFFER;
1549 }
1550
1551 if (usage & PIPE_BIND_TRANSFER_READ)
1552 retval |= PIPE_BIND_TRANSFER_READ;
1553 if (usage & PIPE_BIND_TRANSFER_WRITE)
1554 retval |= PIPE_BIND_TRANSFER_WRITE;
1555
1556 return retval == usage;
1557 }
1558
1559 static unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1560 {
1561 unsigned tile_mode_index = 0;
1562
1563 if (stencil) {
1564 tile_mode_index = rtex->surface.stencil_tiling_index[level];
1565 } else {
1566 tile_mode_index = rtex->surface.tiling_index[level];
1567 }
1568 return tile_mode_index;
1569 }
1570
1571 /*
1572 * framebuffer handling
1573 */
1574
1575 static void si_initialize_color_surface(struct si_context *sctx,
1576 struct r600_surface *surf)
1577 {
1578 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1579 unsigned level = surf->base.u.tex.level;
1580 uint64_t offset = rtex->surface.level[level].offset;
1581 unsigned pitch, slice;
1582 unsigned color_info, color_attrib, color_pitch, color_view;
1583 unsigned tile_mode_index;
1584 unsigned format, swap, ntype, endian;
1585 const struct util_format_description *desc;
1586 int i;
1587 unsigned blend_clamp = 0, blend_bypass = 0;
1588 unsigned max_comp_size;
1589
1590 /* Layered rendering doesn't work with LINEAR_GENERAL.
1591 * (LINEAR_ALIGNED and others work) */
1592 if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
1593 assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer);
1594 offset += rtex->surface.level[level].slice_size *
1595 surf->base.u.tex.first_layer;
1596 color_view = 0;
1597 } else {
1598 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
1599 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
1600 }
1601
1602 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
1603 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1604 if (slice) {
1605 slice = slice - 1;
1606 }
1607
1608 tile_mode_index = si_tile_mode_index(rtex, level, false);
1609
1610 desc = util_format_description(surf->base.format);
1611 for (i = 0; i < 4; i++) {
1612 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
1613 break;
1614 }
1615 }
1616 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
1617 ntype = V_028C70_NUMBER_FLOAT;
1618 } else {
1619 ntype = V_028C70_NUMBER_UNORM;
1620 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
1621 ntype = V_028C70_NUMBER_SRGB;
1622 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1623 if (desc->channel[i].pure_integer) {
1624 ntype = V_028C70_NUMBER_SINT;
1625 } else {
1626 assert(desc->channel[i].normalized);
1627 ntype = V_028C70_NUMBER_SNORM;
1628 }
1629 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1630 if (desc->channel[i].pure_integer) {
1631 ntype = V_028C70_NUMBER_UINT;
1632 } else {
1633 assert(desc->channel[i].normalized);
1634 ntype = V_028C70_NUMBER_UNORM;
1635 }
1636 }
1637 }
1638
1639 format = si_translate_colorformat(surf->base.format);
1640 if (format == V_028C70_COLOR_INVALID) {
1641 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
1642 }
1643 assert(format != V_028C70_COLOR_INVALID);
1644 swap = si_translate_colorswap(surf->base.format);
1645 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
1646 endian = V_028C70_ENDIAN_NONE;
1647 } else {
1648 endian = si_colorformat_endian_swap(format);
1649 }
1650
1651 /* blend clamp should be set for all NORM/SRGB types */
1652 if (ntype == V_028C70_NUMBER_UNORM ||
1653 ntype == V_028C70_NUMBER_SNORM ||
1654 ntype == V_028C70_NUMBER_SRGB)
1655 blend_clamp = 1;
1656
1657 /* set blend bypass according to docs if SINT/UINT or
1658 8/24 COLOR variants */
1659 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1660 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1661 format == V_028C70_COLOR_X24_8_32_FLOAT) {
1662 blend_clamp = 0;
1663 blend_bypass = 1;
1664 }
1665
1666 color_info = S_028C70_FORMAT(format) |
1667 S_028C70_COMP_SWAP(swap) |
1668 S_028C70_BLEND_CLAMP(blend_clamp) |
1669 S_028C70_BLEND_BYPASS(blend_bypass) |
1670 S_028C70_NUMBER_TYPE(ntype) |
1671 S_028C70_ENDIAN(endian);
1672
1673 color_pitch = S_028C64_TILE_MAX(pitch);
1674
1675 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
1676 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
1677
1678 if (rtex->resource.b.b.nr_samples > 1) {
1679 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
1680
1681 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1682 S_028C74_NUM_FRAGMENTS(log_samples);
1683
1684 if (rtex->fmask.size) {
1685 color_info |= S_028C70_COMPRESSION(1);
1686 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
1687
1688 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);
1689
1690 if (sctx->b.chip_class == SI) {
1691 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
1692 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1693 }
1694 if (sctx->b.chip_class >= CIK) {
1695 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1);
1696 }
1697 }
1698 }
1699
1700 if (rtex->cmask.size) {
1701 color_info |= S_028C70_FAST_CLEAR(1);
1702 }
1703
1704 offset += r600_resource_va(sctx->b.b.screen, surf->base.texture);
1705
1706 surf->cb_color_base = offset >> 8;
1707 surf->cb_color_pitch = color_pitch;
1708 surf->cb_color_slice = S_028C68_TILE_MAX(slice);
1709 surf->cb_color_view = color_view;
1710 surf->cb_color_info = color_info;
1711 surf->cb_color_attrib = color_attrib;
1712
1713 if (rtex->cmask.size) {
1714 surf->cb_color_cmask = (offset + rtex->cmask.offset) >> 8;
1715 surf->cb_color_cmask_slice = S_028C80_TILE_MAX(rtex->cmask.slice_tile_max);
1716 }
1717 if (rtex->fmask.size) {
1718 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
1719 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
1720 }
1721
1722 /* Determine pixel shader export format */
1723 max_comp_size = si_colorformat_max_comp_size(format);
1724 if (ntype == V_028C70_NUMBER_SRGB ||
1725 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
1726 max_comp_size <= 10) ||
1727 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
1728 surf->export_16bpc = true;
1729 }
1730
1731 surf->color_initialized = true;
1732 }
1733
1734 static void si_db(struct si_context *sctx, struct si_pm4_state *pm4,
1735 const struct pipe_framebuffer_state *state)
1736 {
1737 struct si_screen *sscreen = sctx->screen;
1738 struct r600_texture *rtex;
1739 struct r600_surface *surf;
1740 unsigned level, pitch, slice, format, tile_mode_index, array_mode;
1741 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
1742 uint32_t z_info, s_info, db_depth_info;
1743 uint64_t z_offs, s_offs;
1744 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl;
1745
1746 if (state->zsbuf == NULL) {
1747 si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, S_028040_FORMAT(V_028040_Z_INVALID));
1748 si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID));
1749 return;
1750 }
1751
1752 surf = (struct r600_surface *)state->zsbuf;
1753 level = surf->base.u.tex.level;
1754 rtex = (struct r600_texture*)surf->base.texture;
1755
1756 format = si_translate_dbformat(rtex->resource.b.b.format);
1757
1758 switch (sctx->framebuffer.zsbuf->texture->format) {
1759 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1760 case PIPE_FORMAT_X8Z24_UNORM:
1761 case PIPE_FORMAT_Z24X8_UNORM:
1762 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1763 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1764 break;
1765 case PIPE_FORMAT_Z32_FLOAT:
1766 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1767 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1768 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1769 break;
1770 case PIPE_FORMAT_Z16_UNORM:
1771 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1772 break;
1773 default:
1774 assert(0);
1775 }
1776
1777 if (format == V_028040_Z_INVALID) {
1778 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
1779 }
1780 assert(format != V_028040_Z_INVALID);
1781
1782 s_offs = z_offs = r600_resource_va(sctx->b.b.screen, surf->base.texture);
1783 z_offs += rtex->surface.level[level].offset;
1784 s_offs += rtex->surface.stencil_level[level].offset;
1785
1786 z_offs >>= 8;
1787 s_offs >>= 8;
1788
1789 pitch = (rtex->surface.level[level].nblk_x / 8) - 1;
1790 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1791 if (slice) {
1792 slice = slice - 1;
1793 }
1794
1795 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1796
1797 z_info = S_028040_FORMAT(format);
1798 if (rtex->resource.b.b.nr_samples > 1) {
1799 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
1800 }
1801
1802 if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1803 s_info = S_028044_FORMAT(V_028044_STENCIL_8);
1804 else
1805 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1806
1807 if (sctx->b.chip_class >= CIK) {
1808 switch (rtex->surface.level[level].mode) {
1809 case RADEON_SURF_MODE_2D:
1810 array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
1811 break;
1812 case RADEON_SURF_MODE_1D:
1813 case RADEON_SURF_MODE_LINEAR_ALIGNED:
1814 case RADEON_SURF_MODE_LINEAR:
1815 default:
1816 array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
1817 break;
1818 }
1819 tile_split = rtex->surface.tile_split;
1820 stile_split = rtex->surface.stencil_tile_split;
1821 macro_aspect = rtex->surface.mtilea;
1822 bankw = rtex->surface.bankw;
1823 bankh = rtex->surface.bankh;
1824 tile_split = cik_tile_split(tile_split);
1825 stile_split = cik_tile_split(stile_split);
1826 macro_aspect = cik_macro_tile_aspect(macro_aspect);
1827 bankw = cik_bank_wh(bankw);
1828 bankh = cik_bank_wh(bankh);
1829 nbanks = cik_num_banks(sscreen, rtex->surface.bpe, rtex->surface.tile_split);
1830 tile_mode_index = si_tile_mode_index(rtex, level, false);
1831 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
1832
1833 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
1834 S_02803C_PIPE_CONFIG(pipe_config) |
1835 S_02803C_BANK_WIDTH(bankw) |
1836 S_02803C_BANK_HEIGHT(bankh) |
1837 S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
1838 S_02803C_NUM_BANKS(nbanks);
1839 z_info |= S_028040_TILE_SPLIT(tile_split);
1840 s_info |= S_028044_TILE_SPLIT(stile_split);
1841 } else {
1842 tile_mode_index = si_tile_mode_index(rtex, level, false);
1843 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1844 tile_mode_index = si_tile_mode_index(rtex, level, true);
1845 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1846 }
1847
1848 /* HiZ aka depth buffer htile */
1849 /* use htile only for first level */
1850 if (rtex->htile_buffer && !level) {
1851 const struct util_format_description *fmt_desc;
1852
1853 z_info |= S_028040_TILE_SURFACE_ENABLE(1);
1854
1855 /* This is optimal for the clear value of 1.0 and using
1856 * the LESS and LEQUAL test functions. Set this to 0
1857 * for the opposite case. This can only be changed when
1858 * clearing. */
1859 z_info |= S_028040_ZRANGE_PRECISION(1);
1860
1861 fmt_desc = util_format_description(rtex->resource.b.b.format);
1862 if (!util_format_has_stencil(fmt_desc)) {
1863 /* Use all of the htile_buffer for depth */
1864 s_info |= S_028044_TILE_STENCIL_DISABLE(1);
1865 }
1866
1867 uint64_t va = r600_resource_va(&sctx->screen->b.b, &rtex->htile_buffer->b.b);
1868 db_htile_data_base = va >> 8;
1869 db_htile_surface = S_028ABC_FULL_CACHE(1);
1870
1871 si_pm4_add_bo(pm4, rtex->htile_buffer, RADEON_USAGE_READWRITE);
1872 } else {
1873 db_htile_data_base = 0;
1874 db_htile_surface = 0;
1875 }
1876
1877 si_pm4_set_reg(pm4, R_028008_DB_DEPTH_VIEW,
1878 S_028008_SLICE_START(state->zsbuf->u.tex.first_layer) |
1879 S_028008_SLICE_MAX(state->zsbuf->u.tex.last_layer));
1880 si_pm4_set_reg(pm4, R_028014_DB_HTILE_DATA_BASE, db_htile_data_base);
1881
1882 si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, db_depth_info);
1883 si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, z_info);
1884 si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, s_info);
1885
1886 si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE);
1887 si_pm4_set_reg(pm4, R_028048_DB_Z_READ_BASE, z_offs);
1888 si_pm4_set_reg(pm4, R_02804C_DB_STENCIL_READ_BASE, s_offs);
1889 si_pm4_set_reg(pm4, R_028050_DB_Z_WRITE_BASE, z_offs);
1890 si_pm4_set_reg(pm4, R_028054_DB_STENCIL_WRITE_BASE, s_offs);
1891
1892 si_pm4_set_reg(pm4, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch));
1893 si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice));
1894
1895 si_pm4_set_reg(pm4, R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
1896 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl);
1897 }
1898
/*
 * Pack four (x, y) sample positions into one 32-bit register value.
 *
 * Each coordinate is reduced to its low 4 bits (two's complement, so the
 * usable range is -8..7) and stored in consecutive nibbles, s0x in bits
 * 3:0 up to s3y in bits 31:28. The nibbles are assembled as uint32_t so
 * that shifting a field with its top bit set into bit 31 is well defined.
 */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
	((((uint32_t)(s0x) & 0xf) <<  0) | (((uint32_t)(s0y) & 0xf) <<  4) | \
	 (((uint32_t)(s1x) & 0xf) <<  8) | (((uint32_t)(s1y) & 0xf) << 12) | \
	 (((uint32_t)(s2x) & 0xf) << 16) | (((uint32_t)(s2y) & 0xf) << 20) | \
	 (((uint32_t)(s3x) & 0xf) << 24) | (((uint32_t)(s3y) & 0xf) << 28))

/* 2xMSAA
 * There are two locations (-4, 4), (4, -4). Each register value holds
 * four sample slots, so the pair is stored twice. */
static uint32_t sample_locs_2x[] = {
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
};
static unsigned max_dist_2x = 4;
/* 4xMSAA
 * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
static uint32_t sample_locs_4x[] = {
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
};
static unsigned max_dist_4x = 6;
/* Cayman/SI 8xMSAA
 * Entries 0-3 hold samples 0-3 and entries 4-7 hold samples 4-7. */
static uint32_t cm_sample_locs_8x[] = {
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
	FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
	FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
	FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
};
static unsigned cm_max_dist_8x = 8;
/* Cayman/SI 16xMSAA
 * Each group of four entries holds the next four samples. */
static uint32_t cm_sample_locs_16x[] = {
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
};
static unsigned cm_max_dist_16x = 8;
1955
1956 static void si_get_sample_position(struct pipe_context *ctx,
1957 unsigned sample_count,
1958 unsigned sample_index,
1959 float *out_value)
1960 {
1961 int offset, index;
1962 struct {
1963 int idx:4;
1964 } val;
1965 switch (sample_count) {
1966 case 1:
1967 default:
1968 out_value[0] = out_value[1] = 0.5;
1969 break;
1970 case 2:
1971 offset = 4 * (sample_index * 2);
1972 val.idx = (sample_locs_2x[0] >> offset) & 0xf;
1973 out_value[0] = (float)(val.idx + 8) / 16.0f;
1974 val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf;
1975 out_value[1] = (float)(val.idx + 8) / 16.0f;
1976 break;
1977 case 4:
1978 offset = 4 * (sample_index * 2);
1979 val.idx = (sample_locs_4x[0] >> offset) & 0xf;
1980 out_value[0] = (float)(val.idx + 8) / 16.0f;
1981 val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf;
1982 out_value[1] = (float)(val.idx + 8) / 16.0f;
1983 break;
1984 case 8:
1985 offset = 4 * (sample_index % 4 * 2);
1986 index = (sample_index / 4) * 4;
1987 val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
1988 out_value[0] = (float)(val.idx + 8) / 16.0f;
1989 val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
1990 out_value[1] = (float)(val.idx + 8) / 16.0f;
1991 break;
1992 case 16:
1993 offset = 4 * (sample_index % 4 * 2);
1994 index = (sample_index / 4) * 4;
1995 val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
1996 out_value[0] = (float)(val.idx + 8) / 16.0f;
1997 val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
1998 out_value[1] = (float)(val.idx + 8) / 16.0f;
1999 break;
2000 }
2001 }
2002
2003 static void si_set_msaa_state(struct si_context *sctx, struct si_pm4_state *pm4, int nr_samples)
2004 {
2005 unsigned max_dist = 0;
2006
2007 switch (nr_samples) {
2008 default:
2009 nr_samples = 0;
2010 break;
2011 case 2:
2012 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]);
2013 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]);
2014 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]);
2015 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]);
2016 max_dist = max_dist_2x;
2017 break;
2018 case 4:
2019 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]);
2020 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]);
2021 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]);
2022 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]);
2023 max_dist = max_dist_4x;
2024 break;
2025 case 8:
2026 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_8x[0]);
2027 si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_8x[4]);
2028 si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0);
2029 si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0);
2030 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_8x[1]);
2031 si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_8x[5]);
2032 si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0);
2033 si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0);
2034 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_8x[2]);
2035 si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_8x[6]);
2036 si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0);
2037 si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0);
2038 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_8x[3]);
2039 si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_8x[7]);
2040 max_dist = cm_max_dist_8x;
2041 break;
2042 case 16:
2043 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_16x[0]);
2044 si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_16x[4]);
2045 si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, cm_sample_locs_16x[8]);
2046 si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, cm_sample_locs_16x[12]);
2047 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_16x[1]);
2048 si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_16x[5]);
2049 si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, cm_sample_locs_16x[9]);
2050 si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, cm_sample_locs_16x[13]);
2051 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_16x[2]);
2052 si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_16x[6]);
2053 si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, cm_sample_locs_16x[10]);
2054 si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, cm_sample_locs_16x[14]);
2055 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_16x[3]);
2056 si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_16x[7]);
2057 si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, cm_sample_locs_16x[11]);
2058 si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, cm_sample_locs_16x[15]);
2059 max_dist = cm_max_dist_16x;
2060 break;
2061 }
2062
2063 if (nr_samples > 1) {
2064 unsigned log_samples = util_logbase2(nr_samples);
2065
2066 si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL,
2067 S_028BDC_LAST_PIXEL(1) |
2068 S_028BDC_EXPAND_LINE_WIDTH(1));
2069 si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG,
2070 S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
2071 S_028BE0_MAX_SAMPLE_DIST(max_dist) |
2072 S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples));
2073
2074 si_pm4_set_reg(pm4, R_028804_DB_EQAA,
2075 S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
2076 S_028804_PS_ITER_SAMPLES(log_samples) |
2077 S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
2078 S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
2079 S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
2080 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
2081 } else {
2082 si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1));
2083 si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0);
2084
2085 si_pm4_set_reg(pm4, R_028804_DB_EQAA,
2086 S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
2087 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
2088 }
2089 }
2090
2091 static void si_set_framebuffer_state(struct pipe_context *ctx,
2092 const struct pipe_framebuffer_state *state)
2093 {
2094 struct si_context *sctx = (struct si_context *)ctx;
2095 struct r600_surface *surf = NULL;
2096 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
2097 int nr_samples, i;
2098
2099 if (pm4 == NULL)
2100 return;
2101
2102 if (sctx->framebuffer.nr_cbufs) {
2103 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
2104 R600_CONTEXT_FLUSH_AND_INV_CB_META;
2105 }
2106 if (sctx->framebuffer.zsbuf) {
2107 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
2108 R600_CONTEXT_FLUSH_AND_INV_DB_META;
2109 }
2110
2111 util_copy_framebuffer_state(&sctx->framebuffer, state);
2112
2113 /* build states */
2114 sctx->export_16bpc = 0;
2115 sctx->fb_compressed_cb_mask = 0;
2116
2117 for (i = 0; i < state->nr_cbufs; i++) {
2118 struct r600_texture *rtex;
2119
2120 if (!state->cbufs[i]) {
2121 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2122 S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2123 continue;
2124 }
2125
2126 surf = (struct r600_surface*)state->cbufs[i];
2127 rtex = (struct r600_texture*)surf->base.texture;
2128
2129 if (!surf->color_initialized) {
2130 si_initialize_color_surface(sctx, surf);
2131 }
2132
2133 if (surf->export_16bpc) {
2134 sctx->export_16bpc |= 1 << i;
2135 }
2136
2137 if (rtex->fmask.size || rtex->cmask.size) {
2138 sctx->fb_compressed_cb_mask |= 1 << i;
2139 }
2140
2141 si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE);
2142 si_pm4_set_reg(pm4, R_028C60_CB_COLOR0_BASE + i * 0x3C, surf->cb_color_base);
2143 si_pm4_set_reg(pm4, R_028C64_CB_COLOR0_PITCH + i * 0x3C, surf->cb_color_pitch);
2144 si_pm4_set_reg(pm4, R_028C68_CB_COLOR0_SLICE + i * 0x3C, surf->cb_color_slice);
2145 si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + i * 0x3C, surf->cb_color_view);
2146 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C, surf->cb_color_info);
2147 si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + i * 0x3C, surf->cb_color_attrib);
2148 si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + i * 0x3C, surf->cb_color_cmask);
2149 si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + i * 0x3C, surf->cb_color_cmask_slice);
2150 si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + i * 0x3C, surf->cb_color_fmask);
2151 si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + i * 0x3C, surf->cb_color_fmask_slice);
2152 }
2153 /* Set CB_COLOR1_INFO for possible dual-src blending. */
2154 if (i == 1 && surf) {
2155 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, surf->cb_color_info);
2156 /* Also set the 16BPC export. */
2157 if (surf->export_16bpc) {
2158 sctx->export_16bpc |= 1 << 1;
2159 }
2160 i++;
2161 }
2162 for (; i < 8; i++) {
2163 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2164 S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2165 }
2166
2167 assert(!(sctx->export_16bpc & ~0xff));
2168 si_db(sctx, pm4, state);
2169
2170 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
2171 si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2172 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
2173
2174 nr_samples = util_framebuffer_get_num_samples(state);
2175
2176 si_set_msaa_state(sctx, pm4, nr_samples);
2177 sctx->fb_log_samples = util_logbase2(nr_samples);
2178 sctx->fb_cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
2179 util_format_is_pure_integer(state->cbufs[0]->format);
2180
2181 si_pm4_set_state(sctx, framebuffer, pm4);
2182 si_update_fb_rs_state(sctx);
2183 si_update_fb_blend_state(sctx);
2184 }
2185
2186 /*
2187 * shaders
2188 */
2189
2190 /* Compute the key for the hw shader variant */
2191 static INLINE void si_shader_selector_key(struct pipe_context *ctx,
2192 struct si_pipe_shader_selector *sel,
2193 union si_shader_key *key)
2194 {
2195 struct si_context *sctx = (struct si_context *)ctx;
2196 memset(key, 0, sizeof(*key));
2197
2198 if ((sel->type == PIPE_SHADER_VERTEX || sel->type == PIPE_SHADER_GEOMETRY) &&
2199 sctx->queued.named.rasterizer) {
2200 if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf0)
2201 key->vs.ucps_enabled |= 0x2;
2202 if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf)
2203 key->vs.ucps_enabled |= 0x1;
2204 }
2205
2206 if (sel->type == PIPE_SHADER_VERTEX) {
2207 unsigned i;
2208 if (!sctx->vertex_elements)
2209 return;
2210
2211 for (i = 0; i < sctx->vertex_elements->count; ++i)
2212 key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor;
2213
2214 key->vs.as_es = sctx->gs_shader != NULL;
2215 } else if (sel->type == PIPE_SHADER_FRAGMENT) {
2216 if (sel->fs_write_all)
2217 key->ps.nr_cbufs = sctx->framebuffer.nr_cbufs;
2218 key->ps.export_16bpc = sctx->export_16bpc;
2219
2220 if (sctx->queued.named.rasterizer) {
2221 key->ps.color_two_side = sctx->queued.named.rasterizer->two_side;
2222 key->ps.flatshade = sctx->queued.named.rasterizer->flatshade;
2223
2224 if (sctx->queued.named.blend) {
2225 key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
2226 sctx->queued.named.rasterizer->multisample_enable &&
2227 !sctx->fb_cb0_is_integer;
2228 }
2229 }
2230 if (sctx->queued.named.dsa) {
2231 key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
2232
2233 /* Alpha-test should be disabled if colorbuffer 0 is integer. */
2234 if (sctx->framebuffer.nr_cbufs &&
2235 sctx->framebuffer.cbufs[0] &&
2236 util_format_is_pure_integer(sctx->framebuffer.cbufs[0]->texture->format))
2237 key->ps.alpha_func = PIPE_FUNC_ALWAYS;
2238 } else {
2239 key->ps.alpha_func = PIPE_FUNC_ALWAYS;
2240 }
2241 }
2242 }
2243
2244 /* Select the hw shader variant depending on the current state. */
2245 int si_shader_select(struct pipe_context *ctx,
2246 struct si_pipe_shader_selector *sel)
2247 {
2248 union si_shader_key key;
2249 struct si_pipe_shader * shader = NULL;
2250 int r;
2251
2252 si_shader_selector_key(ctx, sel, &key);
2253
2254 /* Check if we don't need to change anything.
2255 * This path is also used for most shaders that don't need multiple
2256 * variants, it will cost just a computation of the key and this
2257 * test. */
2258 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
2259 return 0;
2260 }
2261
2262 /* lookup if we have other variants in the list */
2263 if (sel->num_shaders > 1) {
2264 struct si_pipe_shader *p = sel->current, *c = p->next_variant;
2265
2266 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
2267 p = c;
2268 c = c->next_variant;
2269 }
2270
2271 if (c) {
2272 p->next_variant = c->next_variant;
2273 shader = c;
2274 }
2275 }
2276
2277 if (shader) {
2278 shader->next_variant = sel->current;
2279 sel->current = shader;
2280 } else {
2281 shader = CALLOC(1, sizeof(struct si_pipe_shader));
2282 shader->selector = sel;
2283 shader->key = key;
2284
2285 shader->next_variant = sel->current;
2286 sel->current = shader;
2287 r = si_pipe_shader_create(ctx, shader);
2288 if (unlikely(r)) {
2289 R600_ERR("Failed to build shader variant (type=%u) %d\n",
2290 sel->type, r);
2291 sel->current = NULL;
2292 FREE(shader);
2293 return r;
2294 }
2295 sel->num_shaders++;
2296 }
2297
2298 return 0;
2299 }
2300
2301 static void *si_create_shader_state(struct pipe_context *ctx,
2302 const struct pipe_shader_state *state,
2303 unsigned pipe_shader_type)
2304 {
2305 struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
2306 int r;
2307
2308 sel->type = pipe_shader_type;
2309 sel->tokens = tgsi_dup_tokens(state->tokens);
2310 sel->so = state->stream_output;
2311
2312 if (pipe_shader_type == PIPE_SHADER_FRAGMENT) {
2313 struct tgsi_shader_info info;
2314
2315 tgsi_scan_shader(state->tokens, &info);
2316 sel->fs_write_all = info.color0_writes_all_cbufs;
2317 }
2318
2319 r = si_shader_select(ctx, sel);
2320 if (r) {
2321 free(sel);
2322 return NULL;
2323 }
2324
2325 return sel;
2326 }
2327
2328 static void *si_create_fs_state(struct pipe_context *ctx,
2329 const struct pipe_shader_state *state)
2330 {
2331 return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
2332 }
2333
2334 #if HAVE_LLVM >= 0x0305
2335
2336 static void *si_create_gs_state(struct pipe_context *ctx,
2337 const struct pipe_shader_state *state)
2338 {
2339 return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
2340 }
2341
2342 #endif
2343
2344 static void *si_create_vs_state(struct pipe_context *ctx,
2345 const struct pipe_shader_state *state)
2346 {
2347 return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
2348 }
2349
2350 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
2351 {
2352 struct si_context *sctx = (struct si_context *)ctx;
2353 struct si_pipe_shader_selector *sel = state;
2354
2355 if (sctx->vs_shader == sel)
2356 return;
2357
2358 if (!sel || !sel->current)
2359 return;
2360
2361 sctx->vs_shader = sel;
2362 }
2363
2364 #if HAVE_LLVM >= 0x0305
2365
2366 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
2367 {
2368 struct si_context *sctx = (struct si_context *)ctx;
2369 struct si_pipe_shader_selector *sel = state;
2370
2371 if (sctx->gs_shader == sel)
2372 return;
2373
2374 sctx->gs_shader = sel;
2375 }
2376
2377 #endif
2378
2379 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
2380 {
2381 struct si_context *sctx = (struct si_context *)ctx;
2382 struct si_pipe_shader_selector *sel = state;
2383
2384 if (sctx->ps_shader == sel)
2385 return;
2386
2387 if (!sel || !sel->current)
2388 sel = sctx->dummy_pixel_shader;
2389
2390 sctx->ps_shader = sel;
2391 }
2392
2393 static void si_delete_shader_selector(struct pipe_context *ctx,
2394 struct si_pipe_shader_selector *sel)
2395 {
2396 struct si_context *sctx = (struct si_context *)ctx;
2397 struct si_pipe_shader *p = sel->current, *c;
2398
2399 while (p) {
2400 c = p->next_variant;
2401 si_pm4_delete_state(sctx, vs, p->pm4);
2402 si_pipe_shader_destroy(ctx, p);
2403 free(p);
2404 p = c;
2405 }
2406
2407 free(sel->tokens);
2408 free(sel);
2409 }
2410
2411 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
2412 {
2413 struct si_context *sctx = (struct si_context *)ctx;
2414 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2415
2416 if (sctx->vs_shader == sel) {
2417 sctx->vs_shader = NULL;
2418 }
2419
2420 si_delete_shader_selector(ctx, sel);
2421 }
2422
2423 #if HAVE_LLVM >= 0x0305
2424
2425 static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
2426 {
2427 struct si_context *sctx = (struct si_context *)ctx;
2428 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2429
2430 if (sctx->gs_shader == sel) {
2431 sctx->gs_shader = NULL;
2432 }
2433
2434 si_delete_shader_selector(ctx, sel);
2435 }
2436
2437 #endif
2438
2439 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
2440 {
2441 struct si_context *sctx = (struct si_context *)ctx;
2442 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2443
2444 if (sctx->ps_shader == sel) {
2445 sctx->ps_shader = NULL;
2446 }
2447
2448 si_delete_shader_selector(ctx, sel);
2449 }
2450
2451 /*
2452 * Samplers
2453 */
2454
2455 static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
2456 struct pipe_resource *texture,
2457 const struct pipe_sampler_view *state)
2458 {
2459 struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
2460 struct r600_texture *tmp = (struct r600_texture*)texture;
2461 const struct util_format_description *desc;
2462 unsigned format, num_format;
2463 uint32_t pitch = 0;
2464 unsigned char state_swizzle[4], swizzle[4];
2465 unsigned height, depth, width;
2466 enum pipe_format pipe_format = state->format;
2467 struct radeon_surface_level *surflevel;
2468 int first_non_void;
2469 uint64_t va;
2470
2471 if (view == NULL)
2472 return NULL;
2473
2474 /* initialize base object */
2475 view->base = *state;
2476 view->base.texture = NULL;
2477 pipe_resource_reference(&view->base.texture, texture);
2478 view->base.reference.count = 1;
2479 view->base.context = ctx;
2480 view->resource = &tmp->resource;
2481
2482 /* Buffer resource. */
2483 if (texture->target == PIPE_BUFFER) {
2484 unsigned stride;
2485
2486 desc = util_format_description(state->format);
2487 first_non_void = util_format_get_first_non_void_channel(state->format);
2488 stride = desc->block.bits / 8;
2489 va = r600_resource_va(ctx->screen, texture) + state->u.buf.first_element*stride;
2490 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2491 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2492
2493 view->state[0] = va;
2494 view->state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
2495 S_008F04_STRIDE(stride);
2496 view->state[2] = state->u.buf.last_element + 1 - state->u.buf.first_element;
2497 view->state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2498 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2499 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2500 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2501 S_008F0C_NUM_FORMAT(num_format) |
2502 S_008F0C_DATA_FORMAT(format);
2503 return &view->base;
2504 }
2505
2506 state_swizzle[0] = state->swizzle_r;
2507 state_swizzle[1] = state->swizzle_g;
2508 state_swizzle[2] = state->swizzle_b;
2509 state_swizzle[3] = state->swizzle_a;
2510
2511 surflevel = tmp->surface.level;
2512
2513 /* Texturing with separate depth and stencil. */
2514 if (tmp->is_depth && !tmp->is_flushing_texture) {
2515 switch (pipe_format) {
2516 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2517 pipe_format = PIPE_FORMAT_Z32_FLOAT;
2518 break;
2519 case PIPE_FORMAT_X8Z24_UNORM:
2520 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2521 /* Z24 is always stored like this. */
2522 pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2523 break;
2524 case PIPE_FORMAT_X24S8_UINT:
2525 case PIPE_FORMAT_S8X24_UINT:
2526 case PIPE_FORMAT_X32_S8X24_UINT:
2527 pipe_format = PIPE_FORMAT_S8_UINT;
2528 surflevel = tmp->surface.stencil_level;
2529 break;
2530 default:;
2531 }
2532 }
2533
2534 desc = util_format_description(pipe_format);
2535
2536 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2537 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2538 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2539
2540 switch (pipe_format) {
2541 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2542 case PIPE_FORMAT_X24S8_UINT:
2543 case PIPE_FORMAT_X32_S8X24_UINT:
2544 case PIPE_FORMAT_X8Z24_UNORM:
2545 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2546 break;
2547 default:
2548 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2549 }
2550 } else {
2551 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2552 }
2553
2554 first_non_void = util_format_get_first_non_void_channel(pipe_format);
2555
2556 switch (pipe_format) {
2557 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2558 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2559 break;
2560 default:
2561 if (first_non_void < 0) {
2562 if (util_format_is_compressed(pipe_format)) {
2563 switch (pipe_format) {
2564 case PIPE_FORMAT_DXT1_SRGB:
2565 case PIPE_FORMAT_DXT1_SRGBA:
2566 case PIPE_FORMAT_DXT3_SRGBA:
2567 case PIPE_FORMAT_DXT5_SRGBA:
2568 num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2569 break;
2570 case PIPE_FORMAT_RGTC1_SNORM:
2571 case PIPE_FORMAT_LATC1_SNORM:
2572 case PIPE_FORMAT_RGTC2_SNORM:
2573 case PIPE_FORMAT_LATC2_SNORM:
2574 num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2575 break;
2576 default:
2577 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2578 break;
2579 }
2580 } else {
2581 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2582 }
2583 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2584 num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2585 } else {
2586 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2587
2588 switch (desc->channel[first_non_void].type) {
2589 case UTIL_FORMAT_TYPE_FLOAT:
2590 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2591 break;
2592 case UTIL_FORMAT_TYPE_SIGNED:
2593 if (desc->channel[first_non_void].normalized)
2594 num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2595 else if (desc->channel[first_non_void].pure_integer)
2596 num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2597 else
2598 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2599 break;
2600 case UTIL_FORMAT_TYPE_UNSIGNED:
2601 if (desc->channel[first_non_void].normalized)
2602 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2603 else if (desc->channel[first_non_void].pure_integer)
2604 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2605 else
2606 num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2607 }
2608 }
2609 }
2610
2611 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
2612 if (format == ~0) {
2613 format = 0;
2614 }
2615
2616 /* not supported any more */
2617 //endian = si_colorformat_endian_swap(format);
2618
2619 width = surflevel[0].npix_x;
2620 height = surflevel[0].npix_y;
2621 depth = surflevel[0].npix_z;
2622 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
2623
2624 if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
2625 height = 1;
2626 depth = texture->array_size;
2627 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
2628 depth = texture->array_size;
2629 }
2630
2631 va = r600_resource_va(ctx->screen, texture);
2632 va += surflevel[0].offset;
2633 va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size;
2634 view->state[0] = va >> 8;
2635 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2636 S_008F14_DATA_FORMAT(format) |
2637 S_008F14_NUM_FORMAT(num_format));
2638 view->state[2] = (S_008F18_WIDTH(width - 1) |
2639 S_008F18_HEIGHT(height - 1));
2640 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2641 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2642 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2643 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2644 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
2645 0 : state->u.tex.first_level - tmp->mipmap_shift) |
2646 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
2647 util_logbase2(texture->nr_samples) :
2648 state->u.tex.last_level - tmp->mipmap_shift) |
2649 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
2650 S_008F1C_POW2_PAD(texture->last_level > 0) |
2651 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
2652 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2653 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2654 S_008F24_LAST_ARRAY(state->u.tex.last_layer));
2655 view->state[6] = 0;
2656 view->state[7] = 0;
2657
2658 /* Initialize the sampler view for FMASK. */
2659 if (tmp->fmask.size) {
2660 uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset;
2661 uint32_t fmask_format;
2662
2663 switch (texture->nr_samples) {
2664 case 2:
2665 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2666 break;
2667 case 4:
2668 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2669 break;
2670 case 8:
2671 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2672 break;
2673 default:
2674 assert(0);
2675 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2676 }
2677
2678 view->fmask_state[0] = va >> 8;
2679 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2680 S_008F14_DATA_FORMAT(fmask_format) |
2681 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2682 view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
2683 S_008F18_HEIGHT(height - 1);
2684 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2685 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2686 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2687 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2688 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
2689 S_008F1C_TYPE(si_tex_dim(texture->target, 0));
2690 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2691 S_008F20_PITCH(tmp->fmask.pitch - 1);
2692 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2693 S_008F24_LAST_ARRAY(state->u.tex.last_layer);
2694 view->fmask_state[6] = 0;
2695 view->fmask_state[7] = 0;
2696 }
2697
2698 return &view->base;
2699 }
2700
2701 static void si_sampler_view_destroy(struct pipe_context *ctx,
2702 struct pipe_sampler_view *state)
2703 {
2704 struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
2705
2706 pipe_resource_reference(&state->texture, NULL);
2707 FREE(resource);
2708 }
2709
2710 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2711 {
2712 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2713 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2714 (linear_filter &&
2715 (wrap == PIPE_TEX_WRAP_CLAMP ||
2716 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2717 }
2718
2719 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2720 {
2721 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2722 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2723
2724 return (state->border_color.ui[0] || state->border_color.ui[1] ||
2725 state->border_color.ui[2] || state->border_color.ui[3]) &&
2726 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2727 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2728 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2729 }
2730
2731 static void *si_create_sampler_state(struct pipe_context *ctx,
2732 const struct pipe_sampler_state *state)
2733 {
2734 struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state);
2735 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
2736 unsigned border_color_type;
2737
2738 if (rstate == NULL) {
2739 return NULL;
2740 }
2741
2742 if (sampler_state_needs_border_color(state))
2743 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
2744 else
2745 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2746
2747 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
2748 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
2749 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
2750 (state->max_anisotropy & 0x7) << 9 | /* XXX */
2751 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
2752 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
2753 aniso_flag_offset << 16 | /* XXX */
2754 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
2755 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
2756 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
2757 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
2758 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
2759 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
2760 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
2761 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
2762
2763 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2764 memcpy(rstate->border_color, state->border_color.ui,
2765 sizeof(rstate->border_color));
2766 }
2767
2768 return rstate;
2769 }
2770
2771 /* XXX consider moving this function to si_descriptors.c for gcc to inline
2772 * the si_set_sampler_view calls. LTO might help too. */
2773 static void si_set_sampler_views(struct pipe_context *ctx,
2774 unsigned shader, unsigned start,
2775 unsigned count,
2776 struct pipe_sampler_view **views)
2777 {
2778 struct si_context *sctx = (struct si_context *)ctx;
2779 struct si_textures_info *samplers = &sctx->samplers[shader];
2780 struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views;
2781 int i;
2782
2783 if (shader >= SI_NUM_SHADERS)
2784 return;
2785
2786 assert(start == 0);
2787
2788 for (i = 0; i < count; i++) {
2789 if (!views[i]) {
2790 samplers->depth_texture_mask &= ~(1 << i);
2791 samplers->compressed_colortex_mask &= ~(1 << i);
2792 si_set_sampler_view(sctx, shader, i, NULL, NULL);
2793 si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
2794 NULL, NULL);
2795 continue;
2796 }
2797
2798 si_set_sampler_view(sctx, shader, i, views[i], rviews[i]->state);
2799
2800 if (views[i]->texture->target != PIPE_BUFFER) {
2801 struct r600_texture *rtex =
2802 (struct r600_texture*)views[i]->texture;
2803
2804 if (rtex->is_depth && !rtex->is_flushing_texture) {
2805 samplers->depth_texture_mask |= 1 << i;
2806 } else {
2807 samplers->depth_texture_mask &= ~(1 << i);
2808 }
2809 if (rtex->cmask.size || rtex->fmask.size) {
2810 samplers->compressed_colortex_mask |= 1 << i;
2811 } else {
2812 samplers->compressed_colortex_mask &= ~(1 << i);
2813 }
2814
2815 if (rtex->fmask.size) {
2816 si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
2817 views[i], rviews[i]->fmask_state);
2818 } else {
2819 si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
2820 NULL, NULL);
2821 }
2822 }
2823 }
2824 for (; i < samplers->n_views; i++) {
2825 samplers->depth_texture_mask &= ~(1 << i);
2826 samplers->compressed_colortex_mask &= ~(1 << i);
2827 si_set_sampler_view(sctx, shader, i, NULL, NULL);
2828 si_set_sampler_view(sctx, shader, FMASK_TEX_OFFSET + i,
2829 NULL, NULL);
2830 }
2831
2832 samplers->n_views = count;
2833 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
2834 }
2835
2836 static void si_set_sampler_states(struct si_context *sctx,
2837 struct si_pm4_state *pm4,
2838 unsigned count, void **states,
2839 struct si_textures_info *samplers,
2840 unsigned user_data_reg)
2841 {
2842 struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
2843 uint32_t *border_color_table = NULL;
2844 int i, j;
2845
2846 if (!count)
2847 goto out;
2848
2849 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
2850
2851 si_pm4_sh_data_begin(pm4);
2852 for (i = 0; i < count; i++) {
2853 if (rstates[i] &&
2854 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
2855 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2856 if (!sctx->border_color_table ||
2857 ((sctx->border_color_offset + count - i) &
2858 C_008F3C_BORDER_COLOR_PTR)) {
2859 r600_resource_reference(&sctx->border_color_table, NULL);
2860 sctx->border_color_offset = 0;
2861
2862 sctx->border_color_table =
2863 si_resource_create_custom(&sctx->screen->b.b,
2864 PIPE_USAGE_STAGING,
2865 4096 * 4 * 4);
2866 }
2867
2868 if (!border_color_table) {
2869 border_color_table =
2870 sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
2871 sctx->b.rings.gfx.cs,
2872 PIPE_TRANSFER_WRITE |
2873 PIPE_TRANSFER_UNSYNCHRONIZED);
2874 }
2875
2876 for (j = 0; j < 4; j++) {
2877 border_color_table[4 * sctx->border_color_offset + j] =
2878 util_le32_to_cpu(rstates[i]->border_color[j]);
2879 }
2880
2881 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
2882 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
2883 }
2884
2885 for (j = 0; j < Elements(rstates[i]->val); ++j) {
2886 si_pm4_sh_data_add(pm4, rstates[i] ? rstates[i]->val[j] : 0);
2887 }
2888 }
2889 si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_SAMPLER);
2890
2891 if (border_color_table) {
2892 uint64_t va_offset =
2893 r600_resource_va(&sctx->screen->b.b,
2894 (void*)sctx->border_color_table);
2895
2896 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
2897 if (sctx->b.chip_class >= CIK)
2898 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
2899 sctx->b.ws->buffer_unmap(sctx->border_color_table->cs_buf);
2900 si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ);
2901 }
2902
2903 memcpy(samplers->samplers, states, sizeof(void*) * count);
2904
2905 out:
2906 samplers->n_samplers = count;
2907 }
2908
2909 static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2910 {
2911 struct si_context *sctx = (struct si_context *)ctx;
2912 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
2913
2914 si_set_sampler_states(sctx, pm4, count, states,
2915 &sctx->samplers[PIPE_SHADER_VERTEX],
2916 R_00B130_SPI_SHADER_USER_DATA_VS_0);
2917 #if HAVE_LLVM >= 0x0305
2918 si_set_sampler_states(sctx, pm4, count, states,
2919 &sctx->samplers[PIPE_SHADER_VERTEX],
2920 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2921 #endif
2922 si_pm4_set_state(sctx, vs_sampler, pm4);
2923 }
2924
2925 static void si_bind_gs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2926 {
2927 struct si_context *sctx = (struct si_context *)ctx;
2928 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
2929
2930 si_set_sampler_states(sctx, pm4, count, states,
2931 &sctx->samplers[PIPE_SHADER_GEOMETRY],
2932 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2933 si_pm4_set_state(sctx, gs_sampler, pm4);
2934 }
2935
2936 static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2937 {
2938 struct si_context *sctx = (struct si_context *)ctx;
2939 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
2940
2941 si_set_sampler_states(sctx, pm4, count, states,
2942 &sctx->samplers[PIPE_SHADER_FRAGMENT],
2943 R_00B030_SPI_SHADER_USER_DATA_PS_0);
2944 si_pm4_set_state(sctx, ps_sampler, pm4);
2945 }
2946
2947
2948 static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
2949 unsigned start, unsigned count,
2950 void **states)
2951 {
2952 assert(start == 0);
2953
2954 switch (shader) {
2955 case PIPE_SHADER_VERTEX:
2956 si_bind_vs_sampler_states(ctx, count, states);
2957 break;
2958 case PIPE_SHADER_GEOMETRY:
2959 si_bind_gs_sampler_states(ctx, count, states);
2960 break;
2961 case PIPE_SHADER_FRAGMENT:
2962 si_bind_ps_sampler_states(ctx, count, states);
2963 break;
2964 default:
2965 ;
2966 }
2967 }
2968
2969
2970
2971 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2972 {
2973 struct si_context *sctx = (struct si_context *)ctx;
2974 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
2975 uint16_t mask = sample_mask;
2976
2977 if (pm4 == NULL)
2978 return;
2979
2980 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
2981 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
2982
2983 si_pm4_set_state(sctx, sample_mask, pm4);
2984 }
2985
/**
 * pipe_context::delete_sampler_state hook: release a sampler state object.
 *
 * \param ctx    pipe context (unused)
 * \param state  sampler state to release; passed straight to free()
 *
 * NOTE(review): other destructors in this file use FREE(); confirm which
 * allocator the matching create function uses.
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	(void)ctx;

	free(state);
}
2990
2991 /*
2992 * Vertex elements & buffers
2993 */
2994
2995 static void *si_create_vertex_elements(struct pipe_context *ctx,
2996 unsigned count,
2997 const struct pipe_vertex_element *elements)
2998 {
2999 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
3000 int i;
3001
3002 assert(count < PIPE_MAX_ATTRIBS);
3003 if (!v)
3004 return NULL;
3005
3006 v->count = count;
3007 for (i = 0; i < count; ++i) {
3008 const struct util_format_description *desc;
3009 unsigned data_format, num_format;
3010 int first_non_void;
3011
3012 desc = util_format_description(elements[i].src_format);
3013 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
3014 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
3015 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
3016
3017 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3018 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3019 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3020 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3021 S_008F0C_NUM_FORMAT(num_format) |
3022 S_008F0C_DATA_FORMAT(data_format);
3023 }
3024 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
3025
3026 return v;
3027 }
3028
3029 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3030 {
3031 struct si_context *sctx = (struct si_context *)ctx;
3032 struct si_vertex_element *v = (struct si_vertex_element*)state;
3033
3034 sctx->vertex_elements = v;
3035 }
3036
3037 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3038 {
3039 struct si_context *sctx = (struct si_context *)ctx;
3040
3041 if (sctx->vertex_elements == state)
3042 sctx->vertex_elements = NULL;
3043 FREE(state);
3044 }
3045
3046 static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count,
3047 const struct pipe_vertex_buffer *buffers)
3048 {
3049 struct si_context *sctx = (struct si_context *)ctx;
3050
3051 util_set_vertex_buffers_count(sctx->vertex_buffer, &sctx->nr_vertex_buffers, buffers, start_slot, count);
3052 }
3053
3054 static void si_set_index_buffer(struct pipe_context *ctx,
3055 const struct pipe_index_buffer *ib)
3056 {
3057 struct si_context *sctx = (struct si_context *)ctx;
3058
3059 if (ib) {
3060 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
3061 memcpy(&sctx->index_buffer, ib, sizeof(*ib));
3062 } else {
3063 pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
3064 }
3065 }
3066
3067 /*
3068 * Misc
3069 */
/**
 * pipe_context::set_polygon_stipple hook.
 *
 * Intentionally a no-op: the stipple pattern is ignored.
 *
 * \param ctx    pipe context (unused)
 * \param state  stipple pattern (unused)
 */
static void si_set_polygon_stipple(struct pipe_context *ctx,
				   const struct pipe_poly_stipple *state)
{
	(void)ctx;
	(void)state;
}
3074
3075 static void si_texture_barrier(struct pipe_context *ctx)
3076 {
3077 struct si_context *sctx = (struct si_context *)ctx;
3078
3079 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
3080 R600_CONTEXT_FLUSH_AND_INV_CB;
3081 }
3082
3083 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3084 {
3085 struct pipe_blend_state blend;
3086
3087 memset(&blend, 0, sizeof(blend));
3088 blend.independent_blend_enable = true;
3089 blend.rt[0].colormask = 0xf;
3090 return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3091 }
3092
3093 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
3094 struct pipe_resource *texture,
3095 const struct pipe_surface *surf_tmpl)
3096 {
3097 struct r600_texture *rtex = (struct r600_texture*)texture;
3098 struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
3099 unsigned level = surf_tmpl->u.tex.level;
3100
3101 if (surface == NULL)
3102 return NULL;
3103
3104 assert(surf_tmpl->u.tex.first_layer <= util_max_layer(texture, surf_tmpl->u.tex.level));
3105 assert(surf_tmpl->u.tex.last_layer <= util_max_layer(texture, surf_tmpl->u.tex.level));
3106
3107 pipe_reference_init(&surface->base.reference, 1);
3108 pipe_resource_reference(&surface->base.texture, texture);
3109 surface->base.context = pipe;
3110 surface->base.format = surf_tmpl->format;
3111 surface->base.width = rtex->surface.level[level].npix_x;
3112 surface->base.height = rtex->surface.level[level].npix_y;
3113 surface->base.texture = texture;
3114 surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
3115 surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
3116 surface->base.u.tex.level = level;
3117
3118 return &surface->base;
3119 }
3120
3121 static void r600_surface_destroy(struct pipe_context *pipe,
3122 struct pipe_surface *surface)
3123 {
3124 pipe_resource_reference(&surface->texture, NULL);
3125 FREE(surface);
3126 }
3127
3128 static boolean si_dma_copy(struct pipe_context *ctx,
3129 struct pipe_resource *dst,
3130 unsigned dst_level,
3131 unsigned dst_x, unsigned dst_y, unsigned dst_z,
3132 struct pipe_resource *src,
3133 unsigned src_level,
3134 const struct pipe_box *src_box)
3135 {
3136 /* XXX implement this or share evergreen_dma_blit with r600g */
3137 return FALSE;
3138 }
3139
/**
 * set_occlusion_query_state hook: currently a no-op.
 *
 * XXX Turn this into a proper state.  Right now the queries are enabled
 * in draw_vbo, which snoops r600_common_context to see if any occlusion
 * queries are active.
 *
 * \param ctx     pipe context (unused)
 * \param enable  requested occlusion query state (unused)
 */
static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
	(void)ctx;
	(void)enable;
}
3146
/**
 * need_gfx_cs_space hook: adapter that forwards to si_need_cs_space()
 * with the context cast to si_context.
 *
 * \param ctx               pipe context (really a si_context)
 * \param num_dw            forwarded unchanged
 * \param include_draw_vbo  forwarded unchanged
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context*)ctx;

	si_need_cs_space(sctx, num_dw, include_draw_vbo);
}
3152
3153 void si_init_state_functions(struct si_context *sctx)
3154 {
3155 int i;
3156
3157 sctx->b.b.create_blend_state = si_create_blend_state;
3158 sctx->b.b.bind_blend_state = si_bind_blend_state;
3159 sctx->b.b.delete_blend_state = si_delete_blend_state;
3160 sctx->b.b.set_blend_color = si_set_blend_color;
3161
3162 sctx->b.b.create_rasterizer_state = si_create_rs_state;
3163 sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
3164 sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
3165
3166 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
3167 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
3168 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
3169
3170 for (i = 0; i < 8; i++) {
3171 sctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(sctx, true, true, i);
3172 sctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(sctx, true, false, i);
3173 sctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(sctx, false, true, i);
3174 }
3175 sctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(sctx, false, false, 0);
3176 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
3177 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
3178
3179 sctx->b.b.set_clip_state = si_set_clip_state;
3180 sctx->b.b.set_scissor_states = si_set_scissor_states;
3181 sctx->b.b.set_viewport_states = si_set_viewport_states;
3182 sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;
3183
3184 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
3185 sctx->b.b.get_sample_position = si_get_sample_position;
3186
3187 sctx->b.b.create_vs_state = si_create_vs_state;
3188 sctx->b.b.create_fs_state = si_create_fs_state;
3189 sctx->b.b.bind_vs_state = si_bind_vs_shader;
3190 sctx->b.b.bind_fs_state = si_bind_ps_shader;
3191 sctx->b.b.delete_vs_state = si_delete_vs_shader;
3192 sctx->b.b.delete_fs_state = si_delete_ps_shader;
3193 #if HAVE_LLVM >= 0x0305
3194 sctx->b.b.create_gs_state = si_create_gs_state;
3195 sctx->b.b.bind_gs_state = si_bind_gs_shader;
3196 sctx->b.b.delete_gs_state = si_delete_gs_shader;
3197 #endif
3198
3199 sctx->b.b.create_sampler_state = si_create_sampler_state;
3200 sctx->b.b.bind_sampler_states = si_bind_sampler_states;
3201 sctx->b.b.delete_sampler_state = si_delete_sampler_state;
3202
3203 sctx->b.b.create_sampler_view = si_create_sampler_view;
3204 sctx->b.b.set_sampler_views = si_set_sampler_views;
3205 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
3206
3207 sctx->b.b.set_sample_mask = si_set_sample_mask;
3208
3209 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
3210 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
3211 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
3212 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
3213 sctx->b.b.set_index_buffer = si_set_index_buffer;
3214
3215 sctx->b.b.texture_barrier = si_texture_barrier;
3216 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
3217 sctx->b.b.create_surface = r600_create_surface;
3218 sctx->b.b.surface_destroy = r600_surface_destroy;
3219 sctx->b.dma_copy = si_dma_copy;
3220 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
3221 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
3222
3223 sctx->b.b.draw_vbo = si_draw_vbo;
3224 }
3225
3226 void si_init_config(struct si_context *sctx)
3227 {
3228 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
3229
3230 if (pm4 == NULL)
3231 return;
3232
3233 si_cmd_context_control(pm4);
3234
3235 si_pm4_set_reg(pm4, R_028A4C_PA_SC_MODE_CNTL_1, 0x0);
3236
3237 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
3238 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
3239 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
3240 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
3241 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
3242 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
3243 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
3244 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
3245 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
3246 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
3247 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
3248 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);
3249
3250 /* FIXME calculate these values somehow ??? */
3251 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
3252 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3253 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3254
3255 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
3256 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3257 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
3258 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3259
3260 si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
3261 si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
3262 si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
3263 si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
3264
3265 si_pm4_set_reg(pm4, R_028B94_VGT_STRMOUT_CONFIG, 0x0);
3266 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3267 if (sctx->b.chip_class == SI) {
3268 si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM,
3269 S_028AA8_SWITCH_ON_EOP(1) |
3270 S_028AA8_PARTIAL_VS_WAVE_ON(1) |
3271 S_028AA8_PRIMGROUP_SIZE(63));
3272 }
3273 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
3274 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3275 if (sctx->b.chip_class < CIK)
3276 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3277 S_008A14_CLIP_VTX_REORDER_ENA(1));
3278
3279 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3280 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3281
3282 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3283
3284 if (sctx->b.chip_class >= CIK) {
3285 switch (sctx->screen->b.family) {
3286 case CHIP_BONAIRE:
3287 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
3288 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3289 break;
3290 case CHIP_HAWAII:
3291 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
3292 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
3293 break;
3294 case CHIP_KAVERI:
3295 /* XXX todo */
3296 case CHIP_KABINI:
3297 /* XXX todo */
3298 default:
3299 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3300 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3301 break;
3302 }
3303 } else {
3304 switch (sctx->screen->b.family) {
3305 case CHIP_TAHITI:
3306 case CHIP_PITCAIRN:
3307 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
3308 break;
3309 case CHIP_VERDE:
3310 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
3311 break;
3312 case CHIP_OLAND:
3313 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
3314 break;
3315 case CHIP_HAINAN:
3316 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3317 break;
3318 default:
3319 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3320 break;
3321 }
3322 }
3323
3324 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3325 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3326 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3327 S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3328 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3329 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3330 S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3331
3332 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3333 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3334 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000);
3335 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000);
3336 si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
3337 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000);
3338 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
3339 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
3340 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
3341 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000);
3342 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
3343 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
3344 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
3345 si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000);
3346 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3347 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3348 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3349 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
3350 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
3351 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
3352 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3353 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3354
3355 if (sctx->b.chip_class >= CIK) {
3356 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3357 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
3358 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3359 }
3360
3361 si_pm4_set_state(sctx, init, pm4);
3362 }