radeonsi: Ensure fmask_format is initialized in release builds.
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Christian König <christian.koenig@amd.com>
25 */
26
27 #include "util/u_memory.h"
28 #include "util/u_framebuffer.h"
29 #include "util/u_blitter.h"
30 #include "util/u_helpers.h"
31 #include "util/u_math.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_upload_mgr.h"
34 #include "util/u_format_s3tc.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "radeonsi_pipe.h"
37 #include "radeonsi_shader.h"
38 #include "si_state.h"
39 #include "sid.h"
40
41 static uint32_t cik_num_banks(uint32_t nbanks)
42 {
43 switch (nbanks) {
44 case 2:
45 return V_02803C_ADDR_SURF_2_BANK;
46 case 4:
47 return V_02803C_ADDR_SURF_4_BANK;
48 case 8:
49 default:
50 return V_02803C_ADDR_SURF_8_BANK;
51 case 16:
52 return V_02803C_ADDR_SURF_16_BANK;
53 }
54 }
55
56
57 static unsigned cik_tile_split(unsigned tile_split)
58 {
59 switch (tile_split) {
60 case 64:
61 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
62 break;
63 case 128:
64 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
65 break;
66 case 256:
67 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
68 break;
69 case 512:
70 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
71 break;
72 default:
73 case 1024:
74 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
75 break;
76 case 2048:
77 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
78 break;
79 case 4096:
80 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
81 break;
82 }
83 return tile_split;
84 }
85
86 static unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
87 {
88 switch (macro_tile_aspect) {
89 default:
90 case 1:
91 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
92 break;
93 case 2:
94 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
95 break;
96 case 4:
97 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
98 break;
99 case 8:
100 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
101 break;
102 }
103 return macro_tile_aspect;
104 }
105
106 static unsigned cik_bank_wh(unsigned bankwh)
107 {
108 switch (bankwh) {
109 default:
110 case 1:
111 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
112 break;
113 case 2:
114 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
115 break;
116 case 4:
117 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
118 break;
119 case 8:
120 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
121 break;
122 }
123 return bankwh;
124 }
125
126 static unsigned cik_db_pipe_config(unsigned tile_pipes,
127 unsigned num_rbs)
128 {
129 unsigned pipe_config;
130
131 switch (tile_pipes) {
132 case 8:
133 pipe_config = V_02803C_X_ADDR_SURF_P8_32X32_16X16;
134 break;
135 case 4:
136 default:
137 if (num_rbs == 4)
138 pipe_config = V_02803C_X_ADDR_SURF_P4_16X16;
139 else
140 pipe_config = V_02803C_X_ADDR_SURF_P4_8X16;
141 break;
142 case 2:
143 pipe_config = V_02803C_ADDR_SURF_P2;
144 break;
145 }
146 return pipe_config;
147 }
148
149 /*
150 * inferred framebuffer and blender state
151 */
152 static void si_update_fb_blend_state(struct r600_context *rctx)
153 {
154 struct si_pm4_state *pm4;
155 struct si_state_blend *blend = rctx->queued.named.blend;
156 uint32_t mask;
157
158 if (blend == NULL)
159 return;
160
161 pm4 = si_pm4_alloc_state(rctx);
162 if (pm4 == NULL)
163 return;
164
165 mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
166 mask &= blend->cb_target_mask;
167 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
168
169 si_pm4_set_state(rctx, fb_blend, pm4);
170 }
171
172 /*
173 * Blender functions
174 */
175
176 static uint32_t si_translate_blend_function(int blend_func)
177 {
178 switch (blend_func) {
179 case PIPE_BLEND_ADD:
180 return V_028780_COMB_DST_PLUS_SRC;
181 case PIPE_BLEND_SUBTRACT:
182 return V_028780_COMB_SRC_MINUS_DST;
183 case PIPE_BLEND_REVERSE_SUBTRACT:
184 return V_028780_COMB_DST_MINUS_SRC;
185 case PIPE_BLEND_MIN:
186 return V_028780_COMB_MIN_DST_SRC;
187 case PIPE_BLEND_MAX:
188 return V_028780_COMB_MAX_DST_SRC;
189 default:
190 R600_ERR("Unknown blend function %d\n", blend_func);
191 assert(0);
192 break;
193 }
194 return 0;
195 }
196
197 static uint32_t si_translate_blend_factor(int blend_fact)
198 {
199 switch (blend_fact) {
200 case PIPE_BLENDFACTOR_ONE:
201 return V_028780_BLEND_ONE;
202 case PIPE_BLENDFACTOR_SRC_COLOR:
203 return V_028780_BLEND_SRC_COLOR;
204 case PIPE_BLENDFACTOR_SRC_ALPHA:
205 return V_028780_BLEND_SRC_ALPHA;
206 case PIPE_BLENDFACTOR_DST_ALPHA:
207 return V_028780_BLEND_DST_ALPHA;
208 case PIPE_BLENDFACTOR_DST_COLOR:
209 return V_028780_BLEND_DST_COLOR;
210 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
211 return V_028780_BLEND_SRC_ALPHA_SATURATE;
212 case PIPE_BLENDFACTOR_CONST_COLOR:
213 return V_028780_BLEND_CONSTANT_COLOR;
214 case PIPE_BLENDFACTOR_CONST_ALPHA:
215 return V_028780_BLEND_CONSTANT_ALPHA;
216 case PIPE_BLENDFACTOR_ZERO:
217 return V_028780_BLEND_ZERO;
218 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
219 return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
220 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
221 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
222 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
223 return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
224 case PIPE_BLENDFACTOR_INV_DST_COLOR:
225 return V_028780_BLEND_ONE_MINUS_DST_COLOR;
226 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
227 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
228 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
229 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
230 case PIPE_BLENDFACTOR_SRC1_COLOR:
231 return V_028780_BLEND_SRC1_COLOR;
232 case PIPE_BLENDFACTOR_SRC1_ALPHA:
233 return V_028780_BLEND_SRC1_ALPHA;
234 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
235 return V_028780_BLEND_INV_SRC1_COLOR;
236 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
237 return V_028780_BLEND_INV_SRC1_ALPHA;
238 default:
239 R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
240 assert(0);
241 break;
242 }
243 return 0;
244 }
245
246 static void *si_create_blend_state_mode(struct pipe_context *ctx,
247 const struct pipe_blend_state *state,
248 unsigned mode)
249 {
250 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
251 struct si_pm4_state *pm4 = &blend->pm4;
252
253 uint32_t color_control;
254
255 if (blend == NULL)
256 return NULL;
257
258 blend->alpha_to_one = state->alpha_to_one;
259
260 color_control = S_028808_MODE(mode);
261 if (state->logicop_enable) {
262 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
263 } else {
264 color_control |= S_028808_ROP3(0xcc);
265 }
266 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
267
268 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
269 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
270 S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
271 S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
272 S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
273 S_028B70_ALPHA_TO_MASK_OFFSET3(2));
274
275 blend->cb_target_mask = 0;
276 for (int i = 0; i < 8; i++) {
277 /* state->rt entries > 0 only written if independent blending */
278 const int j = state->independent_blend_enable ? i : 0;
279
280 unsigned eqRGB = state->rt[j].rgb_func;
281 unsigned srcRGB = state->rt[j].rgb_src_factor;
282 unsigned dstRGB = state->rt[j].rgb_dst_factor;
283 unsigned eqA = state->rt[j].alpha_func;
284 unsigned srcA = state->rt[j].alpha_src_factor;
285 unsigned dstA = state->rt[j].alpha_dst_factor;
286
287 unsigned blend_cntl = 0;
288
289 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
290 blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
291
292 if (!state->rt[j].blend_enable) {
293 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
294 continue;
295 }
296
297 blend_cntl |= S_028780_ENABLE(1);
298 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
299 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
300 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
301
302 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
303 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
304 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
305 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
306 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
307 }
308 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
309 }
310
311 return blend;
312 }
313
314 static void *si_create_blend_state(struct pipe_context *ctx,
315 const struct pipe_blend_state *state)
316 {
317 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
318 }
319
320 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
321 {
322 struct r600_context *rctx = (struct r600_context *)ctx;
323 si_pm4_bind_state(rctx, blend, (struct si_state_blend *)state);
324 si_update_fb_blend_state(rctx);
325 }
326
327 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
328 {
329 struct r600_context *rctx = (struct r600_context *)ctx;
330 si_pm4_delete_state(rctx, blend, (struct si_state_blend *)state);
331 }
332
333 static void si_set_blend_color(struct pipe_context *ctx,
334 const struct pipe_blend_color *state)
335 {
336 struct r600_context *rctx = (struct r600_context *)ctx;
337 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
338
339 if (pm4 == NULL)
340 return;
341
342 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
343 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
344 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
345 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
346
347 si_pm4_set_state(rctx, blend_color, pm4);
348 }
349
350 /*
351 * Clipping, scissors and viewport
352 */
353
354 static void si_set_clip_state(struct pipe_context *ctx,
355 const struct pipe_clip_state *state)
356 {
357 struct r600_context *rctx = (struct r600_context *)ctx;
358 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
359 struct pipe_constant_buffer cb;
360
361 if (pm4 == NULL)
362 return;
363
364 for (int i = 0; i < 6; i++) {
365 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
366 fui(state->ucp[i][0]));
367 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
368 fui(state->ucp[i][1]));
369 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
370 fui(state->ucp[i][2]));
371 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
372 fui(state->ucp[i][3]));
373 }
374
375 cb.buffer = NULL;
376 cb.user_buffer = state->ucp;
377 cb.buffer_offset = 0;
378 cb.buffer_size = 4*4*8;
379 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 1, &cb);
380 pipe_resource_reference(&cb.buffer, NULL);
381
382 si_pm4_set_state(rctx, clip, pm4);
383 }
384
385 static void si_set_scissor_states(struct pipe_context *ctx,
386 unsigned start_slot,
387 unsigned num_scissors,
388 const struct pipe_scissor_state *state)
389 {
390 struct r600_context *rctx = (struct r600_context *)ctx;
391 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
392 uint32_t tl, br;
393
394 if (pm4 == NULL)
395 return;
396
397 tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny);
398 br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
399 si_pm4_set_reg(pm4, R_028210_PA_SC_CLIPRECT_0_TL, tl);
400 si_pm4_set_reg(pm4, R_028214_PA_SC_CLIPRECT_0_BR, br);
401 si_pm4_set_reg(pm4, R_028218_PA_SC_CLIPRECT_1_TL, tl);
402 si_pm4_set_reg(pm4, R_02821C_PA_SC_CLIPRECT_1_BR, br);
403 si_pm4_set_reg(pm4, R_028220_PA_SC_CLIPRECT_2_TL, tl);
404 si_pm4_set_reg(pm4, R_028224_PA_SC_CLIPRECT_2_BR, br);
405 si_pm4_set_reg(pm4, R_028228_PA_SC_CLIPRECT_3_TL, tl);
406 si_pm4_set_reg(pm4, R_02822C_PA_SC_CLIPRECT_3_BR, br);
407
408 si_pm4_set_state(rctx, scissor, pm4);
409 }
410
411 static void si_set_viewport_states(struct pipe_context *ctx,
412 unsigned start_slot,
413 unsigned num_viewports,
414 const struct pipe_viewport_state *state)
415 {
416 struct r600_context *rctx = (struct r600_context *)ctx;
417 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
418 struct si_pm4_state *pm4 = &viewport->pm4;
419
420 if (viewport == NULL)
421 return;
422
423 viewport->viewport = *state;
424 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000);
425 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000);
426 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
427 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
428 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
429 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
430 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
431 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
432 si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
433
434 si_pm4_set_state(rctx, viewport, viewport);
435 }
436
437 /*
438 * inferred state between framebuffer and rasterizer
439 */
440 static void si_update_fb_rs_state(struct r600_context *rctx)
441 {
442 struct si_state_rasterizer *rs = rctx->queued.named.rasterizer;
443 struct si_pm4_state *pm4;
444 unsigned offset_db_fmt_cntl = 0, depth;
445 float offset_units;
446
447 if (!rs || !rctx->framebuffer.zsbuf)
448 return;
449
450 offset_units = rctx->queued.named.rasterizer->offset_units;
451 switch (rctx->framebuffer.zsbuf->texture->format) {
452 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
453 case PIPE_FORMAT_X8Z24_UNORM:
454 case PIPE_FORMAT_Z24X8_UNORM:
455 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
456 depth = -24;
457 offset_units *= 2.0f;
458 break;
459 case PIPE_FORMAT_Z32_FLOAT:
460 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
461 depth = -23;
462 offset_units *= 1.0f;
463 offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
464 break;
465 case PIPE_FORMAT_Z16_UNORM:
466 depth = -16;
467 offset_units *= 4.0f;
468 break;
469 default:
470 return;
471 }
472
473 pm4 = si_pm4_alloc_state(rctx);
474
475 if (pm4 == NULL)
476 return;
477
478 /* FIXME some of those reg can be computed with cso */
479 offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
480 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
481 fui(rctx->queued.named.rasterizer->offset_scale));
482 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
483 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
484 fui(rctx->queued.named.rasterizer->offset_scale));
485 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
486 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, offset_db_fmt_cntl);
487
488 si_pm4_set_state(rctx, fb_rs, pm4);
489 }
490
491 /*
492 * Rasterizer
493 */
494
495 static uint32_t si_translate_fill(uint32_t func)
496 {
497 switch(func) {
498 case PIPE_POLYGON_MODE_FILL:
499 return V_028814_X_DRAW_TRIANGLES;
500 case PIPE_POLYGON_MODE_LINE:
501 return V_028814_X_DRAW_LINES;
502 case PIPE_POLYGON_MODE_POINT:
503 return V_028814_X_DRAW_POINTS;
504 default:
505 assert(0);
506 return V_028814_X_DRAW_POINTS;
507 }
508 }
509
510 static void *si_create_rs_state(struct pipe_context *ctx,
511 const struct pipe_rasterizer_state *state)
512 {
513 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
514 struct si_pm4_state *pm4 = &rs->pm4;
515 unsigned tmp;
516 unsigned prov_vtx = 1, polygon_dual_mode;
517 unsigned clip_rule;
518 float psize_min, psize_max;
519
520 if (rs == NULL) {
521 return NULL;
522 }
523
524 rs->two_side = state->light_twoside;
525 rs->multisample_enable = state->multisample;
526 rs->clip_plane_enable = state->clip_plane_enable;
527
528 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
529 state->fill_back != PIPE_POLYGON_MODE_FILL);
530
531 if (state->flatshade_first)
532 prov_vtx = 0;
533
534 rs->flatshade = state->flatshade;
535 rs->sprite_coord_enable = state->sprite_coord_enable;
536 rs->pa_sc_line_stipple = state->line_stipple_enable ?
537 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
538 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
539 rs->pa_su_sc_mode_cntl =
540 S_028814_PROVOKING_VTX_LAST(prov_vtx) |
541 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
542 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
543 S_028814_FACE(!state->front_ccw) |
544 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
545 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
546 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
547 S_028814_POLY_MODE(polygon_dual_mode) |
548 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
549 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back));
550 rs->pa_cl_clip_cntl =
551 S_028810_PS_UCP_MODE(3) |
552 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
553 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
554 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
555
556 clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
557
558 /* offset */
559 rs->offset_units = state->offset_units;
560 rs->offset_scale = state->offset_scale * 12.0f;
561
562 tmp = S_0286D4_FLAT_SHADE_ENA(1);
563 if (state->sprite_coord_enable) {
564 tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
565 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
566 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
567 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
568 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
569 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
570 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
571 }
572 }
573 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
574
575 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000);
576 /* point size 12.4 fixed point */
577 tmp = (unsigned)(state->point_size * 8.0);
578 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
579
580 if (state->point_size_per_vertex) {
581 psize_min = util_get_min_point_size(state);
582 psize_max = 8192;
583 } else {
584 /* Force the point size to be as if the vertex output was disabled. */
585 psize_min = state->point_size;
586 psize_max = state->point_size;
587 }
588 /* Divide by two, because 0.5 = 1 pixel. */
589 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
590 S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) |
591 S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2)));
592
593 tmp = (unsigned)state->line_width * 8;
594 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
595 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
596 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
597 S_028A48_MSAA_ENABLE(state->multisample));
598
599 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
600 S_028BE4_PIX_CENTER(state->half_pixel_center) |
601 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
602 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
603 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
604 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
605 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000);
606
607 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
608 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule);
609
610 return rs;
611 }
612
613 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
614 {
615 struct r600_context *rctx = (struct r600_context *)ctx;
616 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
617
618 if (state == NULL)
619 return;
620
621 // TODO
622 rctx->sprite_coord_enable = rs->sprite_coord_enable;
623 rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
624 rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
625
626 si_pm4_bind_state(rctx, rasterizer, rs);
627 si_update_fb_rs_state(rctx);
628 }
629
630 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
631 {
632 struct r600_context *rctx = (struct r600_context *)ctx;
633 si_pm4_delete_state(rctx, rasterizer, (struct si_state_rasterizer *)state);
634 }
635
636 /*
637 * infeered state between dsa and stencil ref
638 */
639 static void si_update_dsa_stencil_ref(struct r600_context *rctx)
640 {
641 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
642 struct pipe_stencil_ref *ref = &rctx->stencil_ref;
643 struct si_state_dsa *dsa = rctx->queued.named.dsa;
644
645 if (pm4 == NULL)
646 return;
647
648 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
649 S_028430_STENCILTESTVAL(ref->ref_value[0]) |
650 S_028430_STENCILMASK(dsa->valuemask[0]) |
651 S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
652 S_028430_STENCILOPVAL(1));
653 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
654 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
655 S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
656 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
657 S_028434_STENCILOPVAL_BF(1));
658
659 si_pm4_set_state(rctx, dsa_stencil_ref, pm4);
660 }
661
662 static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
663 const struct pipe_stencil_ref *state)
664 {
665 struct r600_context *rctx = (struct r600_context *)ctx;
666 rctx->stencil_ref = *state;
667 si_update_dsa_stencil_ref(rctx);
668 }
669
670
671 /*
672 * DSA
673 */
674
675 static uint32_t si_translate_stencil_op(int s_op)
676 {
677 switch (s_op) {
678 case PIPE_STENCIL_OP_KEEP:
679 return V_02842C_STENCIL_KEEP;
680 case PIPE_STENCIL_OP_ZERO:
681 return V_02842C_STENCIL_ZERO;
682 case PIPE_STENCIL_OP_REPLACE:
683 return V_02842C_STENCIL_REPLACE_TEST;
684 case PIPE_STENCIL_OP_INCR:
685 return V_02842C_STENCIL_ADD_CLAMP;
686 case PIPE_STENCIL_OP_DECR:
687 return V_02842C_STENCIL_SUB_CLAMP;
688 case PIPE_STENCIL_OP_INCR_WRAP:
689 return V_02842C_STENCIL_ADD_WRAP;
690 case PIPE_STENCIL_OP_DECR_WRAP:
691 return V_02842C_STENCIL_SUB_WRAP;
692 case PIPE_STENCIL_OP_INVERT:
693 return V_02842C_STENCIL_INVERT;
694 default:
695 R600_ERR("Unknown stencil op %d", s_op);
696 assert(0);
697 break;
698 }
699 return 0;
700 }
701
702 static void *si_create_dsa_state(struct pipe_context *ctx,
703 const struct pipe_depth_stencil_alpha_state *state)
704 {
705 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
706 struct si_pm4_state *pm4 = &dsa->pm4;
707 unsigned db_depth_control;
708 unsigned db_render_override, db_render_control;
709 uint32_t db_stencil_control = 0;
710
711 if (dsa == NULL) {
712 return NULL;
713 }
714
715 dsa->valuemask[0] = state->stencil[0].valuemask;
716 dsa->valuemask[1] = state->stencil[1].valuemask;
717 dsa->writemask[0] = state->stencil[0].writemask;
718 dsa->writemask[1] = state->stencil[1].writemask;
719
720 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
721 S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
722 S_028800_ZFUNC(state->depth.func);
723
724 /* stencil */
725 if (state->stencil[0].enabled) {
726 db_depth_control |= S_028800_STENCIL_ENABLE(1);
727 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
728 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
729 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
730 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
731
732 if (state->stencil[1].enabled) {
733 db_depth_control |= S_028800_BACKFACE_ENABLE(1);
734 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
735 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
736 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
737 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
738 }
739 }
740
741 /* alpha */
742 if (state->alpha.enabled) {
743 dsa->alpha_func = state->alpha.func;
744 dsa->alpha_ref = state->alpha.ref_value;
745 } else {
746 dsa->alpha_func = PIPE_FUNC_ALWAYS;
747 }
748
749 /* misc */
750 db_render_control = 0;
751 db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
752 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
753 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
754 /* TODO db_render_override depends on query */
755 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
756 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
757 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
758 si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000);
759 //si_pm4_set_reg(pm4, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control);
760 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
761 si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control);
762 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
763 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
764 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
765 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
766 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
767 dsa->db_render_override = db_render_override;
768
769 return dsa;
770 }
771
772 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
773 {
774 struct r600_context *rctx = (struct r600_context *)ctx;
775 struct si_state_dsa *dsa = state;
776
777 if (state == NULL)
778 return;
779
780 si_pm4_bind_state(rctx, dsa, dsa);
781 si_update_dsa_stencil_ref(rctx);
782 }
783
784 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
785 {
786 struct r600_context *rctx = (struct r600_context *)ctx;
787 si_pm4_delete_state(rctx, dsa, (struct si_state_dsa *)state);
788 }
789
790 static void *si_create_db_flush_dsa(struct r600_context *rctx, bool copy_depth,
791 bool copy_stencil, int sample)
792 {
793 struct pipe_depth_stencil_alpha_state dsa;
794 struct si_state_dsa *state;
795
796 memset(&dsa, 0, sizeof(dsa));
797
798 state = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
799 if (copy_depth || copy_stencil) {
800 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
801 S_028000_DEPTH_COPY(copy_depth) |
802 S_028000_STENCIL_COPY(copy_stencil) |
803 S_028000_COPY_CENTROID(1) |
804 S_028000_COPY_SAMPLE(sample));
805 } else {
806 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
807 S_028000_DEPTH_COMPRESS_DISABLE(1) |
808 S_028000_STENCIL_COMPRESS_DISABLE(1));
809 si_pm4_set_reg(&state->pm4, R_02800C_DB_RENDER_OVERRIDE,
810 S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
811 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
812 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
813 S_02800C_DISABLE_TILE_RATE_TILES(1));
814 }
815
816 return state;
817 }
818
819 /*
820 * format translation
821 */
822 static uint32_t si_translate_colorformat(enum pipe_format format)
823 {
824 switch (format) {
825 /* 8-bit buffers. */
826 case PIPE_FORMAT_A8_UNORM:
827 case PIPE_FORMAT_A8_SNORM:
828 case PIPE_FORMAT_A8_UINT:
829 case PIPE_FORMAT_A8_SINT:
830 case PIPE_FORMAT_I8_UNORM:
831 case PIPE_FORMAT_I8_SNORM:
832 case PIPE_FORMAT_I8_UINT:
833 case PIPE_FORMAT_I8_SINT:
834 case PIPE_FORMAT_L8_UNORM:
835 case PIPE_FORMAT_L8_SNORM:
836 case PIPE_FORMAT_L8_UINT:
837 case PIPE_FORMAT_L8_SINT:
838 case PIPE_FORMAT_L8_SRGB:
839 case PIPE_FORMAT_R8_UNORM:
840 case PIPE_FORMAT_R8_SNORM:
841 case PIPE_FORMAT_R8_UINT:
842 case PIPE_FORMAT_R8_SINT:
843 return V_028C70_COLOR_8;
844
845 /* 16-bit buffers. */
846 case PIPE_FORMAT_B5G6R5_UNORM:
847 return V_028C70_COLOR_5_6_5;
848
849 case PIPE_FORMAT_B5G5R5A1_UNORM:
850 case PIPE_FORMAT_B5G5R5X1_UNORM:
851 return V_028C70_COLOR_1_5_5_5;
852
853 case PIPE_FORMAT_B4G4R4A4_UNORM:
854 case PIPE_FORMAT_B4G4R4X4_UNORM:
855 return V_028C70_COLOR_4_4_4_4;
856
857 case PIPE_FORMAT_L8A8_UNORM:
858 case PIPE_FORMAT_L8A8_SNORM:
859 case PIPE_FORMAT_L8A8_UINT:
860 case PIPE_FORMAT_L8A8_SINT:
861 case PIPE_FORMAT_R8G8_SNORM:
862 case PIPE_FORMAT_R8G8_UNORM:
863 case PIPE_FORMAT_R8G8_UINT:
864 case PIPE_FORMAT_R8G8_SINT:
865 return V_028C70_COLOR_8_8;
866
867 case PIPE_FORMAT_Z16_UNORM:
868 case PIPE_FORMAT_R16_UNORM:
869 case PIPE_FORMAT_R16_SNORM:
870 case PIPE_FORMAT_R16_UINT:
871 case PIPE_FORMAT_R16_SINT:
872 case PIPE_FORMAT_R16_FLOAT:
873 case PIPE_FORMAT_L16_UNORM:
874 case PIPE_FORMAT_L16_SNORM:
875 case PIPE_FORMAT_L16_FLOAT:
876 case PIPE_FORMAT_I16_UNORM:
877 case PIPE_FORMAT_I16_SNORM:
878 case PIPE_FORMAT_I16_FLOAT:
879 case PIPE_FORMAT_A16_UNORM:
880 case PIPE_FORMAT_A16_SNORM:
881 case PIPE_FORMAT_A16_FLOAT:
882 return V_028C70_COLOR_16;
883
884 /* 32-bit buffers. */
885 case PIPE_FORMAT_A8B8G8R8_SRGB:
886 case PIPE_FORMAT_A8B8G8R8_UNORM:
887 case PIPE_FORMAT_A8R8G8B8_UNORM:
888 case PIPE_FORMAT_B8G8R8A8_SRGB:
889 case PIPE_FORMAT_B8G8R8A8_UNORM:
890 case PIPE_FORMAT_B8G8R8X8_UNORM:
891 case PIPE_FORMAT_R8G8B8A8_SNORM:
892 case PIPE_FORMAT_R8G8B8A8_UNORM:
893 case PIPE_FORMAT_R8G8B8X8_UNORM:
894 case PIPE_FORMAT_R8G8B8X8_SNORM:
895 case PIPE_FORMAT_R8G8B8X8_SRGB:
896 case PIPE_FORMAT_R8G8B8X8_UINT:
897 case PIPE_FORMAT_R8G8B8X8_SINT:
898 case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
899 case PIPE_FORMAT_X8B8G8R8_UNORM:
900 case PIPE_FORMAT_X8R8G8B8_UNORM:
901 case PIPE_FORMAT_R8G8B8A8_SSCALED:
902 case PIPE_FORMAT_R8G8B8A8_USCALED:
903 case PIPE_FORMAT_R8G8B8A8_SINT:
904 case PIPE_FORMAT_R8G8B8A8_UINT:
905 return V_028C70_COLOR_8_8_8_8;
906
907 case PIPE_FORMAT_R10G10B10A2_UNORM:
908 case PIPE_FORMAT_R10G10B10X2_SNORM:
909 case PIPE_FORMAT_B10G10R10A2_UNORM:
910 case PIPE_FORMAT_B10G10R10A2_UINT:
911 case PIPE_FORMAT_B10G10R10X2_UNORM:
912 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
913 return V_028C70_COLOR_2_10_10_10;
914
915 case PIPE_FORMAT_Z24X8_UNORM:
916 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
917 return V_028C70_COLOR_8_24;
918
919 case PIPE_FORMAT_S8X24_UINT:
920 case PIPE_FORMAT_X8Z24_UNORM:
921 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
922 return V_028C70_COLOR_24_8;
923
924 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
925 return V_028C70_COLOR_X24_8_32_FLOAT;
926
927 case PIPE_FORMAT_I32_FLOAT:
928 case PIPE_FORMAT_L32_FLOAT:
929 case PIPE_FORMAT_R32_FLOAT:
930 case PIPE_FORMAT_A32_FLOAT:
931 case PIPE_FORMAT_Z32_FLOAT:
932 return V_028C70_COLOR_32;
933
934 case PIPE_FORMAT_L16A16_UNORM:
935 case PIPE_FORMAT_L16A16_SNORM:
936 case PIPE_FORMAT_L16A16_FLOAT:
937 case PIPE_FORMAT_R16G16_SSCALED:
938 case PIPE_FORMAT_R16G16_UNORM:
939 case PIPE_FORMAT_R16G16_SNORM:
940 case PIPE_FORMAT_R16G16_UINT:
941 case PIPE_FORMAT_R16G16_SINT:
942 case PIPE_FORMAT_R16G16_FLOAT:
943 return V_028C70_COLOR_16_16;
944
945 case PIPE_FORMAT_R11G11B10_FLOAT:
946 return V_028C70_COLOR_10_11_11;
947
948 /* 64-bit buffers. */
949 case PIPE_FORMAT_R16G16B16A16_UINT:
950 case PIPE_FORMAT_R16G16B16A16_SINT:
951 case PIPE_FORMAT_R16G16B16A16_USCALED:
952 case PIPE_FORMAT_R16G16B16A16_SSCALED:
953 case PIPE_FORMAT_R16G16B16A16_UNORM:
954 case PIPE_FORMAT_R16G16B16A16_SNORM:
955 case PIPE_FORMAT_R16G16B16A16_FLOAT:
956 case PIPE_FORMAT_R16G16B16X16_UNORM:
957 case PIPE_FORMAT_R16G16B16X16_SNORM:
958 case PIPE_FORMAT_R16G16B16X16_FLOAT:
959 case PIPE_FORMAT_R16G16B16X16_UINT:
960 case PIPE_FORMAT_R16G16B16X16_SINT:
961 return V_028C70_COLOR_16_16_16_16;
962
963 case PIPE_FORMAT_L32A32_FLOAT:
964 case PIPE_FORMAT_L32A32_UINT:
965 case PIPE_FORMAT_L32A32_SINT:
966 case PIPE_FORMAT_R32G32_FLOAT:
967 case PIPE_FORMAT_R32G32_USCALED:
968 case PIPE_FORMAT_R32G32_SSCALED:
969 case PIPE_FORMAT_R32G32_SINT:
970 case PIPE_FORMAT_R32G32_UINT:
971 return V_028C70_COLOR_32_32;
972
973 /* 128-bit buffers. */
974 case PIPE_FORMAT_R32G32B32A32_SNORM:
975 case PIPE_FORMAT_R32G32B32A32_UNORM:
976 case PIPE_FORMAT_R32G32B32A32_SSCALED:
977 case PIPE_FORMAT_R32G32B32A32_USCALED:
978 case PIPE_FORMAT_R32G32B32A32_SINT:
979 case PIPE_FORMAT_R32G32B32A32_UINT:
980 case PIPE_FORMAT_R32G32B32A32_FLOAT:
981 case PIPE_FORMAT_R32G32B32X32_FLOAT:
982 case PIPE_FORMAT_R32G32B32X32_UINT:
983 case PIPE_FORMAT_R32G32B32X32_SINT:
984 return V_028C70_COLOR_32_32_32_32;
985
986 /* YUV buffers. */
987 case PIPE_FORMAT_UYVY:
988 case PIPE_FORMAT_YUYV:
989 /* 96-bit buffers. */
990 case PIPE_FORMAT_R32G32B32_FLOAT:
991 /* 8-bit buffers. */
992 case PIPE_FORMAT_L4A4_UNORM:
993 case PIPE_FORMAT_R4A4_UNORM:
994 case PIPE_FORMAT_A4R4_UNORM:
995 default:
996 return V_028C70_COLOR_INVALID; /* Unsupported. */
997 }
998 }
999
1000 static uint32_t si_translate_colorswap(enum pipe_format format)
1001 {
1002 switch (format) {
1003 /* 8-bit buffers. */
1004 case PIPE_FORMAT_L4A4_UNORM:
1005 case PIPE_FORMAT_A4R4_UNORM:
1006 return V_028C70_SWAP_ALT;
1007
1008 case PIPE_FORMAT_A8_UNORM:
1009 case PIPE_FORMAT_A8_SNORM:
1010 case PIPE_FORMAT_A8_UINT:
1011 case PIPE_FORMAT_A8_SINT:
1012 case PIPE_FORMAT_R4A4_UNORM:
1013 return V_028C70_SWAP_ALT_REV;
1014 case PIPE_FORMAT_I8_UNORM:
1015 case PIPE_FORMAT_I8_SNORM:
1016 case PIPE_FORMAT_L8_UNORM:
1017 case PIPE_FORMAT_L8_SNORM:
1018 case PIPE_FORMAT_I8_UINT:
1019 case PIPE_FORMAT_I8_SINT:
1020 case PIPE_FORMAT_L8_UINT:
1021 case PIPE_FORMAT_L8_SINT:
1022 case PIPE_FORMAT_L8_SRGB:
1023 case PIPE_FORMAT_R8_UNORM:
1024 case PIPE_FORMAT_R8_SNORM:
1025 case PIPE_FORMAT_R8_UINT:
1026 case PIPE_FORMAT_R8_SINT:
1027 return V_028C70_SWAP_STD;
1028
1029 /* 16-bit buffers. */
1030 case PIPE_FORMAT_B5G6R5_UNORM:
1031 return V_028C70_SWAP_STD_REV;
1032
1033 case PIPE_FORMAT_B5G5R5A1_UNORM:
1034 case PIPE_FORMAT_B5G5R5X1_UNORM:
1035 return V_028C70_SWAP_ALT;
1036
1037 case PIPE_FORMAT_B4G4R4A4_UNORM:
1038 case PIPE_FORMAT_B4G4R4X4_UNORM:
1039 return V_028C70_SWAP_ALT;
1040
1041 case PIPE_FORMAT_Z16_UNORM:
1042 return V_028C70_SWAP_STD;
1043
1044 case PIPE_FORMAT_L8A8_UNORM:
1045 case PIPE_FORMAT_L8A8_SNORM:
1046 case PIPE_FORMAT_L8A8_UINT:
1047 case PIPE_FORMAT_L8A8_SINT:
1048 return V_028C70_SWAP_ALT;
1049 case PIPE_FORMAT_R8G8_SNORM:
1050 case PIPE_FORMAT_R8G8_UNORM:
1051 case PIPE_FORMAT_R8G8_UINT:
1052 case PIPE_FORMAT_R8G8_SINT:
1053 return V_028C70_SWAP_STD;
1054
1055 case PIPE_FORMAT_I16_UNORM:
1056 case PIPE_FORMAT_I16_SNORM:
1057 case PIPE_FORMAT_I16_FLOAT:
1058 case PIPE_FORMAT_L16_UNORM:
1059 case PIPE_FORMAT_L16_SNORM:
1060 case PIPE_FORMAT_L16_FLOAT:
1061 case PIPE_FORMAT_R16_UNORM:
1062 case PIPE_FORMAT_R16_SNORM:
1063 case PIPE_FORMAT_R16_UINT:
1064 case PIPE_FORMAT_R16_SINT:
1065 case PIPE_FORMAT_R16_FLOAT:
1066 return V_028C70_SWAP_STD;
1067
1068 case PIPE_FORMAT_A16_UNORM:
1069 case PIPE_FORMAT_A16_SNORM:
1070 case PIPE_FORMAT_A16_FLOAT:
1071 return V_028C70_SWAP_ALT_REV;
1072
1073 /* 32-bit buffers. */
1074 case PIPE_FORMAT_A8B8G8R8_SRGB:
1075 return V_028C70_SWAP_STD_REV;
1076 case PIPE_FORMAT_B8G8R8A8_SRGB:
1077 return V_028C70_SWAP_ALT;
1078
1079 case PIPE_FORMAT_B8G8R8A8_UNORM:
1080 case PIPE_FORMAT_B8G8R8X8_UNORM:
1081 return V_028C70_SWAP_ALT;
1082
1083 case PIPE_FORMAT_A8R8G8B8_UNORM:
1084 case PIPE_FORMAT_X8R8G8B8_UNORM:
1085 return V_028C70_SWAP_ALT_REV;
1086 case PIPE_FORMAT_R8G8B8A8_SNORM:
1087 case PIPE_FORMAT_R8G8B8A8_UNORM:
1088 case PIPE_FORMAT_R8G8B8A8_SSCALED:
1089 case PIPE_FORMAT_R8G8B8A8_USCALED:
1090 case PIPE_FORMAT_R8G8B8A8_SINT:
1091 case PIPE_FORMAT_R8G8B8A8_UINT:
1092 case PIPE_FORMAT_R8G8B8X8_UNORM:
1093 case PIPE_FORMAT_R8G8B8X8_SNORM:
1094 case PIPE_FORMAT_R8G8B8X8_SRGB:
1095 case PIPE_FORMAT_R8G8B8X8_UINT:
1096 case PIPE_FORMAT_R8G8B8X8_SINT:
1097 return V_028C70_SWAP_STD;
1098
1099 case PIPE_FORMAT_A8B8G8R8_UNORM:
1100 case PIPE_FORMAT_X8B8G8R8_UNORM:
1101 /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */
1102 return V_028C70_SWAP_STD_REV;
1103
1104 case PIPE_FORMAT_Z24X8_UNORM:
1105 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1106 return V_028C70_SWAP_STD;
1107
1108 case PIPE_FORMAT_S8X24_UINT:
1109 case PIPE_FORMAT_X8Z24_UNORM:
1110 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1111 return V_028C70_SWAP_STD_REV;
1112
1113 case PIPE_FORMAT_R10G10B10A2_UNORM:
1114 case PIPE_FORMAT_R10G10B10X2_SNORM:
1115 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
1116 return V_028C70_SWAP_STD;
1117
1118 case PIPE_FORMAT_B10G10R10A2_UNORM:
1119 case PIPE_FORMAT_B10G10R10A2_UINT:
1120 case PIPE_FORMAT_B10G10R10X2_UNORM:
1121 return V_028C70_SWAP_ALT;
1122
1123 case PIPE_FORMAT_R11G11B10_FLOAT:
1124 case PIPE_FORMAT_I32_FLOAT:
1125 case PIPE_FORMAT_L32_FLOAT:
1126 case PIPE_FORMAT_R32_FLOAT:
1127 case PIPE_FORMAT_R32_UINT:
1128 case PIPE_FORMAT_R32_SINT:
1129 case PIPE_FORMAT_Z32_FLOAT:
1130 case PIPE_FORMAT_R16G16_FLOAT:
1131 case PIPE_FORMAT_R16G16_UNORM:
1132 case PIPE_FORMAT_R16G16_SNORM:
1133 case PIPE_FORMAT_R16G16_UINT:
1134 case PIPE_FORMAT_R16G16_SINT:
1135 return V_028C70_SWAP_STD;
1136
1137 case PIPE_FORMAT_L16A16_UNORM:
1138 case PIPE_FORMAT_L16A16_SNORM:
1139 case PIPE_FORMAT_L16A16_FLOAT:
1140 return V_028C70_SWAP_ALT;
1141
1142 case PIPE_FORMAT_A32_FLOAT:
1143 return V_028C70_SWAP_ALT_REV;
1144
1145 /* 64-bit buffers. */
1146 case PIPE_FORMAT_R32G32_FLOAT:
1147 case PIPE_FORMAT_R32G32_UINT:
1148 case PIPE_FORMAT_R32G32_SINT:
1149 case PIPE_FORMAT_R16G16B16A16_UNORM:
1150 case PIPE_FORMAT_R16G16B16A16_SNORM:
1151 case PIPE_FORMAT_R16G16B16A16_USCALED:
1152 case PIPE_FORMAT_R16G16B16A16_SSCALED:
1153 case PIPE_FORMAT_R16G16B16A16_UINT:
1154 case PIPE_FORMAT_R16G16B16A16_SINT:
1155 case PIPE_FORMAT_R16G16B16A16_FLOAT:
1156 case PIPE_FORMAT_R16G16B16X16_UNORM:
1157 case PIPE_FORMAT_R16G16B16X16_SNORM:
1158 case PIPE_FORMAT_R16G16B16X16_FLOAT:
1159 case PIPE_FORMAT_R16G16B16X16_UINT:
1160 case PIPE_FORMAT_R16G16B16X16_SINT:
1161 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1162 return V_028C70_SWAP_STD;
1163
1164 case PIPE_FORMAT_L32A32_FLOAT:
1165 case PIPE_FORMAT_L32A32_UINT:
1166 case PIPE_FORMAT_L32A32_SINT:
1167 return V_028C70_SWAP_ALT;
1168
1169 /* 128-bit buffers. */
1170 case PIPE_FORMAT_R32G32B32A32_FLOAT:
1171 case PIPE_FORMAT_R32G32B32A32_SNORM:
1172 case PIPE_FORMAT_R32G32B32A32_UNORM:
1173 case PIPE_FORMAT_R32G32B32A32_SSCALED:
1174 case PIPE_FORMAT_R32G32B32A32_USCALED:
1175 case PIPE_FORMAT_R32G32B32A32_SINT:
1176 case PIPE_FORMAT_R32G32B32A32_UINT:
1177 case PIPE_FORMAT_R32G32B32X32_FLOAT:
1178 case PIPE_FORMAT_R32G32B32X32_UINT:
1179 case PIPE_FORMAT_R32G32B32X32_SINT:
1180 return V_028C70_SWAP_STD;
1181 default:
1182 R600_ERR("unsupported colorswap format %d\n", format);
1183 return ~0U;
1184 }
1185 return ~0U;
1186 }
1187
1188 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1189 {
1190 if (R600_BIG_ENDIAN) {
1191 switch(colorformat) {
1192 /* 8-bit buffers. */
1193 case V_028C70_COLOR_8:
1194 return V_028C70_ENDIAN_NONE;
1195
1196 /* 16-bit buffers. */
1197 case V_028C70_COLOR_5_6_5:
1198 case V_028C70_COLOR_1_5_5_5:
1199 case V_028C70_COLOR_4_4_4_4:
1200 case V_028C70_COLOR_16:
1201 case V_028C70_COLOR_8_8:
1202 return V_028C70_ENDIAN_8IN16;
1203
1204 /* 32-bit buffers. */
1205 case V_028C70_COLOR_8_8_8_8:
1206 case V_028C70_COLOR_2_10_10_10:
1207 case V_028C70_COLOR_8_24:
1208 case V_028C70_COLOR_24_8:
1209 case V_028C70_COLOR_16_16:
1210 return V_028C70_ENDIAN_8IN32;
1211
1212 /* 64-bit buffers. */
1213 case V_028C70_COLOR_16_16_16_16:
1214 return V_028C70_ENDIAN_8IN16;
1215
1216 case V_028C70_COLOR_32_32:
1217 return V_028C70_ENDIAN_8IN32;
1218
1219 /* 128-bit buffers. */
1220 case V_028C70_COLOR_32_32_32_32:
1221 return V_028C70_ENDIAN_8IN32;
1222 default:
1223 return V_028C70_ENDIAN_NONE; /* Unsupported. */
1224 }
1225 } else {
1226 return V_028C70_ENDIAN_NONE;
1227 }
1228 }
1229
1230 /* Returns the size in bits of the widest component of a CB format */
1231 static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1232 {
1233 switch(colorformat) {
1234 case V_028C70_COLOR_4_4_4_4:
1235 return 4;
1236
1237 case V_028C70_COLOR_1_5_5_5:
1238 case V_028C70_COLOR_5_5_5_1:
1239 return 5;
1240
1241 case V_028C70_COLOR_5_6_5:
1242 return 6;
1243
1244 case V_028C70_COLOR_8:
1245 case V_028C70_COLOR_8_8:
1246 case V_028C70_COLOR_8_8_8_8:
1247 return 8;
1248
1249 case V_028C70_COLOR_10_10_10_2:
1250 case V_028C70_COLOR_2_10_10_10:
1251 return 10;
1252
1253 case V_028C70_COLOR_10_11_11:
1254 case V_028C70_COLOR_11_11_10:
1255 return 11;
1256
1257 case V_028C70_COLOR_16:
1258 case V_028C70_COLOR_16_16:
1259 case V_028C70_COLOR_16_16_16_16:
1260 return 16;
1261
1262 case V_028C70_COLOR_8_24:
1263 case V_028C70_COLOR_24_8:
1264 return 24;
1265
1266 case V_028C70_COLOR_32:
1267 case V_028C70_COLOR_32_32:
1268 case V_028C70_COLOR_32_32_32_32:
1269 case V_028C70_COLOR_X24_8_32_FLOAT:
1270 return 32;
1271 }
1272
1273 assert(!"Unknown maximum component size");
1274 return 0;
1275 }
1276
1277 static uint32_t si_translate_dbformat(enum pipe_format format)
1278 {
1279 switch (format) {
1280 case PIPE_FORMAT_Z16_UNORM:
1281 return V_028040_Z_16;
1282 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1283 case PIPE_FORMAT_X8Z24_UNORM:
1284 case PIPE_FORMAT_Z24X8_UNORM:
1285 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1286 return V_028040_Z_24; /* deprecated on SI */
1287 case PIPE_FORMAT_Z32_FLOAT:
1288 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1289 return V_028040_Z_32_FLOAT;
1290 default:
1291 return V_028040_Z_INVALID;
1292 }
1293 }
1294
1295 /*
1296 * Texture translation
1297 */
1298
1299 static uint32_t si_translate_texformat(struct pipe_screen *screen,
1300 enum pipe_format format,
1301 const struct util_format_description *desc,
1302 int first_non_void)
1303 {
1304 struct r600_screen *rscreen = (struct r600_screen*)screen;
1305 bool enable_s3tc = rscreen->info.drm_minor >= 31;
1306 boolean uniform = TRUE;
1307 int i;
1308
1309 /* Colorspace (return non-RGB formats directly). */
1310 switch (desc->colorspace) {
1311 /* Depth stencil formats */
1312 case UTIL_FORMAT_COLORSPACE_ZS:
1313 switch (format) {
1314 case PIPE_FORMAT_Z16_UNORM:
1315 return V_008F14_IMG_DATA_FORMAT_16;
1316 case PIPE_FORMAT_X24S8_UINT:
1317 case PIPE_FORMAT_Z24X8_UNORM:
1318 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1319 return V_008F14_IMG_DATA_FORMAT_8_24;
1320 case PIPE_FORMAT_X8Z24_UNORM:
1321 case PIPE_FORMAT_S8X24_UINT:
1322 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1323 return V_008F14_IMG_DATA_FORMAT_24_8;
1324 case PIPE_FORMAT_S8_UINT:
1325 return V_008F14_IMG_DATA_FORMAT_8;
1326 case PIPE_FORMAT_Z32_FLOAT:
1327 return V_008F14_IMG_DATA_FORMAT_32;
1328 case PIPE_FORMAT_X32_S8X24_UINT:
1329 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1330 return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1331 default:
1332 goto out_unknown;
1333 }
1334
1335 case UTIL_FORMAT_COLORSPACE_YUV:
1336 goto out_unknown; /* TODO */
1337
1338 case UTIL_FORMAT_COLORSPACE_SRGB:
1339 if (desc->nr_channels != 4 && desc->nr_channels != 1)
1340 goto out_unknown;
1341 break;
1342
1343 default:
1344 break;
1345 }
1346
1347 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1348 if (!enable_s3tc)
1349 goto out_unknown;
1350
1351 switch (format) {
1352 case PIPE_FORMAT_RGTC1_SNORM:
1353 case PIPE_FORMAT_LATC1_SNORM:
1354 case PIPE_FORMAT_RGTC1_UNORM:
1355 case PIPE_FORMAT_LATC1_UNORM:
1356 return V_008F14_IMG_DATA_FORMAT_BC4;
1357 case PIPE_FORMAT_RGTC2_SNORM:
1358 case PIPE_FORMAT_LATC2_SNORM:
1359 case PIPE_FORMAT_RGTC2_UNORM:
1360 case PIPE_FORMAT_LATC2_UNORM:
1361 return V_008F14_IMG_DATA_FORMAT_BC5;
1362 default:
1363 goto out_unknown;
1364 }
1365 }
1366
1367 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1368
1369 if (!enable_s3tc)
1370 goto out_unknown;
1371
1372 if (!util_format_s3tc_enabled) {
1373 goto out_unknown;
1374 }
1375
1376 switch (format) {
1377 case PIPE_FORMAT_DXT1_RGB:
1378 case PIPE_FORMAT_DXT1_RGBA:
1379 case PIPE_FORMAT_DXT1_SRGB:
1380 case PIPE_FORMAT_DXT1_SRGBA:
1381 return V_008F14_IMG_DATA_FORMAT_BC1;
1382 case PIPE_FORMAT_DXT3_RGBA:
1383 case PIPE_FORMAT_DXT3_SRGBA:
1384 return V_008F14_IMG_DATA_FORMAT_BC2;
1385 case PIPE_FORMAT_DXT5_RGBA:
1386 case PIPE_FORMAT_DXT5_SRGBA:
1387 return V_008F14_IMG_DATA_FORMAT_BC3;
1388 default:
1389 goto out_unknown;
1390 }
1391 }
1392
1393 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1394 return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1395 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1396 return V_008F14_IMG_DATA_FORMAT_10_11_11;
1397 }
1398
1399 /* R8G8Bx_SNORM - TODO CxV8U8 */
1400
1401 /* See whether the components are of the same size. */
1402 for (i = 1; i < desc->nr_channels; i++) {
1403 uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1404 }
1405
1406 /* Non-uniform formats. */
1407 if (!uniform) {
1408 switch(desc->nr_channels) {
1409 case 3:
1410 if (desc->channel[0].size == 5 &&
1411 desc->channel[1].size == 6 &&
1412 desc->channel[2].size == 5) {
1413 return V_008F14_IMG_DATA_FORMAT_5_6_5;
1414 }
1415 goto out_unknown;
1416 case 4:
1417 if (desc->channel[0].size == 5 &&
1418 desc->channel[1].size == 5 &&
1419 desc->channel[2].size == 5 &&
1420 desc->channel[3].size == 1) {
1421 return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1422 }
1423 if (desc->channel[0].size == 10 &&
1424 desc->channel[1].size == 10 &&
1425 desc->channel[2].size == 10 &&
1426 desc->channel[3].size == 2) {
1427 return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1428 }
1429 goto out_unknown;
1430 }
1431 goto out_unknown;
1432 }
1433
1434 if (first_non_void < 0 || first_non_void > 3)
1435 goto out_unknown;
1436
1437 /* uniform formats */
1438 switch (desc->channel[first_non_void].size) {
1439 case 4:
1440 switch (desc->nr_channels) {
1441 #if 0 /* Not supported for render targets */
1442 case 2:
1443 return V_008F14_IMG_DATA_FORMAT_4_4;
1444 #endif
1445 case 4:
1446 return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1447 }
1448 break;
1449 case 8:
1450 switch (desc->nr_channels) {
1451 case 1:
1452 return V_008F14_IMG_DATA_FORMAT_8;
1453 case 2:
1454 return V_008F14_IMG_DATA_FORMAT_8_8;
1455 case 4:
1456 return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1457 }
1458 break;
1459 case 16:
1460 switch (desc->nr_channels) {
1461 case 1:
1462 return V_008F14_IMG_DATA_FORMAT_16;
1463 case 2:
1464 return V_008F14_IMG_DATA_FORMAT_16_16;
1465 case 4:
1466 return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1467 }
1468 break;
1469 case 32:
1470 switch (desc->nr_channels) {
1471 case 1:
1472 return V_008F14_IMG_DATA_FORMAT_32;
1473 case 2:
1474 return V_008F14_IMG_DATA_FORMAT_32_32;
1475 #if 0 /* Not supported for render targets */
1476 case 3:
1477 return V_008F14_IMG_DATA_FORMAT_32_32_32;
1478 #endif
1479 case 4:
1480 return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1481 }
1482 }
1483
1484 out_unknown:
1485 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1486 return ~0;
1487 }
1488
1489 static unsigned si_tex_wrap(unsigned wrap)
1490 {
1491 switch (wrap) {
1492 default:
1493 case PIPE_TEX_WRAP_REPEAT:
1494 return V_008F30_SQ_TEX_WRAP;
1495 case PIPE_TEX_WRAP_CLAMP:
1496 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1497 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1498 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1499 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1500 return V_008F30_SQ_TEX_CLAMP_BORDER;
1501 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1502 return V_008F30_SQ_TEX_MIRROR;
1503 case PIPE_TEX_WRAP_MIRROR_CLAMP:
1504 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1505 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1506 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1507 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1508 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1509 }
1510 }
1511
1512 static unsigned si_tex_filter(unsigned filter)
1513 {
1514 switch (filter) {
1515 default:
1516 case PIPE_TEX_FILTER_NEAREST:
1517 return V_008F38_SQ_TEX_XY_FILTER_POINT;
1518 case PIPE_TEX_FILTER_LINEAR:
1519 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1520 }
1521 }
1522
1523 static unsigned si_tex_mipfilter(unsigned filter)
1524 {
1525 switch (filter) {
1526 case PIPE_TEX_MIPFILTER_NEAREST:
1527 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1528 case PIPE_TEX_MIPFILTER_LINEAR:
1529 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1530 default:
1531 case PIPE_TEX_MIPFILTER_NONE:
1532 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1533 }
1534 }
1535
1536 static unsigned si_tex_compare(unsigned compare)
1537 {
1538 switch (compare) {
1539 default:
1540 case PIPE_FUNC_NEVER:
1541 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1542 case PIPE_FUNC_LESS:
1543 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1544 case PIPE_FUNC_EQUAL:
1545 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1546 case PIPE_FUNC_LEQUAL:
1547 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1548 case PIPE_FUNC_GREATER:
1549 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1550 case PIPE_FUNC_NOTEQUAL:
1551 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1552 case PIPE_FUNC_GEQUAL:
1553 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1554 case PIPE_FUNC_ALWAYS:
1555 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1556 }
1557 }
1558
1559 static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1560 {
1561 switch (dim) {
1562 default:
1563 case PIPE_TEXTURE_1D:
1564 return V_008F1C_SQ_RSRC_IMG_1D;
1565 case PIPE_TEXTURE_1D_ARRAY:
1566 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1567 case PIPE_TEXTURE_2D:
1568 case PIPE_TEXTURE_RECT:
1569 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1570 V_008F1C_SQ_RSRC_IMG_2D;
1571 case PIPE_TEXTURE_2D_ARRAY:
1572 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1573 V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1574 case PIPE_TEXTURE_3D:
1575 return V_008F1C_SQ_RSRC_IMG_3D;
1576 case PIPE_TEXTURE_CUBE:
1577 return V_008F1C_SQ_RSRC_IMG_CUBE;
1578 }
1579 }
1580
1581 /*
1582 * Format support testing
1583 */
1584
1585 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1586 {
1587 return si_translate_texformat(screen, format, util_format_description(format),
1588 util_format_get_first_non_void_channel(format)) != ~0U;
1589 }
1590
1591 static uint32_t si_translate_vertexformat(struct pipe_screen *screen,
1592 enum pipe_format format,
1593 const struct util_format_description *desc,
1594 int first_non_void)
1595 {
1596 unsigned type = desc->channel[first_non_void].type;
1597 int i;
1598
1599 if (type == UTIL_FORMAT_TYPE_FIXED)
1600 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1601
1602 /* See whether the components are of the same size. */
1603 for (i = 0; i < desc->nr_channels; i++) {
1604 if (desc->channel[first_non_void].size != desc->channel[i].size)
1605 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1606 }
1607
1608 switch (desc->channel[first_non_void].size) {
1609 case 8:
1610 switch (desc->nr_channels) {
1611 case 1:
1612 return V_008F0C_BUF_DATA_FORMAT_8;
1613 case 2:
1614 return V_008F0C_BUF_DATA_FORMAT_8_8;
1615 case 3:
1616 case 4:
1617 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1618 }
1619 break;
1620 case 16:
1621 switch (desc->nr_channels) {
1622 case 1:
1623 return V_008F0C_BUF_DATA_FORMAT_16;
1624 case 2:
1625 return V_008F0C_BUF_DATA_FORMAT_16_16;
1626 case 3:
1627 case 4:
1628 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1629 }
1630 break;
1631 case 32:
1632 if (type != UTIL_FORMAT_TYPE_FLOAT)
1633 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1634
1635 switch (desc->nr_channels) {
1636 case 1:
1637 return V_008F0C_BUF_DATA_FORMAT_32;
1638 case 2:
1639 return V_008F0C_BUF_DATA_FORMAT_32_32;
1640 case 3:
1641 return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1642 case 4:
1643 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1644 }
1645 break;
1646 }
1647
1648 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1649 }
1650
1651 static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1652 {
1653 const struct util_format_description *desc;
1654 int first_non_void;
1655 unsigned data_format;
1656
1657 desc = util_format_description(format);
1658 first_non_void = util_format_get_first_non_void_channel(format);
1659 data_format = si_translate_vertexformat(screen, format, desc, first_non_void);
1660 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1661 }
1662
1663 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1664 {
1665 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1666 si_translate_colorswap(format) != ~0U;
1667 }
1668
1669 static bool si_is_zs_format_supported(enum pipe_format format)
1670 {
1671 return si_translate_dbformat(format) != V_028040_Z_INVALID;
1672 }
1673
1674 boolean si_is_format_supported(struct pipe_screen *screen,
1675 enum pipe_format format,
1676 enum pipe_texture_target target,
1677 unsigned sample_count,
1678 unsigned usage)
1679 {
1680 struct r600_screen *rscreen = (struct r600_screen *)screen;
1681 unsigned retval = 0;
1682
1683 if (target >= PIPE_MAX_TEXTURE_TYPES) {
1684 R600_ERR("r600: unsupported texture type %d\n", target);
1685 return FALSE;
1686 }
1687
1688 if (!util_format_is_supported(format, usage))
1689 return FALSE;
1690
1691 if (sample_count > 1) {
1692 if (HAVE_LLVM < 0x0304 || rscreen->chip_class != SI)
1693 return FALSE;
1694
1695 switch (sample_count) {
1696 case 2:
1697 case 4:
1698 case 8:
1699 break;
1700 default:
1701 return FALSE;
1702 }
1703 }
1704
1705 if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
1706 si_is_sampler_format_supported(screen, format)) {
1707 retval |= PIPE_BIND_SAMPLER_VIEW;
1708 }
1709
1710 if ((usage & (PIPE_BIND_RENDER_TARGET |
1711 PIPE_BIND_DISPLAY_TARGET |
1712 PIPE_BIND_SCANOUT |
1713 PIPE_BIND_SHARED)) &&
1714 si_is_colorbuffer_format_supported(format)) {
1715 retval |= usage &
1716 (PIPE_BIND_RENDER_TARGET |
1717 PIPE_BIND_DISPLAY_TARGET |
1718 PIPE_BIND_SCANOUT |
1719 PIPE_BIND_SHARED);
1720 }
1721
1722 if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1723 si_is_zs_format_supported(format)) {
1724 retval |= PIPE_BIND_DEPTH_STENCIL;
1725 }
1726
1727 if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1728 si_is_vertex_format_supported(screen, format)) {
1729 retval |= PIPE_BIND_VERTEX_BUFFER;
1730 }
1731
1732 if (usage & PIPE_BIND_TRANSFER_READ)
1733 retval |= PIPE_BIND_TRANSFER_READ;
1734 if (usage & PIPE_BIND_TRANSFER_WRITE)
1735 retval |= PIPE_BIND_TRANSFER_WRITE;
1736
1737 return retval == usage;
1738 }
1739
1740 static unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1741 {
1742 unsigned tile_mode_index = 0;
1743
1744 if (stencil) {
1745 tile_mode_index = rtex->surface.stencil_tiling_index[level];
1746 } else {
1747 tile_mode_index = rtex->surface.tiling_index[level];
1748 }
1749 return tile_mode_index;
1750 }
1751
1752 /*
1753 * framebuffer handling
1754 */
1755
1756 static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
1757 const struct pipe_framebuffer_state *state, int cb)
1758 {
1759 struct r600_texture *rtex;
1760 struct r600_surface *surf;
1761 unsigned level = state->cbufs[cb]->u.tex.level;
1762 unsigned pitch, slice;
1763 unsigned color_info, color_attrib;
1764 unsigned tile_mode_index;
1765 unsigned format, swap, ntype, endian;
1766 uint64_t offset;
1767 const struct util_format_description *desc;
1768 int i;
1769 unsigned blend_clamp = 0, blend_bypass = 0;
1770 unsigned max_comp_size;
1771
1772 surf = (struct r600_surface *)state->cbufs[cb];
1773 rtex = (struct r600_texture*)state->cbufs[cb]->texture;
1774
1775 offset = rtex->surface.level[level].offset;
1776 if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
1777 offset += rtex->surface.level[level].slice_size *
1778 state->cbufs[cb]->u.tex.first_layer;
1779 }
1780 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
1781 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1782 if (slice) {
1783 slice = slice - 1;
1784 }
1785
1786 tile_mode_index = si_tile_mode_index(rtex, level, false);
1787
1788 desc = util_format_description(surf->base.format);
1789 for (i = 0; i < 4; i++) {
1790 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
1791 break;
1792 }
1793 }
1794 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
1795 ntype = V_028C70_NUMBER_FLOAT;
1796 } else {
1797 ntype = V_028C70_NUMBER_UNORM;
1798 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
1799 ntype = V_028C70_NUMBER_SRGB;
1800 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1801 if (desc->channel[i].pure_integer) {
1802 ntype = V_028C70_NUMBER_SINT;
1803 } else {
1804 assert(desc->channel[i].normalized);
1805 ntype = V_028C70_NUMBER_SNORM;
1806 }
1807 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1808 if (desc->channel[i].pure_integer) {
1809 ntype = V_028C70_NUMBER_UINT;
1810 } else {
1811 assert(desc->channel[i].normalized);
1812 ntype = V_028C70_NUMBER_UNORM;
1813 }
1814 }
1815 }
1816
1817 format = si_translate_colorformat(surf->base.format);
1818 if (format == V_028C70_COLOR_INVALID) {
1819 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
1820 }
1821 assert(format != V_028C70_COLOR_INVALID);
1822 swap = si_translate_colorswap(surf->base.format);
1823 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
1824 endian = V_028C70_ENDIAN_NONE;
1825 } else {
1826 endian = si_colorformat_endian_swap(format);
1827 }
1828
1829 /* blend clamp should be set for all NORM/SRGB types */
1830 if (ntype == V_028C70_NUMBER_UNORM ||
1831 ntype == V_028C70_NUMBER_SNORM ||
1832 ntype == V_028C70_NUMBER_SRGB)
1833 blend_clamp = 1;
1834
1835 /* set blend bypass according to docs if SINT/UINT or
1836 8/24 COLOR variants */
1837 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1838 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1839 format == V_028C70_COLOR_X24_8_32_FLOAT) {
1840 blend_clamp = 0;
1841 blend_bypass = 1;
1842 }
1843
1844 color_info = S_028C70_FORMAT(format) |
1845 S_028C70_COMP_SWAP(swap) |
1846 S_028C70_BLEND_CLAMP(blend_clamp) |
1847 S_028C70_BLEND_BYPASS(blend_bypass) |
1848 S_028C70_NUMBER_TYPE(ntype) |
1849 S_028C70_ENDIAN(endian);
1850
1851 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
1852 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
1853
1854 if (rtex->resource.b.b.nr_samples > 1) {
1855 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
1856
1857 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1858 S_028C74_NUM_FRAGMENTS(log_samples);
1859
1860 if (rtex->fmask.size) {
1861 color_info |= S_028C70_COMPRESSION(1);
1862 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
1863
1864 /* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */
1865 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) |
1866 S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1867 }
1868 }
1869
1870 if (rtex->cmask.size) {
1871 color_info |= S_028C70_FAST_CLEAR(1);
1872 }
1873
1874 offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
1875 offset >>= 8;
1876
1877 /* FIXME handle enabling of CB beyond BASE8 which has different offset */
1878 si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE);
1879 si_pm4_set_reg(pm4, R_028C60_CB_COLOR0_BASE + cb * 0x3C, offset);
1880 si_pm4_set_reg(pm4, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_TILE_MAX(pitch));
1881 si_pm4_set_reg(pm4, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_TILE_MAX(slice));
1882
1883 if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
1884 si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, 0x00000000);
1885 } else {
1886 si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C,
1887 S_028C6C_SLICE_START(state->cbufs[cb]->u.tex.first_layer) |
1888 S_028C6C_SLICE_MAX(state->cbufs[cb]->u.tex.last_layer));
1889 }
1890 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
1891 si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
1892
1893 if (rtex->cmask.size) {
1894 si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C,
1895 offset + (rtex->cmask.offset >> 8));
1896 si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C,
1897 S_028C80_TILE_MAX(rtex->cmask.slice_tile_max));
1898 }
1899 if (rtex->fmask.size) {
1900 si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C,
1901 offset + (rtex->fmask.offset >> 8));
1902 si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C,
1903 S_028C88_TILE_MAX(rtex->fmask.slice_tile_max));
1904 }
1905
1906 /* set CB_COLOR1_INFO for possible dual-src blending */
1907 if (state->nr_cbufs == 1) {
1908 assert(cb == 0);
1909 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, color_info);
1910 }
1911
1912 /* Determine pixel shader export format */
1913 max_comp_size = si_colorformat_max_comp_size(format);
1914 if (ntype == V_028C70_NUMBER_SRGB ||
1915 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
1916 max_comp_size <= 10) ||
1917 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
1918 rctx->export_16bpc |= 1 << cb;
1919 /* set SPI_SHADER_COL_FORMAT for possible dual-src blending */
1920 if (state->nr_cbufs == 1)
1921 rctx->export_16bpc |= 1 << 1;
1922 }
1923 }
1924
1925 static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
1926 const struct pipe_framebuffer_state *state)
1927 {
1928 struct r600_screen *rscreen = rctx->screen;
1929 struct r600_texture *rtex;
1930 struct r600_surface *surf;
1931 unsigned level, pitch, slice, format, tile_mode_index, array_mode;
1932 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
1933 uint32_t z_info, s_info, db_depth_info;
1934 uint64_t z_offs, s_offs;
1935
1936 if (state->zsbuf == NULL) {
1937 si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, S_028040_FORMAT(V_028040_Z_INVALID));
1938 si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID));
1939 return;
1940 }
1941
1942 surf = (struct r600_surface *)state->zsbuf;
1943 level = surf->base.u.tex.level;
1944 rtex = (struct r600_texture*)surf->base.texture;
1945
1946 format = si_translate_dbformat(rtex->resource.b.b.format);
1947
1948 if (format == V_028040_Z_INVALID) {
1949 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
1950 }
1951 assert(format != V_028040_Z_INVALID);
1952
1953 s_offs = z_offs = r600_resource_va(rctx->context.screen, surf->base.texture);
1954 z_offs += rtex->surface.level[level].offset;
1955 s_offs += rtex->surface.stencil_level[level].offset;
1956
1957 z_offs >>= 8;
1958 s_offs >>= 8;
1959
1960 pitch = (rtex->surface.level[level].nblk_x / 8) - 1;
1961 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1962 if (slice) {
1963 slice = slice - 1;
1964 }
1965
1966 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1967
1968 z_info = S_028040_FORMAT(format);
1969 if (rtex->resource.b.b.nr_samples > 1) {
1970 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
1971 }
1972
1973 if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1974 s_info = S_028044_FORMAT(V_028044_STENCIL_8);
1975 else
1976 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1977
1978 if (rctx->chip_class >= CIK) {
1979 switch (rtex->surface.level[level].mode) {
1980 case RADEON_SURF_MODE_2D:
1981 array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
1982 break;
1983 case RADEON_SURF_MODE_1D:
1984 case RADEON_SURF_MODE_LINEAR_ALIGNED:
1985 case RADEON_SURF_MODE_LINEAR:
1986 default:
1987 array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
1988 break;
1989 }
1990 tile_split = rtex->surface.tile_split;
1991 stile_split = rtex->surface.stencil_tile_split;
1992 macro_aspect = rtex->surface.mtilea;
1993 bankw = rtex->surface.bankw;
1994 bankh = rtex->surface.bankh;
1995 tile_split = cik_tile_split(tile_split);
1996 stile_split = cik_tile_split(stile_split);
1997 macro_aspect = cik_macro_tile_aspect(macro_aspect);
1998 bankw = cik_bank_wh(bankw);
1999 bankh = cik_bank_wh(bankh);
2000 nbanks = cik_num_banks(rscreen->tiling_info.num_banks);
2001 pipe_config = cik_db_pipe_config(rscreen->info.r600_num_tile_pipes,
2002 rscreen->info.r600_num_backends);
2003
2004 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
2005 S_02803C_PIPE_CONFIG(pipe_config) |
2006 S_02803C_BANK_WIDTH(bankw) |
2007 S_02803C_BANK_HEIGHT(bankh) |
2008 S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
2009 S_02803C_NUM_BANKS(nbanks);
2010 z_info |= S_028040_TILE_SPLIT(tile_split);
2011 s_info |= S_028044_TILE_SPLIT(stile_split);
2012 } else {
2013 tile_mode_index = si_tile_mode_index(rtex, level, false);
2014 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2015 tile_mode_index = si_tile_mode_index(rtex, level, true);
2016 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2017 }
2018
2019 si_pm4_set_reg(pm4, R_028008_DB_DEPTH_VIEW,
2020 S_028008_SLICE_START(state->zsbuf->u.tex.first_layer) |
2021 S_028008_SLICE_MAX(state->zsbuf->u.tex.last_layer));
2022
2023 si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, db_depth_info);
2024 si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, z_info);
2025 si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, s_info);
2026
2027 si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE);
2028 si_pm4_set_reg(pm4, R_028048_DB_Z_READ_BASE, z_offs);
2029 si_pm4_set_reg(pm4, R_02804C_DB_STENCIL_READ_BASE, s_offs);
2030 si_pm4_set_reg(pm4, R_028050_DB_Z_WRITE_BASE, z_offs);
2031 si_pm4_set_reg(pm4, R_028054_DB_STENCIL_WRITE_BASE, s_offs);
2032
2033 si_pm4_set_reg(pm4, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch));
2034 si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice));
2035 }
2036
/* Pack four (x, y) sample offsets into one 32-bit sample-location word,
 * one 4-bit two's-complement nibble per coordinate, s0x in the lowest
 * nibble and s3y in the highest.
 *
 * Every argument is converted to uint32_t before masking and shifting.
 * With plain int operands a negative s3y (e.g. -6 -> 0xA) would be shifted
 * into the sign bit by "<< 28", which is undefined for signed operands. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
	(((uint32_t)(s0x) & 0xf) | (((uint32_t)(s0y) & 0xf) << 4) | \
	 (((uint32_t)(s1x) & 0xf) << 8) | (((uint32_t)(s1y) & 0xf) << 12) | \
	 (((uint32_t)(s2x) & 0xf) << 16) | (((uint32_t)(s2y) & 0xf) << 20) | \
	 (((uint32_t)(s3x) & 0xf) << 24) | (((uint32_t)(s3y) & 0xf) << 28))

/* 2xMSAA
 * There are two locations (-4, 4), (4, -4). */
static uint32_t sample_locs_2x[] = {
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
};
static unsigned max_dist_2x = 4;
/* 4xMSAA
 * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
static uint32_t sample_locs_4x[] = {
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
};
static unsigned max_dist_4x = 6;
/* Cayman/SI 8xMSAA
 * Entries 0-3 hold samples 0-3, entries 4-7 hold samples 4-7. */
static uint32_t cm_sample_locs_8x[] = {
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
};
static unsigned cm_max_dist_8x = 8;
/* Cayman/SI 16xMSAA
 * Each group of four entries holds the next four samples. */
static uint32_t cm_sample_locs_16x[] = {
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
};
static unsigned cm_max_dist_16x = 8;

/* Extract the signed 4-bit coordinate stored at bit position "shift" of a
 * packed sample-location word and convert it to a position inside the
 * pixel: (coord + 8) / 16, so a coordinate of 0 maps to 0.5.
 *
 * The sign extension is done arithmetically instead of by storing the
 * nibble into a signed 4-bit bit-field, whose result for values 8..15 is
 * implementation-defined. */
static float si_unpack_sample_coord(uint32_t packed, unsigned shift)
{
	int coord = (int)((packed >> shift) & 0xf);

	if (coord & 0x8)
		coord -= 16;

	return (float)(coord + 8) / 16.0f;
}

/* Return the position of one sample inside the pixel.
 *
 * ctx          - unused.
 * sample_count - 2, 4, 8 or 16 select the matching table above; any other
 *                value reports the pixel centre (0.5, 0.5).
 * sample_index - which sample to look up. The shift and table index below
 *                are derived from it without range checking, so it must be
 *                smaller than sample_count.
 * out_value    - receives x in out_value[0] and y in out_value[1].
 */
static void si_get_sample_position(struct pipe_context *ctx,
				   unsigned sample_count,
				   unsigned sample_index,
				   float *out_value)
{
	uint32_t packed;
	unsigned shift;

	switch (sample_count) {
	case 1:
	default:
		out_value[0] = out_value[1] = 0.5;
		return;
	case 2:
		/* All four samples of the word live in entry 0. */
		packed = sample_locs_2x[0];
		shift = 4 * (sample_index * 2);
		break;
	case 4:
		packed = sample_locs_4x[0];
		shift = 4 * (sample_index * 2);
		break;
	case 8:
		/* Four samples per word; each word is repeated four times. */
		packed = cm_sample_locs_8x[(sample_index / 4) * 4];
		shift = 4 * (sample_index % 4 * 2);
		break;
	case 16:
		packed = cm_sample_locs_16x[(sample_index / 4) * 4];
		shift = 4 * (sample_index % 4 * 2);
		break;
	}

	/* x sits in the lower nibble of the pair, y in the upper one. */
	out_value[0] = si_unpack_sample_coord(packed, shift);
	out_value[1] = si_unpack_sample_coord(packed, shift + 4);
}
2140
2141 static void si_set_msaa_state(struct r600_context *rctx, struct si_pm4_state *pm4, int nr_samples)
2142 {
2143 unsigned max_dist = 0;
2144
2145 switch (nr_samples) {
2146 default:
2147 nr_samples = 0;
2148 break;
2149 case 2:
2150 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]);
2151 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]);
2152 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]);
2153 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]);
2154 max_dist = max_dist_2x;
2155 break;
2156 case 4:
2157 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]);
2158 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]);
2159 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]);
2160 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]);
2161 max_dist = max_dist_4x;
2162 break;
2163 case 8:
2164 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_8x[0]);
2165 si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_8x[4]);
2166 si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0);
2167 si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0);
2168 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_8x[1]);
2169 si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_8x[5]);
2170 si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0);
2171 si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0);
2172 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_8x[2]);
2173 si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_8x[6]);
2174 si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0);
2175 si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0);
2176 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_8x[3]);
2177 si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_8x[7]);
2178 max_dist = cm_max_dist_8x;
2179 break;
2180 case 16:
2181 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_16x[0]);
2182 si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_16x[4]);
2183 si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, cm_sample_locs_16x[8]);
2184 si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, cm_sample_locs_16x[12]);
2185 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_16x[1]);
2186 si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_16x[5]);
2187 si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, cm_sample_locs_16x[9]);
2188 si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, cm_sample_locs_16x[13]);
2189 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_16x[2]);
2190 si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_16x[6]);
2191 si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, cm_sample_locs_16x[10]);
2192 si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, cm_sample_locs_16x[14]);
2193 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_16x[3]);
2194 si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_16x[7]);
2195 si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, cm_sample_locs_16x[11]);
2196 si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, cm_sample_locs_16x[15]);
2197 max_dist = cm_max_dist_16x;
2198 break;
2199 }
2200
2201 if (nr_samples > 1) {
2202 unsigned log_samples = util_logbase2(nr_samples);
2203
2204 si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL,
2205 S_028BDC_LAST_PIXEL(1) |
2206 S_028BDC_EXPAND_LINE_WIDTH(1));
2207 si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG,
2208 S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
2209 S_028BE0_MAX_SAMPLE_DIST(max_dist) |
2210 S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples));
2211
2212 si_pm4_set_reg(pm4, R_028804_DB_EQAA,
2213 S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
2214 S_028804_PS_ITER_SAMPLES(log_samples) |
2215 S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
2216 S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
2217 S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
2218 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
2219 } else {
2220 si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1));
2221 si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0);
2222
2223 si_pm4_set_reg(pm4, R_028804_DB_EQAA,
2224 S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
2225 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
2226 }
2227 }
2228
2229 static void si_set_framebuffer_state(struct pipe_context *ctx,
2230 const struct pipe_framebuffer_state *state)
2231 {
2232 struct r600_context *rctx = (struct r600_context *)ctx;
2233 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
2234 uint32_t tl, br;
2235 int tl_x, tl_y, br_x, br_y, nr_samples, i;
2236
2237 if (pm4 == NULL)
2238 return;
2239
2240 si_pm4_inval_fb_cache(pm4, state->nr_cbufs);
2241 rctx->flush_and_inv_cb_meta = true;
2242
2243 if (state->zsbuf)
2244 si_pm4_inval_zsbuf_cache(pm4);
2245
2246 util_copy_framebuffer_state(&rctx->framebuffer, state);
2247
2248 /* build states */
2249 rctx->export_16bpc = 0;
2250 rctx->fb_compressed_cb_mask = 0;
2251 for (i = 0; i < state->nr_cbufs; i++) {
2252 struct r600_texture *rtex =
2253 (struct r600_texture*)state->cbufs[i]->texture;
2254
2255 si_cb(rctx, pm4, state, i);
2256
2257 if (rtex->fmask.size || rtex->cmask.size) {
2258 rctx->fb_compressed_cb_mask |= 1 << i;
2259 }
2260 }
2261 for (; i < 8; i++) {
2262 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2263 S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2264 }
2265
2266 assert(!(rctx->export_16bpc & ~0xff));
2267 si_db(rctx, pm4, state);
2268
2269 tl_x = 0;
2270 tl_y = 0;
2271 br_x = state->width;
2272 br_y = state->height;
2273
2274 tl = S_028240_TL_X(tl_x) | S_028240_TL_Y(tl_y);
2275 br = S_028244_BR_X(br_x) | S_028244_BR_Y(br_y);
2276
2277 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl);
2278 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, br);
2279 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl);
2280 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br);
2281 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl);
2282 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, br);
2283 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl);
2284 si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR, br);
2285 si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
2286 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
2287
2288 if (state->nr_cbufs)
2289 nr_samples = state->cbufs[0]->texture->nr_samples;
2290 else if (state->zsbuf)
2291 nr_samples = state->zsbuf->texture->nr_samples;
2292 else
2293 nr_samples = 0;
2294
2295 si_set_msaa_state(rctx, pm4, nr_samples);
2296 rctx->fb_log_samples = util_logbase2(nr_samples);
2297 rctx->fb_cb0_is_integer = state->nr_cbufs &&
2298 util_format_is_pure_integer(state->cbufs[0]->format);
2299
2300 si_pm4_set_state(rctx, framebuffer, pm4);
2301 si_update_fb_rs_state(rctx);
2302 si_update_fb_blend_state(rctx);
2303 }
2304
2305 /*
2306 * shaders
2307 */
2308
2309 /* Compute the key for the hw shader variant */
2310 static INLINE void si_shader_selector_key(struct pipe_context *ctx,
2311 struct si_pipe_shader_selector *sel,
2312 union si_shader_key *key)
2313 {
2314 struct r600_context *rctx = (struct r600_context *)ctx;
2315 memset(key, 0, sizeof(*key));
2316
2317 if (sel->type == PIPE_SHADER_VERTEX) {
2318 unsigned i;
2319 if (!rctx->vertex_elements)
2320 return;
2321
2322 for (i = 0; i < rctx->vertex_elements->count; ++i)
2323 key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor;
2324
2325 if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf0)
2326 key->vs.ucps_enabled |= 0x2;
2327 if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
2328 key->vs.ucps_enabled |= 0x1;
2329 } else if (sel->type == PIPE_SHADER_FRAGMENT) {
2330 if (sel->fs_write_all)
2331 key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
2332 key->ps.export_16bpc = rctx->export_16bpc;
2333
2334 if (rctx->queued.named.rasterizer) {
2335 key->ps.color_two_side = rctx->queued.named.rasterizer->two_side;
2336 key->ps.flatshade = rctx->queued.named.rasterizer->flatshade;
2337
2338 if (rctx->queued.named.blend) {
2339 key->ps.alpha_to_one = rctx->queued.named.blend->alpha_to_one &&
2340 rctx->queued.named.rasterizer->multisample_enable &&
2341 !rctx->fb_cb0_is_integer;
2342 }
2343 }
2344 if (rctx->queued.named.dsa) {
2345 key->ps.alpha_func = rctx->queued.named.dsa->alpha_func;
2346 key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref;
2347 } else {
2348 key->ps.alpha_func = PIPE_FUNC_ALWAYS;
2349 }
2350 }
2351 }
2352
2353 /* Select the hw shader variant depending on the current state.
2354 * (*dirty) is set to 1 if current variant was changed */
2355 int si_shader_select(struct pipe_context *ctx,
2356 struct si_pipe_shader_selector *sel,
2357 unsigned *dirty)
2358 {
2359 union si_shader_key key;
2360 struct si_pipe_shader * shader = NULL;
2361 int r;
2362
2363 si_shader_selector_key(ctx, sel, &key);
2364
2365 /* Check if we don't need to change anything.
2366 * This path is also used for most shaders that don't need multiple
2367 * variants, it will cost just a computation of the key and this
2368 * test. */
2369 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
2370 return 0;
2371 }
2372
2373 /* lookup if we have other variants in the list */
2374 if (sel->num_shaders > 1) {
2375 struct si_pipe_shader *p = sel->current, *c = p->next_variant;
2376
2377 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
2378 p = c;
2379 c = c->next_variant;
2380 }
2381
2382 if (c) {
2383 p->next_variant = c->next_variant;
2384 shader = c;
2385 }
2386 }
2387
2388 if (unlikely(!shader)) {
2389 shader = CALLOC(1, sizeof(struct si_pipe_shader));
2390 shader->selector = sel;
2391 shader->key = key;
2392
2393 r = si_pipe_shader_create(ctx, shader);
2394 if (unlikely(r)) {
2395 R600_ERR("Failed to build shader variant (type=%u) %d\n",
2396 sel->type, r);
2397 sel->current = NULL;
2398 FREE(shader);
2399 return r;
2400 }
2401
2402 /* We don't know the value of fs_write_all property until we built
2403 * at least one variant, so we may need to recompute the key (include
2404 * rctx->framebuffer.nr_cbufs) after building first variant. */
2405 if (sel->type == PIPE_SHADER_FRAGMENT &&
2406 sel->num_shaders == 0 &&
2407 shader->shader.fs_write_all) {
2408 sel->fs_write_all = 1;
2409 si_shader_selector_key(ctx, sel, &shader->key);
2410 }
2411
2412 sel->num_shaders++;
2413 }
2414
2415 if (dirty)
2416 *dirty = 1;
2417
2418 shader->next_variant = sel->current;
2419 sel->current = shader;
2420
2421 return 0;
2422 }
2423
2424 static void *si_create_shader_state(struct pipe_context *ctx,
2425 const struct pipe_shader_state *state,
2426 unsigned pipe_shader_type)
2427 {
2428 struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
2429 int r;
2430
2431 sel->type = pipe_shader_type;
2432 sel->tokens = tgsi_dup_tokens(state->tokens);
2433 sel->so = state->stream_output;
2434
2435 r = si_shader_select(ctx, sel, NULL);
2436 if (r) {
2437 free(sel);
2438 return NULL;
2439 }
2440
2441 return sel;
2442 }
2443
2444 static void *si_create_fs_state(struct pipe_context *ctx,
2445 const struct pipe_shader_state *state)
2446 {
2447 return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
2448 }
2449
2450 static void *si_create_vs_state(struct pipe_context *ctx,
2451 const struct pipe_shader_state *state)
2452 {
2453 return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
2454 }
2455
2456 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
2457 {
2458 struct r600_context *rctx = (struct r600_context *)ctx;
2459 struct si_pipe_shader_selector *sel = state;
2460
2461 if (rctx->vs_shader == sel)
2462 return;
2463
2464 rctx->vs_shader = sel;
2465
2466 if (sel && sel->current)
2467 si_pm4_bind_state(rctx, vs, sel->current->pm4);
2468 else
2469 si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
2470 }
2471
2472 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
2473 {
2474 struct r600_context *rctx = (struct r600_context *)ctx;
2475 struct si_pipe_shader_selector *sel = state;
2476
2477 if (rctx->ps_shader == sel)
2478 return;
2479
2480 rctx->ps_shader = sel;
2481
2482 if (sel && sel->current)
2483 si_pm4_bind_state(rctx, ps, sel->current->pm4);
2484 else
2485 si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4);
2486 }
2487
2488 static void si_delete_shader_selector(struct pipe_context *ctx,
2489 struct si_pipe_shader_selector *sel)
2490 {
2491 struct r600_context *rctx = (struct r600_context *)ctx;
2492 struct si_pipe_shader *p = sel->current, *c;
2493
2494 while (p) {
2495 c = p->next_variant;
2496 si_pm4_delete_state(rctx, vs, p->pm4);
2497 si_pipe_shader_destroy(ctx, p);
2498 free(p);
2499 p = c;
2500 }
2501
2502 free(sel->tokens);
2503 free(sel);
2504 }
2505
2506 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
2507 {
2508 struct r600_context *rctx = (struct r600_context *)ctx;
2509 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2510
2511 if (rctx->vs_shader == sel) {
2512 rctx->vs_shader = NULL;
2513 }
2514
2515 si_delete_shader_selector(ctx, sel);
2516 }
2517
2518 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
2519 {
2520 struct r600_context *rctx = (struct r600_context *)ctx;
2521 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2522
2523 if (rctx->ps_shader == sel) {
2524 rctx->ps_shader = NULL;
2525 }
2526
2527 si_delete_shader_selector(ctx, sel);
2528 }
2529
2530 /*
2531 * Samplers
2532 */
2533
2534 static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
2535 struct pipe_resource *texture,
2536 const struct pipe_sampler_view *state)
2537 {
2538 struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
2539 struct r600_texture *tmp = (struct r600_texture*)texture;
2540 const struct util_format_description *desc;
2541 unsigned format, num_format;
2542 uint32_t pitch = 0;
2543 unsigned char state_swizzle[4], swizzle[4];
2544 unsigned height, depth, width;
2545 enum pipe_format pipe_format = state->format;
2546 struct radeon_surface_level *surflevel;
2547 int first_non_void;
2548 uint64_t va;
2549
2550 if (view == NULL)
2551 return NULL;
2552
2553 /* initialize base object */
2554 view->base = *state;
2555 view->base.texture = NULL;
2556 pipe_reference(NULL, &texture->reference);
2557 view->base.texture = texture;
2558 view->base.reference.count = 1;
2559 view->base.context = ctx;
2560
2561 state_swizzle[0] = state->swizzle_r;
2562 state_swizzle[1] = state->swizzle_g;
2563 state_swizzle[2] = state->swizzle_b;
2564 state_swizzle[3] = state->swizzle_a;
2565
2566 surflevel = tmp->surface.level;
2567
2568 /* Texturing with separate depth and stencil. */
2569 if (tmp->is_depth && !tmp->is_flushing_texture) {
2570 switch (pipe_format) {
2571 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2572 pipe_format = PIPE_FORMAT_Z32_FLOAT;
2573 break;
2574 case PIPE_FORMAT_X8Z24_UNORM:
2575 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2576 /* Z24 is always stored like this. */
2577 pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2578 break;
2579 case PIPE_FORMAT_X24S8_UINT:
2580 case PIPE_FORMAT_S8X24_UINT:
2581 case PIPE_FORMAT_X32_S8X24_UINT:
2582 pipe_format = PIPE_FORMAT_S8_UINT;
2583 surflevel = tmp->surface.stencil_level;
2584 break;
2585 default:;
2586 }
2587 }
2588
2589 desc = util_format_description(pipe_format);
2590
2591 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2592 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2593 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2594
2595 switch (pipe_format) {
2596 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2597 case PIPE_FORMAT_X24S8_UINT:
2598 case PIPE_FORMAT_X32_S8X24_UINT:
2599 case PIPE_FORMAT_X8Z24_UNORM:
2600 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2601 break;
2602 default:
2603 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2604 }
2605 } else {
2606 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2607 }
2608
2609 first_non_void = util_format_get_first_non_void_channel(pipe_format);
2610
2611 switch (pipe_format) {
2612 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2613 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2614 break;
2615 default:
2616 if (first_non_void < 0) {
2617 if (util_format_is_compressed(pipe_format)) {
2618 switch (pipe_format) {
2619 case PIPE_FORMAT_DXT1_SRGB:
2620 case PIPE_FORMAT_DXT1_SRGBA:
2621 case PIPE_FORMAT_DXT3_SRGBA:
2622 case PIPE_FORMAT_DXT5_SRGBA:
2623 num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2624 break;
2625 case PIPE_FORMAT_RGTC1_SNORM:
2626 case PIPE_FORMAT_LATC1_SNORM:
2627 case PIPE_FORMAT_RGTC2_SNORM:
2628 case PIPE_FORMAT_LATC2_SNORM:
2629 num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2630 break;
2631 default:
2632 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2633 break;
2634 }
2635 } else {
2636 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2637 }
2638 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2639 num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2640 } else {
2641 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2642
2643 switch (desc->channel[first_non_void].type) {
2644 case UTIL_FORMAT_TYPE_FLOAT:
2645 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2646 break;
2647 case UTIL_FORMAT_TYPE_SIGNED:
2648 if (desc->channel[first_non_void].normalized)
2649 num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2650 else if (desc->channel[first_non_void].pure_integer)
2651 num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2652 else
2653 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2654 break;
2655 case UTIL_FORMAT_TYPE_UNSIGNED:
2656 if (desc->channel[first_non_void].normalized)
2657 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2658 else if (desc->channel[first_non_void].pure_integer)
2659 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2660 else
2661 num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2662 }
2663 }
2664 }
2665
2666 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
2667 if (format == ~0) {
2668 format = 0;
2669 }
2670
2671 view->resource = &tmp->resource;
2672
2673 /* not supported any more */
2674 //endian = si_colorformat_endian_swap(format);
2675
2676 width = surflevel[0].npix_x;
2677 height = surflevel[0].npix_y;
2678 depth = surflevel[0].npix_z;
2679 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
2680
2681 if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
2682 height = 1;
2683 depth = texture->array_size;
2684 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
2685 depth = texture->array_size;
2686 }
2687
2688 va = r600_resource_va(ctx->screen, texture);
2689 va += surflevel[0].offset;
2690 view->state[0] = va >> 8;
2691 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2692 S_008F14_DATA_FORMAT(format) |
2693 S_008F14_NUM_FORMAT(num_format));
2694 view->state[2] = (S_008F18_WIDTH(width - 1) |
2695 S_008F18_HEIGHT(height - 1));
2696 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2697 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2698 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2699 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2700 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
2701 0 : state->u.tex.first_level) |
2702 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
2703 util_logbase2(texture->nr_samples) :
2704 state->u.tex.last_level) |
2705 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
2706 S_008F1C_POW2_PAD(texture->last_level > 0) |
2707 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
2708 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2709 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2710 S_008F24_LAST_ARRAY(state->u.tex.last_layer));
2711 view->state[6] = 0;
2712 view->state[7] = 0;
2713
2714 /* Initialize the sampler view for FMASK. */
2715 if (tmp->fmask.size) {
2716 uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset;
2717 uint32_t fmask_format;
2718
2719 switch (texture->nr_samples) {
2720 case 2:
2721 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2722 break;
2723 case 4:
2724 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2725 break;
2726 case 8:
2727 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2728 break;
2729 default:
2730 assert(0);
2731 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2732 }
2733
2734 view->fmask_state[0] = va >> 8;
2735 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2736 S_008F14_DATA_FORMAT(fmask_format) |
2737 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2738 view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
2739 S_008F18_HEIGHT(height - 1);
2740 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2741 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2742 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2743 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2744 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
2745 S_008F1C_TYPE(si_tex_dim(texture->target, 0));
2746 view->fmask_state[4] = S_008F20_PITCH(tmp->fmask.pitch - 1);
2747 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2748 S_008F24_LAST_ARRAY(state->u.tex.last_layer);
2749 view->fmask_state[6] = 0;
2750 view->fmask_state[7] = 0;
2751 }
2752
2753 return &view->base;
2754 }
2755
2756 static void si_sampler_view_destroy(struct pipe_context *ctx,
2757 struct pipe_sampler_view *state)
2758 {
2759 struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
2760
2761 pipe_resource_reference(&state->texture, NULL);
2762 FREE(resource);
2763 }
2764
2765 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2766 {
2767 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2768 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2769 (linear_filter &&
2770 (wrap == PIPE_TEX_WRAP_CLAMP ||
2771 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2772 }
2773
2774 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2775 {
2776 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2777 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2778
2779 return (state->border_color.ui[0] || state->border_color.ui[1] ||
2780 state->border_color.ui[2] || state->border_color.ui[3]) &&
2781 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2782 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2783 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2784 }
2785
2786 static void *si_create_sampler_state(struct pipe_context *ctx,
2787 const struct pipe_sampler_state *state)
2788 {
2789 struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state);
2790 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
2791 unsigned border_color_type;
2792
2793 if (rstate == NULL) {
2794 return NULL;
2795 }
2796
2797 if (sampler_state_needs_border_color(state))
2798 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
2799 else
2800 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2801
2802 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
2803 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
2804 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
2805 (state->max_anisotropy & 0x7) << 9 | /* XXX */
2806 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
2807 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
2808 aniso_flag_offset << 16 | /* XXX */
2809 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
2810 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
2811 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
2812 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
2813 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
2814 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
2815 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
2816 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
2817
2818 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2819 memcpy(rstate->border_color, state->border_color.ui,
2820 sizeof(rstate->border_color));
2821 }
2822
2823 return rstate;
2824 }
2825
2826 /* XXX consider moving this function to si_descriptors.c for gcc to inline
2827 * the si_set_sampler_view calls. LTO might help too. */
2828 static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx,
2829 unsigned shader, unsigned count,
2830 struct pipe_sampler_view **views)
2831 {
2832 struct r600_textures_info *samplers = &rctx->samplers[shader];
2833 struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views;
2834 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
2835 int i;
2836
2837 si_pm4_inval_texture_cache(pm4);
2838
2839 for (i = 0; i < count; i++) {
2840 if (views[i]) {
2841 struct r600_texture *rtex =
2842 (struct r600_texture*)views[i]->texture;
2843
2844 if (rtex->is_depth && !rtex->is_flushing_texture) {
2845 samplers->depth_texture_mask |= 1 << i;
2846 } else {
2847 samplers->depth_texture_mask &= ~(1 << i);
2848 }
2849 if (rtex->cmask.size || rtex->fmask.size) {
2850 samplers->compressed_colortex_mask |= 1 << i;
2851 } else {
2852 samplers->compressed_colortex_mask &= ~(1 << i);
2853 }
2854
2855 si_set_sampler_view(rctx, shader, i, views[i], rviews[i]->state);
2856
2857 if (rtex->fmask.size) {
2858 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2859 views[i], rviews[i]->fmask_state);
2860 } else {
2861 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2862 NULL, NULL);
2863 }
2864 } else {
2865 samplers->depth_texture_mask &= ~(1 << i);
2866 samplers->compressed_colortex_mask &= ~(1 << i);
2867 si_set_sampler_view(rctx, shader, i, NULL, NULL);
2868 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2869 NULL, NULL);
2870 }
2871 }
2872 for (; i < samplers->n_views; i++) {
2873 samplers->depth_texture_mask &= ~(1 << i);
2874 samplers->compressed_colortex_mask &= ~(1 << i);
2875 si_set_sampler_view(rctx, shader, i, NULL, NULL);
2876 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2877 NULL, NULL);
2878 }
2879
2880 samplers->n_views = count;
2881 return pm4;
2882 }
2883
2884 static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count,
2885 struct pipe_sampler_view **views)
2886 {
2887 struct r600_context *rctx = (struct r600_context *)ctx;
2888 struct si_pm4_state *pm4;
2889
2890 pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views);
2891 si_pm4_set_state(rctx, vs_sampler_views, pm4);
2892 }
2893
2894 static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count,
2895 struct pipe_sampler_view **views)
2896 {
2897 struct r600_context *rctx = (struct r600_context *)ctx;
2898 struct si_pm4_state *pm4;
2899
2900 pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views);
2901 si_pm4_set_state(rctx, ps_sampler_views, pm4);
2902 }
2903
2904 static struct si_pm4_state *si_bind_sampler_states(struct r600_context *rctx, unsigned count,
2905 void **states,
2906 struct r600_textures_info *samplers,
2907 unsigned user_data_reg)
2908 {
2909 struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
2910 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
2911 uint32_t *border_color_table = NULL;
2912 int i, j;
2913
2914 if (!count)
2915 goto out;
2916
2917 si_pm4_inval_texture_cache(pm4);
2918
2919 si_pm4_sh_data_begin(pm4);
2920 for (i = 0; i < count; i++) {
2921 if (rstates[i] &&
2922 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
2923 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2924 if (!rctx->border_color_table ||
2925 ((rctx->border_color_offset + count - i) &
2926 C_008F3C_BORDER_COLOR_PTR)) {
2927 si_resource_reference(&rctx->border_color_table, NULL);
2928 rctx->border_color_offset = 0;
2929
2930 rctx->border_color_table =
2931 si_resource_create_custom(&rctx->screen->screen,
2932 PIPE_USAGE_STAGING,
2933 4096 * 4 * 4);
2934 }
2935
2936 if (!border_color_table) {
2937 border_color_table =
2938 rctx->ws->buffer_map(rctx->border_color_table->cs_buf,
2939 rctx->cs,
2940 PIPE_TRANSFER_WRITE |
2941 PIPE_TRANSFER_UNSYNCHRONIZED);
2942 }
2943
2944 for (j = 0; j < 4; j++) {
2945 border_color_table[4 * rctx->border_color_offset + j] =
2946 util_le32_to_cpu(rstates[i]->border_color[j]);
2947 }
2948
2949 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
2950 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(rctx->border_color_offset++);
2951 }
2952
2953 for (j = 0; j < Elements(rstates[i]->val); ++j) {
2954 si_pm4_sh_data_add(pm4, rstates[i] ? rstates[i]->val[j] : 0);
2955 }
2956 }
2957 si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_SAMPLER);
2958
2959 if (border_color_table) {
2960 uint64_t va_offset =
2961 r600_resource_va(&rctx->screen->screen,
2962 (void*)rctx->border_color_table);
2963
2964 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
2965 if (rctx->chip_class >= CIK)
2966 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
2967 rctx->ws->buffer_unmap(rctx->border_color_table->cs_buf);
2968 si_pm4_add_bo(pm4, rctx->border_color_table, RADEON_USAGE_READ);
2969 }
2970
2971 memcpy(samplers->samplers, states, sizeof(void*) * count);
2972
2973 out:
2974 samplers->n_samplers = count;
2975 return pm4;
2976 }
2977
2978 static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2979 {
2980 struct r600_context *rctx = (struct r600_context *)ctx;
2981 struct si_pm4_state *pm4;
2982
2983 pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_VERTEX],
2984 R_00B130_SPI_SHADER_USER_DATA_VS_0);
2985 si_pm4_set_state(rctx, vs_sampler, pm4);
2986 }
2987
2988 static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2989 {
2990 struct r600_context *rctx = (struct r600_context *)ctx;
2991 struct si_pm4_state *pm4;
2992
2993 pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_FRAGMENT],
2994 R_00B030_SPI_SHADER_USER_DATA_PS_0);
2995 si_pm4_set_state(rctx, ps_sampler, pm4);
2996 }
2997
2998 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2999 {
3000 struct r600_context *rctx = (struct r600_context *)ctx;
3001 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
3002 uint16_t mask = sample_mask;
3003
3004 if (pm4 == NULL)
3005 return;
3006
3007 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
3008 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
3009
3010 si_pm4_set_state(rctx, sample_mask, pm4);
3011 }
3012
/**
 * pipe_context hook: release a sampler state object.
 *
 * \param ctx    unused
 * \param state  sampler state to free
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	(void)ctx;

	free(state);
}
3017
3018 /*
3019 * Constants
3020 */
3021 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
3022 struct pipe_constant_buffer *input)
3023 {
3024 struct r600_context *rctx = (struct r600_context *)ctx;
3025 struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
3026 struct pipe_constant_buffer *cb;
3027 const uint8_t *ptr;
3028
3029 /* Note that the state tracker can unbind constant buffers by
3030 * passing NULL here.
3031 */
3032 if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
3033 state->enabled_mask &= ~(1 << index);
3034 state->dirty_mask &= ~(1 << index);
3035 pipe_resource_reference(&state->cb[index].buffer, NULL);
3036 return;
3037 }
3038
3039 cb = &state->cb[index];
3040 cb->buffer_size = input->buffer_size;
3041
3042 ptr = input->user_buffer;
3043
3044 if (ptr) {
3045 r600_upload_const_buffer(rctx,
3046 (struct si_resource**)&cb->buffer, ptr,
3047 cb->buffer_size, &cb->buffer_offset);
3048 } else {
3049 /* Setup the hw buffer. */
3050 cb->buffer_offset = input->buffer_offset;
3051 pipe_resource_reference(&cb->buffer, input->buffer);
3052 }
3053
3054 state->enabled_mask |= 1 << index;
3055 state->dirty_mask |= 1 << index;
3056 }
3057
3058 /*
3059 * Vertex elements & buffers
3060 */
3061
3062 static void *si_create_vertex_elements(struct pipe_context *ctx,
3063 unsigned count,
3064 const struct pipe_vertex_element *elements)
3065 {
3066 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
3067 int i;
3068
3069 assert(count < PIPE_MAX_ATTRIBS);
3070 if (!v)
3071 return NULL;
3072
3073 v->count = count;
3074 for (i = 0; i < count; ++i) {
3075 const struct util_format_description *desc;
3076 unsigned data_format, num_format;
3077 int first_non_void;
3078
3079 desc = util_format_description(elements[i].src_format);
3080 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
3081 data_format = si_translate_vertexformat(ctx->screen, elements[i].src_format,
3082 desc, first_non_void);
3083
3084 switch (desc->channel[first_non_void].type) {
3085 case UTIL_FORMAT_TYPE_FIXED:
3086 num_format = V_008F0C_BUF_NUM_FORMAT_USCALED; /* XXX */
3087 break;
3088 case UTIL_FORMAT_TYPE_SIGNED:
3089 if (desc->channel[first_non_void].normalized)
3090 num_format = V_008F0C_BUF_NUM_FORMAT_SNORM;
3091 else if (desc->channel[first_non_void].pure_integer)
3092 num_format = V_008F0C_BUF_NUM_FORMAT_SINT;
3093 else
3094 num_format = V_008F0C_BUF_NUM_FORMAT_SSCALED;
3095 break;
3096 case UTIL_FORMAT_TYPE_UNSIGNED:
3097 if (desc->channel[first_non_void].normalized)
3098 num_format = V_008F0C_BUF_NUM_FORMAT_UNORM;
3099 else if (desc->channel[first_non_void].pure_integer)
3100 num_format = V_008F0C_BUF_NUM_FORMAT_UINT;
3101 else
3102 num_format = V_008F0C_BUF_NUM_FORMAT_USCALED;
3103 break;
3104 case UTIL_FORMAT_TYPE_FLOAT:
3105 default:
3106 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
3107 }
3108
3109 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3110 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3111 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3112 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3113 S_008F0C_NUM_FORMAT(num_format) |
3114 S_008F0C_DATA_FORMAT(data_format);
3115 }
3116 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
3117
3118 return v;
3119 }
3120
3121 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3122 {
3123 struct r600_context *rctx = (struct r600_context *)ctx;
3124 struct si_vertex_element *v = (struct si_vertex_element*)state;
3125
3126 rctx->vertex_elements = v;
3127 }
3128
3129 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3130 {
3131 struct r600_context *rctx = (struct r600_context *)ctx;
3132
3133 if (rctx->vertex_elements == state)
3134 rctx->vertex_elements = NULL;
3135 FREE(state);
3136 }
3137
3138 static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count,
3139 const struct pipe_vertex_buffer *buffers)
3140 {
3141 struct r600_context *rctx = (struct r600_context *)ctx;
3142
3143 util_set_vertex_buffers_count(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, start_slot, count);
3144 }
3145
3146 static void si_set_index_buffer(struct pipe_context *ctx,
3147 const struct pipe_index_buffer *ib)
3148 {
3149 struct r600_context *rctx = (struct r600_context *)ctx;
3150
3151 if (ib) {
3152 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
3153 memcpy(&rctx->index_buffer, ib, sizeof(*ib));
3154 } else {
3155 pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
3156 }
3157 }
3158
3159 /*
3160 * Misc
3161 */
/**
 * pipe_context hook: set the polygon stipple pattern.
 *
 * Intentionally a no-op: both arguments are ignored.
 */
static void si_set_polygon_stipple(struct pipe_context *ctx,
				   const struct pipe_poly_stipple *state)
{
	(void)ctx;
	(void)state;
}
3166
3167 static void si_texture_barrier(struct pipe_context *ctx)
3168 {
3169 struct r600_context *rctx = (struct r600_context *)ctx;
3170 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
3171
3172 if (pm4 == NULL)
3173 return;
3174
3175 si_pm4_inval_texture_cache(pm4);
3176 si_pm4_inval_fb_cache(pm4, rctx->framebuffer.nr_cbufs);
3177 si_pm4_set_state(rctx, texture_barrier, pm4);
3178 }
3179
3180 static void *si_create_blend_custom(struct r600_context *rctx, unsigned mode)
3181 {
3182 struct pipe_blend_state blend;
3183
3184 memset(&blend, 0, sizeof(blend));
3185 blend.independent_blend_enable = true;
3186 blend.rt[0].colormask = 0xf;
3187 return si_create_blend_state_mode(&rctx->context, &blend, mode);
3188 }
3189
3190 void si_init_state_functions(struct r600_context *rctx)
3191 {
3192 int i;
3193
3194 rctx->context.create_blend_state = si_create_blend_state;
3195 rctx->context.bind_blend_state = si_bind_blend_state;
3196 rctx->context.delete_blend_state = si_delete_blend_state;
3197 rctx->context.set_blend_color = si_set_blend_color;
3198
3199 rctx->context.create_rasterizer_state = si_create_rs_state;
3200 rctx->context.bind_rasterizer_state = si_bind_rs_state;
3201 rctx->context.delete_rasterizer_state = si_delete_rs_state;
3202
3203 rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state;
3204 rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state;
3205 rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state;
3206
3207 for (i = 0; i < 8; i++) {
3208 rctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(rctx, true, true, i);
3209 rctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(rctx, true, false, i);
3210 rctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(rctx, false, true, i);
3211 }
3212 rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false, 0);
3213 rctx->custom_blend_resolve = si_create_blend_custom(rctx, V_028808_CB_RESOLVE);
3214 rctx->custom_blend_decompress = si_create_blend_custom(rctx, V_028808_CB_FMASK_DECOMPRESS);
3215
3216 rctx->context.set_clip_state = si_set_clip_state;
3217 rctx->context.set_scissor_states = si_set_scissor_states;
3218 rctx->context.set_viewport_states = si_set_viewport_states;
3219 rctx->context.set_stencil_ref = si_set_pipe_stencil_ref;
3220
3221 rctx->context.set_framebuffer_state = si_set_framebuffer_state;
3222 rctx->context.get_sample_position = si_get_sample_position;
3223
3224 rctx->context.create_vs_state = si_create_vs_state;
3225 rctx->context.create_fs_state = si_create_fs_state;
3226 rctx->context.bind_vs_state = si_bind_vs_shader;
3227 rctx->context.bind_fs_state = si_bind_ps_shader;
3228 rctx->context.delete_vs_state = si_delete_vs_shader;
3229 rctx->context.delete_fs_state = si_delete_ps_shader;
3230
3231 rctx->context.create_sampler_state = si_create_sampler_state;
3232 rctx->context.bind_vertex_sampler_states = si_bind_vs_sampler_states;
3233 rctx->context.bind_fragment_sampler_states = si_bind_ps_sampler_states;
3234 rctx->context.delete_sampler_state = si_delete_sampler_state;
3235
3236 rctx->context.create_sampler_view = si_create_sampler_view;
3237 rctx->context.set_vertex_sampler_views = si_set_vs_sampler_views;
3238 rctx->context.set_fragment_sampler_views = si_set_ps_sampler_views;
3239 rctx->context.sampler_view_destroy = si_sampler_view_destroy;
3240
3241 rctx->context.set_sample_mask = si_set_sample_mask;
3242
3243 rctx->context.set_constant_buffer = si_set_constant_buffer;
3244
3245 rctx->context.create_vertex_elements_state = si_create_vertex_elements;
3246 rctx->context.bind_vertex_elements_state = si_bind_vertex_elements;
3247 rctx->context.delete_vertex_elements_state = si_delete_vertex_element;
3248 rctx->context.set_vertex_buffers = si_set_vertex_buffers;
3249 rctx->context.set_index_buffer = si_set_index_buffer;
3250
3251 rctx->context.create_stream_output_target = si_create_so_target;
3252 rctx->context.stream_output_target_destroy = si_so_target_destroy;
3253 rctx->context.set_stream_output_targets = si_set_so_targets;
3254
3255 rctx->context.texture_barrier = si_texture_barrier;
3256 rctx->context.set_polygon_stipple = si_set_polygon_stipple;
3257
3258 rctx->context.draw_vbo = si_draw_vbo;
3259 }
3260
3261 void si_init_config(struct r600_context *rctx)
3262 {
3263 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
3264
3265 if (pm4 == NULL)
3266 return;
3267
3268 si_cmd_context_control(pm4);
3269
3270 si_pm4_set_reg(pm4, R_028A4C_PA_SC_MODE_CNTL_1, 0x0);
3271
3272 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
3273 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
3274 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
3275 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
3276 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
3277 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
3278 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
3279 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
3280 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
3281 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
3282 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
3283 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);
3284 si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, 0x0);
3285 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
3286 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3287 si_pm4_set_reg(pm4, R_028B94_VGT_STRMOUT_CONFIG, 0x0);
3288 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3289 si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM,
3290 S_028AA8_SWITCH_ON_EOP(1) |
3291 S_028AA8_PARTIAL_VS_WAVE_ON(1) |
3292 S_028AA8_PRIMGROUP_SIZE(63));
3293 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
3294 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3295 if (rctx->chip_class < CIK)
3296 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3297 S_008A14_CLIP_VTX_REORDER_ENA(1));
3298
3299 si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, 0);
3300 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3301 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3302
3303 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3304
3305 if (rctx->chip_class >= CIK) {
3306 switch (rctx->screen->family) {
3307 case CHIP_BONAIRE:
3308 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
3309 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3310 break;
3311 case CHIP_KAVERI:
3312 /* XXX todo */
3313 case CHIP_KABINI:
3314 /* XXX todo */
3315 default:
3316 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3317 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3318 break;
3319 }
3320 } else {
3321 switch (rctx->screen->family) {
3322 case CHIP_TAHITI:
3323 case CHIP_PITCAIRN:
3324 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
3325 break;
3326 case CHIP_VERDE:
3327 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
3328 break;
3329 case CHIP_OLAND:
3330 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
3331 break;
3332 case CHIP_HAINAN:
3333 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3334 break;
3335 default:
3336 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3337 break;
3338 }
3339 }
3340
3341 si_pm4_set_state(rctx, init, pm4);
3342 }