mesa.git: src/gallium/drivers/r600/r600_state_common.c @ 5cf520899cf040cafb57844006ebe55b301a3ad4
1 /*
2 * Copyright 2010 Red Hat Inc.
3 * 2010 Jerome Glisse
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie <airlied@redhat.com>
25 * Jerome Glisse <jglisse@redhat.com>
26 */
27 #include "r600_formats.h"
28 #include "r600_shader.h"
29 #include "r600d.h"
30
31 #include "util/u_format_s3tc.h"
32 #include "util/u_index_modify.h"
33 #include "util/u_memory.h"
34 #include "util/u_upload_mgr.h"
35 #include "util/u_math.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_scan.h"
38
39 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
40 {
41 assert(!cb->buf);
42 cb->buf = CALLOC(1, 4 * num_dw);
43 cb->max_num_dw = num_dw;
44 }
45
46 void r600_release_command_buffer(struct r600_command_buffer *cb)
47 {
48 FREE(cb->buf);
49 }
50
51 void r600_add_atom(struct r600_context *rctx,
52 struct r600_atom *atom,
53 unsigned id)
54 {
55 assert(id < R600_NUM_ATOMS);
56 assert(rctx->atoms[id] == NULL);
57 rctx->atoms[id] = atom;
58 atom->id = id;
59 }
60
61 void r600_init_atom(struct r600_context *rctx,
62 struct r600_atom *atom,
63 unsigned id,
64 void (*emit)(struct r600_context *ctx, struct r600_atom *state),
65 unsigned num_dw)
66 {
67 atom->emit = (void*)emit;
68 atom->num_dw = num_dw;
69 r600_add_atom(rctx, atom, id);
70 }
71
72 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
73 {
74 r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb);
75 }
76
77 void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
78 {
79 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
80 struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
81 unsigned alpha_ref = a->sx_alpha_ref;
82
83 if (rctx->b.chip_class >= EVERGREEN && a->cb0_export_16bpc) {
84 alpha_ref &= ~0x1FFF;
85 }
86
87 radeon_set_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
88 a->sx_alpha_test_control |
89 S_028410_ALPHA_TEST_BYPASS(a->bypass));
90 radeon_set_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
91 }
92
93 static void r600_texture_barrier(struct pipe_context *ctx)
94 {
95 struct r600_context *rctx = (struct r600_context *)ctx;
96
97 rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
98 R600_CONTEXT_FLUSH_AND_INV_CB |
99 R600_CONTEXT_FLUSH_AND_INV |
100 R600_CONTEXT_WAIT_3D_IDLE;
101 }
102
103 static unsigned r600_conv_pipe_prim(unsigned prim)
104 {
105 static const unsigned prim_conv[] = {
106 [PIPE_PRIM_POINTS] = V_008958_DI_PT_POINTLIST,
107 [PIPE_PRIM_LINES] = V_008958_DI_PT_LINELIST,
108 [PIPE_PRIM_LINE_LOOP] = V_008958_DI_PT_LINELOOP,
109 [PIPE_PRIM_LINE_STRIP] = V_008958_DI_PT_LINESTRIP,
110 [PIPE_PRIM_TRIANGLES] = V_008958_DI_PT_TRILIST,
111 [PIPE_PRIM_TRIANGLE_STRIP] = V_008958_DI_PT_TRISTRIP,
112 [PIPE_PRIM_TRIANGLE_FAN] = V_008958_DI_PT_TRIFAN,
113 [PIPE_PRIM_QUADS] = V_008958_DI_PT_QUADLIST,
114 [PIPE_PRIM_QUAD_STRIP] = V_008958_DI_PT_QUADSTRIP,
115 [PIPE_PRIM_POLYGON] = V_008958_DI_PT_POLYGON,
116 [PIPE_PRIM_LINES_ADJACENCY] = V_008958_DI_PT_LINELIST_ADJ,
117 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ,
118 [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ,
119 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ,
120 [R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST
121 };
122 assert(prim < Elements(prim_conv));
123 return prim_conv[prim];
124 }
125
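/* Map a pipe primitive type to the coarser GS output primitive class
 * (point list, line strip or tri strip) expected by the VGT. */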
126 unsigned r600_conv_prim_to_gs_out(unsigned mode)
127 {
128 static const int prim_conv[] = {
129 [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
130 [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
131 [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
132 [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
133 [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
134 [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
135 [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
136 [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
137 [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
138 [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
139 [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
140 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
141 [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
142 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
143 [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
144 [R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP
145 };
146 assert(mode < Elements(prim_conv));
147
148 return prim_conv[mode];
149 }
150
151 /* common state between evergreen and r600 */
152
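/* Bind a blend CSO, substituting its no-blend variant when blending is
 * forced off, and update the derived CB state (colormask, color control,
 * dual-source blend). */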
153 static void r600_bind_blend_state_internal(struct r600_context *rctx,
154 struct r600_blend_state *blend, bool blend_disable)
155 {
156 unsigned color_control;
157 bool update_cb = false;
158
159 rctx->alpha_to_one = blend->alpha_to_one;
160 rctx->dual_src_blend = blend->dual_src_blend;
161
162 if (!blend_disable) {
163 r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer);
164 color_control = blend->cb_color_control;
165 } else {
166 /* Blending is disabled. */
167 r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend);
168 color_control = blend->cb_color_control_no_blend;
169 }
170
171 /* Update derived states. */
172 if (rctx->cb_misc_state.blend_colormask != blend->cb_target_mask) {
173 rctx->cb_misc_state.blend_colormask = blend->cb_target_mask;
174 update_cb = true;
175 }
176 if (rctx->b.chip_class <= R700 &&
177 rctx->cb_misc_state.cb_color_control != color_control) {
178 rctx->cb_misc_state.cb_color_control = color_control;
179 update_cb = true;
180 }
181 if (rctx->cb_misc_state.dual_src_blend != blend->dual_src_blend) {
182 rctx->cb_misc_state.dual_src_blend = blend->dual_src_blend;
183 update_cb = true;
184 }
185 if (update_cb) {
186 r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
187 }
188 }
189
190 static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
191 {
192 struct r600_context *rctx = (struct r600_context *)ctx;
193 struct r600_blend_state *blend = (struct r600_blend_state *)state;
194
195 if (blend == NULL) {
196 r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL);
197 return;
198 }
199
200 r600_bind_blend_state_internal(rctx, blend, rctx->force_blend_disable);
201 }
202
203 static void r600_set_blend_color(struct pipe_context *ctx,
204 const struct pipe_blend_color *state)
205 {
206 struct r600_context *rctx = (struct r600_context *)ctx;
207
208 rctx->blend_color.state = *state;
209 r600_mark_atom_dirty(rctx, &rctx->blend_color.atom);
210 }
211
212 void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
213 {
214 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
215 struct pipe_blend_color *state = &rctx->blend_color.state;
216
217 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
218 radeon_emit(cs, fui(state->color[0])); /* R_028414_CB_BLEND_RED */
219 radeon_emit(cs, fui(state->color[1])); /* R_028418_CB_BLEND_GREEN */
220 radeon_emit(cs, fui(state->color[2])); /* R_02841C_CB_BLEND_BLUE */
221 radeon_emit(cs, fui(state->color[3])); /* R_028420_CB_BLEND_ALPHA */
222 }
223
224 void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
225 {
226 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
227 struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
228
229 radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
230 radeon_set_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
231 radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */
232 radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */
233 if (a->last_draw_was_indirect) {
234 a->last_draw_was_indirect = false;
235 radeon_set_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
236 }
237 }
238
239 static void r600_set_clip_state(struct pipe_context *ctx,
240 const struct pipe_clip_state *state)
241 {
242 struct r600_context *rctx = (struct r600_context *)ctx;
243
244 rctx->clip_state.state = *state;
245 r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
246 rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
247 }
248
249 static void r600_set_stencil_ref(struct pipe_context *ctx,
250 const struct r600_stencil_ref *state)
251 {
252 struct r600_context *rctx = (struct r600_context *)ctx;
253
254 rctx->stencil_ref.state = *state;
255 r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom);
256 }
257
258 void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
259 {
260 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
261 struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
262
263 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
264 radeon_emit(cs, /* R_028430_DB_STENCILREFMASK */
265 S_028430_STENCILREF(a->state.ref_value[0]) |
266 S_028430_STENCILMASK(a->state.valuemask[0]) |
267 S_028430_STENCILWRITEMASK(a->state.writemask[0]));
268 radeon_emit(cs, /* R_028434_DB_STENCILREFMASK_BF */
269 S_028434_STENCILREF_BF(a->state.ref_value[1]) |
270 S_028434_STENCILMASK_BF(a->state.valuemask[1]) |
271 S_028434_STENCILWRITEMASK_BF(a->state.writemask[1]));
272 }
273
274 static void r600_set_pipe_stencil_ref(struct pipe_context *ctx,
275 const struct pipe_stencil_ref *state)
276 {
277 struct r600_context *rctx = (struct r600_context *)ctx;
278 struct r600_dsa_state *dsa = (struct r600_dsa_state*)rctx->dsa_state.cso;
279 struct r600_stencil_ref ref;
280
281 rctx->stencil_ref.pipe_state = *state;
282
283 if (!dsa)
284 return;
285
286 ref.ref_value[0] = state->ref_value[0];
287 ref.ref_value[1] = state->ref_value[1];
288 ref.valuemask[0] = dsa->valuemask[0];
289 ref.valuemask[1] = dsa->valuemask[1];
290 ref.writemask[0] = dsa->writemask[0];
291 ref.writemask[1] = dsa->writemask[1];
292
293 r600_set_stencil_ref(ctx, &ref);
294 }
295
296 static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
297 {
298 struct r600_context *rctx = (struct r600_context *)ctx;
299 struct r600_dsa_state *dsa = state;
300 struct r600_stencil_ref ref;
301
302 if (state == NULL) {
303 r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL);
304 return;
305 }
306
307 r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer);
308
309 ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0];
310 ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1];
311 ref.valuemask[0] = dsa->valuemask[0];
312 ref.valuemask[1] = dsa->valuemask[1];
313 ref.writemask[0] = dsa->writemask[0];
314 ref.writemask[1] = dsa->writemask[1];
315 if (rctx->zwritemask != dsa->zwritemask) {
316 rctx->zwritemask = dsa->zwritemask;
317 if (rctx->b.chip_class >= EVERGREEN) {
318 /* Work around an issue when not writing to the zbuffer:
319 * we see lockups on evergreen, so do not enable
320 * hyperz when the zbuffer is not written.
321 */
322 r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
323 }
324 }
325
326 r600_set_stencil_ref(ctx, &ref);
327
328 /* Update alphatest state. */
329 if (rctx->alphatest_state.sx_alpha_test_control != dsa->sx_alpha_test_control ||
330 rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) {
331 rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control;
332 rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref;
333 r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
334 }
335 }
336
337 static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
338 {
339 struct r600_rasterizer_state *rs = (struct r600_rasterizer_state *)state;
340 struct r600_context *rctx = (struct r600_context *)ctx;
341
342 if (state == NULL)
343 return;
344
345 rctx->rasterizer = rs;
346
347 r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer);
348
349 if (rs->offset_enable &&
350 (rs->offset_units != rctx->poly_offset_state.offset_units ||
351 rs->offset_scale != rctx->poly_offset_state.offset_scale)) {
352 rctx->poly_offset_state.offset_units = rs->offset_units;
353 rctx->poly_offset_state.offset_scale = rs->offset_scale;
354 r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
355 }
356
357 /* Update clip_misc_state. */
358 if (rctx->clip_misc_state.pa_cl_clip_cntl != rs->pa_cl_clip_cntl ||
359 rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) {
360 rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
361 rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable;
362 r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
363 }
364
365 /* Workaround for a missing scissor enable on r600. */
366 if (rctx->b.chip_class == R600 &&
367 rs->scissor_enable != rctx->scissor.enable) {
368 rctx->scissor.enable = rs->scissor_enable;
369 rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
370 rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
371 r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
372 }
373
374 /* Re-emit PA_SC_LINE_STIPPLE. */
375 rctx->last_primitive_type = -1;
376 }
377
378 static void r600_delete_rs_state(struct pipe_context *ctx, void *state)
379 {
380 struct r600_rasterizer_state *rs = (struct r600_rasterizer_state *)state;
381
382 r600_release_command_buffer(&rs->buffer);
383 FREE(rs);
384 }
385
386 static void r600_sampler_view_destroy(struct pipe_context *ctx,
387 struct pipe_sampler_view *state)
388 {
389 struct r600_pipe_sampler_view *view = (struct r600_pipe_sampler_view *)state;
390
391 if (view->tex_resource->gpu_address &&
392 view->tex_resource->b.b.target == PIPE_BUFFER)
393 LIST_DELINIT(&view->list);
394
395 pipe_resource_reference(&state->texture, NULL);
396 FREE(view);
397 }
398
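/* Recompute the emit size for dirty sampler states (11 dwords per state
 * with a border color, 5 otherwise) and mark the atom dirty; border color
 * updates also require waiting for 3D idle. */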
399 void r600_sampler_states_dirty(struct r600_context *rctx,
400 struct r600_sampler_states *state)
401 {
402 if (state->dirty_mask) {
403 if (state->dirty_mask & state->has_bordercolor_mask) {
404 rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
405 }
406 state->atom.num_dw =
407 util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
408 util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5;
409 r600_mark_atom_dirty(rctx, &state->atom);
410 }
411 }
412
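/* Bind sampler states for a shader stage, tracking which slots use a
 * border color and whether the seamless cube map setting changed
 * (which needs a flush on r600/r700). */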
413 static void r600_bind_sampler_states(struct pipe_context *pipe,
414 unsigned shader,
415 unsigned start,
416 unsigned count, void **states)
417 {
418 struct r600_context *rctx = (struct r600_context *)pipe;
419 struct r600_textures_info *dst = &rctx->samplers[shader];
420 struct r600_pipe_sampler_state **rstates = (struct r600_pipe_sampler_state**)states;
421 int seamless_cube_map = -1;
422 unsigned i;
423 /* This sets a 1 bit for each state slot with index >= count. */
424 uint32_t disable_mask = ~((1ull << count) - 1);
425 /* These are the new states set by this function. */
426 uint32_t new_mask = 0;
427
428 assert(start == 0); /* XXX fix below */
429
430 if (!states) {
431 disable_mask = ~0u;
432 count = 0;
433 }
434
435 for (i = 0; i < count; i++) {
436 struct r600_pipe_sampler_state *rstate = rstates[i];
437
438 if (rstate == dst->states.states[i]) {
439 continue;
440 }
441
442 if (rstate) {
443 if (rstate->border_color_use) {
444 dst->states.has_bordercolor_mask |= 1 << i;
445 } else {
446 dst->states.has_bordercolor_mask &= ~(1 << i);
447 }
448 seamless_cube_map = rstate->seamless_cube_map;
449
450 new_mask |= 1 << i;
451 } else {
452 disable_mask |= 1 << i;
453 }
454 }
455
456 memcpy(dst->states.states, rstates, sizeof(void*) * count);
457 memset(dst->states.states + count, 0, sizeof(void*) * (NUM_TEX_UNITS - count));
458
459 dst->states.enabled_mask &= ~disable_mask;
460 dst->states.dirty_mask &= dst->states.enabled_mask;
461 dst->states.enabled_mask |= new_mask;
462 dst->states.dirty_mask |= new_mask;
463 dst->states.has_bordercolor_mask &= dst->states.enabled_mask;
464
465 r600_sampler_states_dirty(rctx, &dst->states);
466
467 /* Seamless cubemap state. */
468 if (rctx->b.chip_class <= R700 &&
469 seamless_cube_map != -1 &&
470 seamless_cube_map != rctx->seamless_cube_map.enabled) {
471 /* A change in TA_CNTL_AUX needs a pipeline flush. */
472 rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
473 rctx->seamless_cube_map.enabled = seamless_cube_map;
474 r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
475 }
476 }
477
478 static void r600_delete_sampler_state(struct pipe_context *ctx, void *state)
479 {
480 free(state);
481 }
482
483 static void r600_delete_blend_state(struct pipe_context *ctx, void *state)
484 {
485 struct r600_context *rctx = (struct r600_context *)ctx;
486 struct r600_blend_state *blend = (struct r600_blend_state*)state;
487
488 if (rctx->blend_state.cso == state) {
489 ctx->bind_blend_state(ctx, NULL);
490 }
491
492 r600_release_command_buffer(&blend->buffer);
493 r600_release_command_buffer(&blend->buffer_no_blend);
494 FREE(blend);
495 }
496
497 static void r600_delete_dsa_state(struct pipe_context *ctx, void *state)
498 {
499 struct r600_context *rctx = (struct r600_context *)ctx;
500 struct r600_dsa_state *dsa = (struct r600_dsa_state *)state;
501
502 if (rctx->dsa_state.cso == state) {
503 ctx->bind_depth_stencil_alpha_state(ctx, NULL);
504 }
505
506 r600_release_command_buffer(&dsa->buffer);
507 free(dsa);
508 }
509
510 static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
511 {
512 struct r600_context *rctx = (struct r600_context *)ctx;
513
514 r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state);
515 }
516
517 static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
518 {
519 struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state;
520 pipe_resource_reference((struct pipe_resource**)&shader->buffer, NULL);
521 FREE(shader);
522 }
523
524 static void r600_set_index_buffer(struct pipe_context *ctx,
525 const struct pipe_index_buffer *ib)
526 {
527 struct r600_context *rctx = (struct r600_context *)ctx;
528
529 if (ib) {
530 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
531 memcpy(&rctx->index_buffer, ib, sizeof(*ib));
532 r600_context_add_resource_size(ctx, ib->buffer);
533 } else {
534 pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
535 }
536 }
537
538 void r600_vertex_buffers_dirty(struct r600_context *rctx)
539 {
540 if (rctx->vertex_buffer_state.dirty_mask) {
541 rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
542 rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 12 : 11) *
543 util_bitcount(rctx->vertex_buffer_state.dirty_mask);
544 r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
545 }
546 }
547
548 static void r600_set_vertex_buffers(struct pipe_context *ctx,
549 unsigned start_slot, unsigned count,
550 const struct pipe_vertex_buffer *input)
551 {
552 struct r600_context *rctx = (struct r600_context *)ctx;
553 struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state;
554 struct pipe_vertex_buffer *vb = state->vb + start_slot;
555 unsigned i;
556 uint32_t disable_mask = 0;
557 /* These are the new buffers set by this function. */
558 uint32_t new_buffer_mask = 0;
559
560 /* Set vertex buffers. */
561 if (input) {
562 for (i = 0; i < count; i++) {
563 if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) {
564 if (input[i].buffer) {
565 vb[i].stride = input[i].stride;
566 vb[i].buffer_offset = input[i].buffer_offset;
567 pipe_resource_reference(&vb[i].buffer, input[i].buffer);
568 new_buffer_mask |= 1 << i;
569 r600_context_add_resource_size(ctx, input[i].buffer);
570 } else {
571 pipe_resource_reference(&vb[i].buffer, NULL);
572 disable_mask |= 1 << i;
573 }
574 }
575 }
576 } else {
577 for (i = 0; i < count; i++) {
578 pipe_resource_reference(&vb[i].buffer, NULL);
579 }
580 disable_mask = ((1ull << count) - 1);
581 }
582
583 disable_mask <<= start_slot;
584 new_buffer_mask <<= start_slot;
585
586 rctx->vertex_buffer_state.enabled_mask &= ~disable_mask;
587 rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask;
588 rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask;
589 rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask;
590
591 r600_vertex_buffers_dirty(rctx);
592 }
593
594 void r600_sampler_views_dirty(struct r600_context *rctx,
595 struct r600_samplerview_state *state)
596 {
597 if (state->dirty_mask) {
598 rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
599 state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 14 : 13) *
600 util_bitcount(state->dirty_mask);
601 r600_mark_atom_dirty(rctx, &state->atom);
602 }
603 }
604
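/* Bind sampler views for a shader stage: track compressed depth and color
 * textures, flag sampler states that need a TEX_ARRAY_OVERRIDE update on
 * r600/r700, and mark the view and buffer-constant state dirty. */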
605 static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
606 unsigned start, unsigned count,
607 struct pipe_sampler_view **views)
608 {
609 struct r600_context *rctx = (struct r600_context *) pipe;
610 struct r600_textures_info *dst = &rctx->samplers[shader];
611 struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views;
612 uint32_t dirty_sampler_states_mask = 0;
613 unsigned i;
614 /* This sets a 1 bit for each texture slot with index >= count. */
615 uint32_t disable_mask = ~((1ull << count) - 1);
616 /* These are the new textures set by this function. */
617 uint32_t new_mask = 0;
618
619 /* Set textures with index >= count to NULL. */
620 uint32_t remaining_mask;
621
622 assert(start == 0); /* XXX fix below */
623
624 if (!views) {
625 disable_mask = ~0u;
626 count = 0;
627 }
628
629 remaining_mask = dst->views.enabled_mask & disable_mask;
630
631 while (remaining_mask) {
632 i = u_bit_scan(&remaining_mask);
633 assert(dst->views.views[i]);
634
635 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
636 }
637
638 for (i = 0; i < count; i++) {
639 if (rviews[i] == dst->views.views[i]) {
640 continue;
641 }
642
643 if (rviews[i]) {
644 struct r600_texture *rtex =
645 (struct r600_texture*)rviews[i]->base.texture;
646
647 if (rviews[i]->base.texture->target != PIPE_BUFFER) {
648 if (rtex->is_depth && !rtex->is_flushing_texture) {
649 dst->views.compressed_depthtex_mask |= 1 << i;
650 } else {
651 dst->views.compressed_depthtex_mask &= ~(1 << i);
652 }
653
654 /* Track compressed colorbuffers. */
655 if (rtex->cmask.size) {
656 dst->views.compressed_colortex_mask |= 1 << i;
657 } else {
658 dst->views.compressed_colortex_mask &= ~(1 << i);
659 }
660 }
661 /* Changing from array to non-array textures and vice versa requires
662 * updating TEX_ARRAY_OVERRIDE in the sampler states on R6xx-R7xx. */
663 if (rctx->b.chip_class <= R700 &&
664 (dst->states.enabled_mask & (1 << i)) &&
665 (rviews[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY ||
666 rviews[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) != dst->is_array_sampler[i]) {
667 dirty_sampler_states_mask |= 1 << i;
668 }
669
670 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
671 new_mask |= 1 << i;
672 r600_context_add_resource_size(pipe, views[i]->texture);
673 } else {
674 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
675 disable_mask |= 1 << i;
676 }
677 }
678
679 dst->views.enabled_mask &= ~disable_mask;
680 dst->views.dirty_mask &= dst->views.enabled_mask;
681 dst->views.enabled_mask |= new_mask;
682 dst->views.dirty_mask |= new_mask;
683 dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
684 dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
685 dst->views.dirty_buffer_constants = TRUE;
686 r600_sampler_views_dirty(rctx, &dst->views);
687
688 if (dirty_sampler_states_mask) {
689 dst->states.dirty_mask |= dirty_sampler_states_mask;
690 r600_sampler_states_dirty(rctx, &dst->states);
691 }
692 }
693
694 static void r600_set_viewport_states(struct pipe_context *ctx,
695 unsigned start_slot,
696 unsigned num_viewports,
697 const struct pipe_viewport_state *state)
698 {
699 struct r600_context *rctx = (struct r600_context *)ctx;
700 struct r600_viewport_state *rstate = &rctx->viewport;
701 int i;
702
703 for (i = start_slot; i < start_slot + num_viewports; i++)
704 rstate->state[i] = state[i - start_slot];
705 rstate->dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
706 rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 8;
707 r600_mark_atom_dirty(rctx, &rctx->viewport.atom);
708 }
709
710 void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
711 {
712 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
713 struct r600_viewport_state *rstate = &rctx->viewport;
714 struct pipe_viewport_state *state;
715 uint32_t dirty_mask;
716 unsigned i, offset;
717
718 dirty_mask = rstate->dirty_mask;
719 while (dirty_mask != 0) {
720 i = u_bit_scan(&dirty_mask);
721 offset = i * 6 * 4;
722 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
723 state = &rstate->state[i];
724 radeon_emit(cs, fui(state->scale[0])); /* R_02843C_PA_CL_VPORT_XSCALE_0 */
725 radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
726 radeon_emit(cs, fui(state->scale[1])); /* R_028444_PA_CL_VPORT_YSCALE_0 */
727 radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
728 radeon_emit(cs, fui(state->scale[2])); /* R_02844C_PA_CL_VPORT_ZSCALE_0 */
729 radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
730 }
731 rstate->dirty_mask = 0;
732 rstate->atom.num_dw = 0;
733 }
734
735 /* Compute the key for the hw shader variant */
736 static inline union r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
737 struct r600_pipe_shader_selector * sel)
738 {
739 struct r600_context *rctx = (struct r600_context *)ctx;
740 union r600_shader_key key;
741 memset(&key, 0, sizeof(key));
742
743 switch (sel->type) {
744 case PIPE_SHADER_VERTEX: {
745 key.vs.as_es = (rctx->gs_shader != NULL);
746 if (rctx->ps_shader->current->shader.gs_prim_id_input && !rctx->gs_shader) {
747 key.vs.as_gs_a = true;
748 key.vs.prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid;
749 }
750 break;
751 }
752 case PIPE_SHADER_GEOMETRY:
753 break;
754 case PIPE_SHADER_FRAGMENT: {
755 key.ps.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side;
756 key.ps.alpha_to_one = rctx->alpha_to_one &&
757 rctx->rasterizer && rctx->rasterizer->multisample_enable &&
758 !rctx->framebuffer.cb0_is_integer;
759 key.ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
760 /* Dual-source blending only makes sense with nr_cbufs == 1. */
761 if (key.ps.nr_cbufs == 1 && rctx->dual_src_blend)
762 key.ps.nr_cbufs = 2;
763 break;
764 }
765 default:
766 assert(0);
767 }
768
769 return key;
770 }
771
772 /* Select the hw shader variant depending on the current state.
773 * (*dirty) is set to true if the current variant was changed. */
774 static int r600_shader_select(struct pipe_context *ctx,
775 struct r600_pipe_shader_selector* sel,
776 bool *dirty)
777 {
778 union r600_shader_key key;
779 struct r600_pipe_shader * shader = NULL;
780 int r;
781
782 memset(&key, 0, sizeof(key));
783 key = r600_shader_selector_key(ctx, sel);
784
785 /* Check whether we need to change anything at all.
786 * This path is also used for most shaders that don't need multiple
787 * variants; it costs just the computation of the key and this
788 * test. */
789 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
790 return 0;
791 }
792
793 /* lookup if we have other variants in the list */
794 if (sel->num_shaders > 1) {
795 struct r600_pipe_shader *p = sel->current, *c = p->next_variant;
796
797 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
798 p = c;
799 c = c->next_variant;
800 }
801
802 if (c) {
803 p->next_variant = c->next_variant;
804 shader = c;
805 }
806 }
807
808 if (unlikely(!shader)) {
809 shader = CALLOC(1, sizeof(struct r600_pipe_shader));
810 shader->selector = sel;
811
812 r = r600_pipe_shader_create(ctx, shader, key);
813 if (unlikely(r)) {
814 R600_ERR("Failed to build shader variant (type=%u) %d\n",
815 sel->type, r);
816 sel->current = NULL;
817 FREE(shader);
818 return r;
819 }
820
821 /* We don't know the value of nr_ps_max_color_exports until we have built
822 * at least one variant, so we may need to recompute the key after
823 * building the first one. */
824 if (sel->type == PIPE_SHADER_FRAGMENT &&
825 sel->num_shaders == 0) {
826 sel->nr_ps_max_color_exports = shader->shader.nr_ps_max_color_exports;
827 key = r600_shader_selector_key(ctx, sel);
828 }
829
830 memcpy(&shader->key, &key, sizeof(key));
831 sel->num_shaders++;
832 }
833
834 if (dirty)
835 *dirty = true;
836
837 shader->next_variant = sel->current;
838 sel->current = shader;
839
840 return 0;
841 }
842
843 static void *r600_create_shader_state(struct pipe_context *ctx,
844 const struct pipe_shader_state *state,
845 unsigned pipe_shader_type)
846 {
847 struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector);
848
849 sel->type = pipe_shader_type;
850 sel->tokens = tgsi_dup_tokens(state->tokens);
851 sel->so = state->stream_output;
852 tgsi_scan_shader(state->tokens, &sel->info);
853
854 switch (pipe_shader_type) {
855 case PIPE_SHADER_GEOMETRY:
856 sel->gs_output_prim =
857 sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
858 sel->gs_max_out_vertices =
859 sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
860 sel->gs_num_invocations =
861 sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
862 break;
863 }
864
865 return sel;
866 }
867
868 static void *r600_create_ps_state(struct pipe_context *ctx,
869 const struct pipe_shader_state *state)
870 {
871 return r600_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
872 }
873
874 static void *r600_create_vs_state(struct pipe_context *ctx,
875 const struct pipe_shader_state *state)
876 {
877 return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
878 }
879
880 static void *r600_create_gs_state(struct pipe_context *ctx,
881 const struct pipe_shader_state *state)
882 {
883 return r600_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
884 }
885
886 static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
887 {
888 struct r600_context *rctx = (struct r600_context *)ctx;
889
890 if (!state)
891 state = rctx->dummy_pixel_shader;
892
893 rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
894 }
895
896 static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
897 {
898 struct r600_context *rctx = (struct r600_context *)ctx;
899
900 if (!state)
901 return;
902
903 rctx->vs_shader = (struct r600_pipe_shader_selector *)state;
904 rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride;
905 }
906
907 static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
908 {
909 struct r600_context *rctx = (struct r600_context *)ctx;
910
911 rctx->gs_shader = (struct r600_pipe_shader_selector *)state;
912
913 if (!state)
914 return;
915 rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride;
916 }
917
918 static void r600_delete_shader_selector(struct pipe_context *ctx,
919 struct r600_pipe_shader_selector *sel)
920 {
921 struct r600_pipe_shader *p = sel->current, *c;
922 while (p) {
923 c = p->next_variant;
924 r600_pipe_shader_destroy(ctx, p);
925 free(p);
926 p = c;
927 }
928
929 free(sel->tokens);
930 free(sel);
931 }
932
933
934 static void r600_delete_ps_state(struct pipe_context *ctx, void *state)
935 {
936 struct r600_context *rctx = (struct r600_context *)ctx;
937 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state;
938
939 if (rctx->ps_shader == sel) {
940 rctx->ps_shader = NULL;
941 }
942
943 r600_delete_shader_selector(ctx, sel);
944 }
945
946 static void r600_delete_vs_state(struct pipe_context *ctx, void *state)
947 {
948 struct r600_context *rctx = (struct r600_context *)ctx;
949 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state;
950
951 if (rctx->vs_shader == sel) {
952 rctx->vs_shader = NULL;
953 }
954
955 r600_delete_shader_selector(ctx, sel);
956 }
957
958
959 static void r600_delete_gs_state(struct pipe_context *ctx, void *state)
960 {
961 struct r600_context *rctx = (struct r600_context *)ctx;
962 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state;
963
964 if (rctx->gs_shader == sel) {
965 rctx->gs_shader = NULL;
966 }
967
968 r600_delete_shader_selector(ctx, sel);
969 }
970
971
972 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
973 {
974 if (state->dirty_mask) {
975 rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
976 state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
977 : util_bitcount(state->dirty_mask)*19;
978 r600_mark_atom_dirty(rctx, &state->atom);
979 }
980 }
981
982 static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
983 struct pipe_constant_buffer *input)
984 {
985 struct r600_context *rctx = (struct r600_context *)ctx;
986 struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
987 struct pipe_constant_buffer *cb;
988 const uint8_t *ptr;
989
990 /* Note that the state tracker can unbind constant buffers by
991 * passing NULL here.
992 */
993 if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
994 state->enabled_mask &= ~(1 << index);
995 state->dirty_mask &= ~(1 << index);
996 pipe_resource_reference(&state->cb[index].buffer, NULL);
997 return;
998 }
999
1000 cb = &state->cb[index];
1001 cb->buffer_size = input->buffer_size;
1002
1003 ptr = input->user_buffer;
1004
1005 if (ptr) {
1006 /* Upload the user buffer. */
1007 if (R600_BIG_ENDIAN) {
1008 uint32_t *tmpPtr;
1009 unsigned i, size = input->buffer_size;
1010
1011 if (!(tmpPtr = malloc(size))) {
1012 R600_ERR("Failed to allocate BE swap buffer.\n");
1013 return;
1014 }
1015
1016 for (i = 0; i < size / 4; ++i) {
1017 tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]);
1018 }
1019
1020 u_upload_data(rctx->b.uploader, 0, size, tmpPtr, &cb->buffer_offset, &cb->buffer);
1021 free(tmpPtr);
1022 } else {
1023 u_upload_data(rctx->b.uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
1024 }
1025 /* account it in gtt */
1026 rctx->b.gtt += input->buffer_size;
1027 } else {
1028 /* Setup the hw buffer. */
1029 cb->buffer_offset = input->buffer_offset;
1030 pipe_resource_reference(&cb->buffer, input->buffer);
1031 r600_context_add_resource_size(ctx, input->buffer);
1032 }
1033
1034 state->enabled_mask |= 1 << index;
1035 state->dirty_mask |= 1 << index;
1036 r600_constant_buffers_dirty(rctx, state);
1037 }
1038
1039 static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
1040 {
1041 struct r600_context *rctx = (struct r600_context*)pipe;
1042
1043 if (rctx->sample_mask.sample_mask == (uint16_t)sample_mask)
1044 return;
1045
1046 rctx->sample_mask.sample_mask = sample_mask;
1047 r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
1048 }
1049
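/* Upload the driver-internal constants (user clip planes, sample positions,
 * texture buffer info) of any stage whose data became dirty, binding them as
 * that stage's R600_BUFFER_INFO_CONST_BUFFER. */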
1050 static void r600_update_driver_const_buffers(struct r600_context *rctx)
1051 {
1052 int sh, size;
1053 void *ptr;
1054 struct pipe_constant_buffer cb;
1055 for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
1056 struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
1057 if (!info->vs_ucp_dirty &&
1058 !info->texture_const_dirty &&
1059 !info->ps_sample_pos_dirty)
1060 continue;
1061
1062 ptr = info->constants;
1063 size = info->alloc_size;
1064 if (info->vs_ucp_dirty) {
1065 assert(sh == PIPE_SHADER_VERTEX);
1066 if (!size) {
1067 ptr = rctx->clip_state.state.ucp;
1068 size = R600_UCP_SIZE;
1069 } else {
1070 memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
1071 }
1072 info->vs_ucp_dirty = false;
1073 }
1074
1075 if (info->ps_sample_pos_dirty) {
1076 assert(sh == PIPE_SHADER_FRAGMENT);
1077 if (!size) {
1078 ptr = rctx->sample_positions;
1079 size = R600_UCP_SIZE;
1080 } else {
1081 memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
1082 }
1083 info->ps_sample_pos_dirty = false;
1084 }
1085
1086 if (info->texture_const_dirty) {
1087 assert (ptr);
1088 assert (size);
1089 if (sh == PIPE_SHADER_VERTEX)
1090 memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
1091 if (sh == PIPE_SHADER_FRAGMENT)
1092 memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
1093 }
1094 info->texture_const_dirty = false;
1095
1096 cb.buffer = NULL;
1097 cb.user_buffer = ptr;
1098 cb.buffer_offset = 0;
1099 cb.buffer_size = size;
1100 rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
1101 pipe_resource_reference(&cb.buffer, NULL);
1102 }
1103 }
1104
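/* Grow the per-stage driver constant storage so array_size bytes fit after
 * the UCP block, zero that area and return the buffer; *base_offset receives
 * the byte offset where the texture/buffer constants start. */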
1105 static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
1106 int array_size, uint32_t *base_offset)
1107 {
1108 struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
1109 if (array_size + R600_UCP_SIZE > info->alloc_size) {
1110 info->constants = realloc(info->constants, array_size + R600_UCP_SIZE);
1111 info->alloc_size = array_size + R600_UCP_SIZE;
1112 }
1113 memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size);
1114 info->texture_const_dirty = true;
1115 *base_offset = R600_UCP_SIZE;
1116 return info->constants;
1117 }
1118 /*
1119 * On r600/700 hw there is no vertex fetch swizzle. A TBO doesn't
1120 * require full swizzles, but it does need masking and setting alpha
1121 * to one, so we set up 5 constants with the masks + alpha value;
1122 * in the shader we AND the 4 components with 0xffffffff or 0,
1123 * then OR the alpha with the value given here.
1124 * We use a 6th constant to store the txq buffer size and
1125 * a 7th slot for the number of cube layers in a cube map array.
1126 */
1127 static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type)
1128 {
1129 struct r600_textures_info *samplers = &rctx->samplers[shader_type];
1130 int bits;
1131 uint32_t array_size;
1132 int i, j;
1133 uint32_t *constants;
1134 uint32_t base_offset;
1135 if (!samplers->views.dirty_buffer_constants)
1136 return;
1137
1138 samplers->views.dirty_buffer_constants = FALSE;
1139
1140 bits = util_last_bit(samplers->views.enabled_mask);
1141 array_size = bits * 8 * sizeof(uint32_t) * 4;
1142
1143 constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
1144
1145 for (i = 0; i < bits; i++) {
1146 if (samplers->views.enabled_mask & (1 << i)) {
1147 int offset = (base_offset / 4) + i * 8;
1148 const struct util_format_description *desc;
1149 desc = util_format_description(samplers->views.views[i]->base.format);
1150
1151 for (j = 0; j < 4; j++)
1152 if (j < desc->nr_channels)
1153 constants[offset+j] = 0xffffffff;
1154 else
1155 constants[offset+j] = 0x0;
1156 if (desc->nr_channels < 4) {
1157 if (desc->channel[0].pure_integer)
1158 constants[offset+4] = 1;
1159 else
1160 constants[offset+4] = fui(1.0);
1161 } else
1162 constants[offset + 4] = 0;
1163
1164 constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
1165 constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
1166 }
1167 }
1168
1169 }
1170
1171 /* On evergreen we store two values
1172 * 1. buffer size for TXQ
1173 * 2. number of cube layers in a cube map array.
1174 */
1175 static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
1176 {
1177 struct r600_textures_info *samplers = &rctx->samplers[shader_type];
1178 int bits;
1179 uint32_t array_size;
1180 int i;
1181 uint32_t *constants;
1182 uint32_t base_offset;
1183 if (!samplers->views.dirty_buffer_constants)
1184 return;
1185
1186 samplers->views.dirty_buffer_constants = FALSE;
1187
1188 bits = util_last_bit(samplers->views.enabled_mask);
1189 array_size = bits * 2 * sizeof(uint32_t) * 4;
1190
1191 constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
1192 &base_offset);
1193
1194 for (i = 0; i < bits; i++) {
1195 if (samplers->views.enabled_mask & (1 << i)) {
1196 uint32_t offset = (base_offset / 4) + i * 2;
1197 constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
1198 constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
1199 }
1200 }
1201 }
1202
1203 /* Set the sample xy locations as an array of fragment shader constants. */
1204 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
1205 {
1206 int i;
1207 struct pipe_context *ctx = &rctx->b.b;
1208
1209 assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
1210 assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4);
1211
1212 memset(rctx->sample_positions, 0, 4 * 4 * 16);
1213 for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
1214 ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
1215 /* Also fill in center-zeroed positions used for interpolateAtSample */
1216 rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
1217 rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
1218 }
1219
1220 rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true;
1221 }
1222
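/* Point a shader stage atom at a new hw shader variant and size the atom to
 * the variant's command buffer (or zero it when the stage is disabled). */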
1223 static void update_shader_atom(struct pipe_context *ctx,
1224 struct r600_shader_state *state,
1225 struct r600_pipe_shader *shader)
1226 {
1227 struct r600_context *rctx = (struct r600_context *)ctx;
1228
1229 state->shader = shader;
1230 if (shader) {
1231 state->atom.num_dw = shader->command_buffer.num_dw;
1232 r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
1233 } else {
1234 state->atom.num_dw = 0;
1235 }
1236 r600_mark_atom_dirty(rctx, &state->atom);
1237 }
1238
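/* Enable or disable the GS block: toggle the shader-stage and GS-ring atoms
 * and, on first use, allocate the ESGS/GSVS ring buffers and bind them as
 * ring constant buffers. */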
1239 static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
1240 {
1241 if (rctx->shader_stages.geom_enable != enable) {
1242 rctx->shader_stages.geom_enable = enable;
1243 r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
1244 }
1245
1246 if (rctx->gs_rings.enable != enable) {
1247 rctx->gs_rings.enable = enable;
1248 r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom);
1249
1250 if (enable && !rctx->gs_rings.esgs_ring.buffer) {
1251 unsigned size = 0x1C000;
1252 rctx->gs_rings.esgs_ring.buffer =
1253 pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM,
1254 PIPE_USAGE_DEFAULT, size);
1255 rctx->gs_rings.esgs_ring.buffer_size = size;
1256
1257 size = 0x4000000;
1258
1259 rctx->gs_rings.gsvs_ring.buffer =
1260 pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM,
1261 PIPE_USAGE_DEFAULT, size);
1262 rctx->gs_rings.gsvs_ring.buffer_size = size;
1263 }
1264
1265 if (enable) {
1266 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY,
1267 R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring);
1268 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
1269 R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring);
1270 } else {
1271 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY,
1272 R600_GS_RING_CONST_BUFFER, NULL);
1273 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
1274 R600_GS_RING_CONST_BUFFER, NULL);
1275 }
1276 }
1277 }
1278
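/* Select the shader variants for the bound selectors and refresh everything
 * derived from them (clip state, CB/DB state, buffer constants, GPR split on
 * r600/r700, dual-source blend workaround). Returns false when drawing must
 * be skipped. */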
1279 static bool r600_update_derived_state(struct r600_context *rctx)
1280 {
1281 struct pipe_context * ctx = (struct pipe_context*)rctx;
1282 bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
1283 bool blend_disable;
1284 bool need_buf_const;
1285 if (!rctx->blitter->running) {
1286 unsigned i;
1287
1288 /* Decompress textures if needed. */
1289 for (i = 0; i < PIPE_SHADER_TYPES; i++) {
1290 struct r600_samplerview_state *views = &rctx->samplers[i].views;
1291 if (views->compressed_depthtex_mask) {
1292 r600_decompress_depth_textures(rctx, views);
1293 }
1294 if (views->compressed_colortex_mask) {
1295 r600_decompress_color_textures(rctx, views);
1296 }
1297 }
1298 }
1299
1300 r600_shader_select(ctx, rctx->ps_shader, &ps_dirty);
1301 if (unlikely(!rctx->ps_shader->current))
1302 return false;
1303
1304 update_gs_block_state(rctx, rctx->gs_shader != NULL);
1305
1306 if (rctx->gs_shader) {
1307 r600_shader_select(ctx, rctx->gs_shader, &gs_dirty);
1308 if (unlikely(!rctx->gs_shader->current))
1309 return false;
1310
1311 if (!rctx->shader_stages.geom_enable) {
1312 rctx->shader_stages.geom_enable = true;
1313 r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
1314 }
1315
1316 /* gs_shader provides GS and VS (copy shader) */
1317 if (unlikely(rctx->geometry_shader.shader != rctx->gs_shader->current)) {
1318 update_shader_atom(ctx, &rctx->geometry_shader, rctx->gs_shader->current);
1319 update_shader_atom(ctx, &rctx->vertex_shader, rctx->gs_shader->current->gs_copy_shader);
1320 /* Update clip misc state. */
1321 if (rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
1322 rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write ||
1323 rctx->clip_misc_state.clip_disable != rctx->gs_shader->current->shader.vs_position_window_space) {
1324 rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl;
1325 rctx->clip_misc_state.clip_dist_write = rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write;
1326 rctx->clip_misc_state.clip_disable = rctx->gs_shader->current->shader.vs_position_window_space;
1327 r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
1328 }
1329 rctx->b.streamout.enabled_stream_buffers_mask = rctx->gs_shader->current->gs_copy_shader->enabled_stream_buffers_mask;
1330 }
1331
1332 r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
1333 if (unlikely(!rctx->vs_shader->current))
1334 return false;
1335
1336 /* vs_shader is used as ES */
1337 if (unlikely(vs_dirty || rctx->export_shader.shader != rctx->vs_shader->current)) {
1338 update_shader_atom(ctx, &rctx->export_shader, rctx->vs_shader->current);
1339 }
1340 } else {
1341 if (unlikely(rctx->geometry_shader.shader)) {
1342 update_shader_atom(ctx, &rctx->geometry_shader, NULL);
1343 update_shader_atom(ctx, &rctx->export_shader, NULL);
1344 rctx->shader_stages.geom_enable = false;
1345 r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
1346 }
1347
1348 r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
1349 if (unlikely(!rctx->vs_shader->current))
1350 return false;
1351
1352 if (unlikely(vs_dirty || rctx->vertex_shader.shader != rctx->vs_shader->current)) {
1353 update_shader_atom(ctx, &rctx->vertex_shader, rctx->vs_shader->current);
1354
1355 /* Update clip misc state. */
1356 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
1357 rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write ||
1358 rctx->clip_misc_state.clip_disable != rctx->vs_shader->current->shader.vs_position_window_space) {
1359 rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
1360 rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
1361 rctx->clip_misc_state.clip_disable = rctx->vs_shader->current->shader.vs_position_window_space;
1362 r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
1363 }
1364 rctx->b.streamout.enabled_stream_buffers_mask = rctx->vs_shader->current->enabled_stream_buffers_mask;
1365 }
1366 }
1367
1368
1369 if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader->current ||
1370 rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable ||
1371 rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)) {
1372
1373 if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
1374 rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
1375 r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
1376 }
1377
1378 if (rctx->b.chip_class <= R700) {
1379 bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
1380
1381 if (rctx->cb_misc_state.multiwrite != multiwrite) {
1382 rctx->cb_misc_state.multiwrite = multiwrite;
1383 r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
1384 }
1385 }
1386
1387 if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
1388 ((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) ||
1389 (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) {
1390
1391 if (rctx->b.chip_class >= EVERGREEN)
1392 evergreen_update_ps_state(ctx, rctx->ps_shader->current);
1393 else
1394 r600_update_ps_state(ctx, rctx->ps_shader->current);
1395 }
1396
1397 r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
1398 update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
1399 }
1400
1401 if (rctx->b.chip_class >= EVERGREEN) {
1402 evergreen_update_db_shader_control(rctx);
1403 } else {
1404 r600_update_db_shader_control(rctx);
1405 }
1406
1407 /* On r600 we stuff the masks + txq info into one constant buffer;
1408 * on evergreen we only need one for the txq info. */
1409 if (rctx->ps_shader) {
1410 need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers || rctx->ps_shader->current->shader.has_txq_cube_array_z_comp;
1411 if (need_buf_const) {
1412 if (rctx->b.chip_class < EVERGREEN)
1413 r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
1414 else
1415 eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
1416 }
1417 }
1418
1419 if (rctx->vs_shader) {
1420 need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers || rctx->vs_shader->current->shader.has_txq_cube_array_z_comp;
1421 if (need_buf_const) {
1422 if (rctx->b.chip_class < EVERGREEN)
1423 r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
1424 else
1425 eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
1426 }
1427 }
1428
1429 if (rctx->gs_shader) {
1430 need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers || rctx->gs_shader->current->shader.has_txq_cube_array_z_comp;
1431 if (need_buf_const) {
1432 if (rctx->b.chip_class < EVERGREEN)
1433 r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
1434 else
1435 eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
1436 }
1437 }
1438
1439 r600_update_driver_const_buffers(rctx);
1440
1441 if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
1442 if (!r600_adjust_gprs(rctx)) {
1443 /* discard rendering */
1444 return false;
1445 }
1446 }
1447
1448 blend_disable = (rctx->dual_src_blend &&
1449 rctx->ps_shader->current->nr_ps_color_outputs < 2);
1450
1451 if (blend_disable != rctx->force_blend_disable) {
1452 rctx->force_blend_disable = blend_disable;
1453 r600_bind_blend_state_internal(rctx,
1454 rctx->blend_state.cso,
1455 blend_disable);
1456 }
1457
1458 return true;
1459 }
1460
1461 void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
1462 {
1463 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
1464 struct r600_clip_misc_state *state = &rctx->clip_misc_state;
1465
1466 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
1467 state->pa_cl_clip_cntl |
1468 (state->clip_dist_write ? 0 : state->clip_plane_enable & 0x3F) |
1469 S_028810_CLIP_DISABLE(state->clip_disable));
1470 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
1471 state->pa_cl_vs_out_cntl |
1472 (state->clip_plane_enable & state->clip_dist_write));
1473 }
1474
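/* Main draw entry point: validate derived state, set up or translate the
 * index buffer, emit all dirty atoms and finally the draw packets. */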
1475 static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
1476 {
1477 struct r600_context *rctx = (struct r600_context *)ctx;
1478 struct pipe_draw_info info = *dinfo;
1479 struct pipe_index_buffer ib = {};
1480 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
1481 bool render_cond_bit = rctx->b.current_render_cond && !rctx->b.render_cond_force_off;
1482 uint64_t mask;
1483
1484 if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
1485 return;
1486 }
1487
1488 if (!rctx->vs_shader || !rctx->ps_shader) {
1489 assert(0);
1490 return;
1491 }
1492
1493 /* make sure that the gfx ring is the only one active */
1494 if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) {
1495 rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
1496 }
1497
1498 if (!r600_update_derived_state(rctx)) {
1499 /* There is no point in rendering: the current draw command
1500 * cannot be carried out.
1501 */
1502 return;
1503 }
1504
1505 if (info.indexed) {
1506 /* Initialize the index buffer struct. */
1507 pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
1508 ib.user_buffer = rctx->index_buffer.user_buffer;
1509 ib.index_size = rctx->index_buffer.index_size;
1510 ib.offset = rctx->index_buffer.offset;
1511 if (!info.indirect) {
1512 ib.offset += info.start * ib.index_size;
1513 }
1514
1515 /* Translate 8-bit indices to 16-bit. */
1516 if (unlikely(ib.index_size == 1)) {
1517 struct pipe_resource *out_buffer = NULL;
1518 unsigned out_offset;
1519 void *ptr;
1520 unsigned start, count;
1521
1522 if (likely(!info.indirect)) {
1523 start = 0;
1524 count = info.count;
1525 }
1526 else {
1527 /* Have to get start/count from indirect buffer, slow path ahead... */
1528 struct r600_resource *indirect_resource = (struct r600_resource *)info.indirect;
1529 unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource,
1530 PIPE_TRANSFER_READ);
1531 if (data) {
1532 data += info.indirect_offset / sizeof(unsigned);
1533 start = data[2] * ib.index_size;
1534 count = data[0];
1535 }
1536 else {
1537 start = 0;
1538 count = 0;
1539 }
1540 }
1541
1542 u_upload_alloc(rctx->b.uploader, start, count * 2,
1543 &out_offset, &out_buffer, &ptr);
1544
1545 util_shorten_ubyte_elts_to_userptr(
1546 &rctx->b.b, &ib, 0, ib.offset + start, count, ptr);
1547
1548 pipe_resource_reference(&ib.buffer, NULL);
1549 ib.user_buffer = NULL;
1550 ib.buffer = out_buffer;
1551 ib.offset = out_offset;
1552 ib.index_size = 2;
1553 }
1554
1555 /* Upload the index buffer.
1556 * The upload is skipped for small index counts on little-endian machines
1557 * and the indices are emitted via PKT3_DRAW_INDEX_IMMD.
1558 * Indirect draws never use immediate indices.
1559 * Note: Instanced rendering in combination with immediate indices hangs. */
1560 if (ib.user_buffer && (R600_BIG_ENDIAN || info.indirect ||
1561 info.instance_count > 1 ||
1562 info.count*ib.index_size > 20)) {
1563 u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size,
1564 ib.user_buffer, &ib.offset, &ib.buffer);
1565 ib.user_buffer = NULL;
1566 }
1567 } else {
1568 info.index_bias = info.start;
1569 }
1570
1571 /* Set the index offset and primitive restart. */
1572 if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info.primitive_restart ||
1573 rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info.restart_index ||
1574 rctx->vgt_state.vgt_indx_offset != info.index_bias ||
1575 (rctx->vgt_state.last_draw_was_indirect && !info.indirect)) {
1576 rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart;
1577 rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index;
1578 rctx->vgt_state.vgt_indx_offset = info.index_bias;
1579 r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom);
1580 }
1581
1582 /* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */
1583 if (rctx->b.chip_class == R600) {
1584 rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
1585 r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
1586 }
1587
1588 /* Emit states. */
1589 r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
1590 r600_flush_emit(rctx);
1591
1592 mask = rctx->dirty_atoms;
1593 while (mask != 0) {
1594 r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
1595 }
1596
1597 if (rctx->b.chip_class == CAYMAN) {
1598 /* Copied from radeonsi. */
1599 unsigned primgroup_size = 128; /* recommended without a GS */
1600 bool ia_switch_on_eop = false;
1601 bool partial_vs_wave = false;
1602
1603 if (rctx->gs_shader)
1604 primgroup_size = 64; /* recommended with a GS */
1605
1606 if ((rctx->rasterizer && rctx->rasterizer->pa_sc_line_stipple) ||
1607 (rctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
1608 ia_switch_on_eop = true;
1609 }
1610
1611 if (rctx->b.streamout.streamout_enabled ||
1612 rctx->b.streamout.prims_gen_query_enabled)
1613 partial_vs_wave = true;
1614
1615 radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
1616 S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
1617 S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
1618 S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
1619 }
1620
1621 /* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
1622 * even though it should have no effect on those. */
1623 if (rctx->b.chip_class == R600 && rctx->rasterizer) {
1624 unsigned su_sc_mode_cntl = rctx->rasterizer->pa_su_sc_mode_cntl;
1625 unsigned prim = info.mode;
1626
1627 if (rctx->gs_shader) {
1628 prim = rctx->gs_shader->gs_output_prim;
1629 }
1630 prim = r600_conv_prim_to_gs_out(prim); /* map to one of the 3 GS output primitive types */
1631
1632 if (prim == V_028A6C_OUTPRIM_TYPE_POINTLIST ||
1633 prim == V_028A6C_OUTPRIM_TYPE_LINESTRIP ||
1634 info.mode == R600_PRIM_RECTANGLE_LIST) {
1635 su_sc_mode_cntl &= C_028814_CULL_FRONT;
1636 }
1637 radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
1638 }
1639
1640 /* Update start instance. */
1641 if (!info.indirect && rctx->last_start_instance != info.start_instance) {
1642 radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
1643 rctx->last_start_instance = info.start_instance;
1644 }
1645
1646 /* Update the primitive type. */
1647 if (rctx->last_primitive_type != info.mode) {
1648 unsigned ls_mask = 0;
1649
1650 if (info.mode == PIPE_PRIM_LINES)
1651 ls_mask = 1;
1652 else if (info.mode == PIPE_PRIM_LINE_STRIP ||
1653 info.mode == PIPE_PRIM_LINE_LOOP)
1654 ls_mask = 2;
1655
1656 radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
1657 S_028A0C_AUTO_RESET_CNTL(ls_mask) |
1658 (rctx->rasterizer ? rctx->rasterizer->pa_sc_line_stipple : 0));
1659 radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
1660 r600_conv_pipe_prim(info.mode));
1661
1662 rctx->last_primitive_type = info.mode;
1663 }
1664
1665 /* Draw packets. */
1666 if (!info.indirect) {
1667 cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, 0);
1668 cs->buf[cs->cdw++] = info.instance_count;
1669 }
1670
1671 if (unlikely(info.indirect)) {
1672 uint64_t va = r600_resource(info.indirect)->gpu_address;
1673 assert(rctx->b.chip_class >= EVERGREEN);
1674
1675 /* Invalidate so non-indirect draw calls reset this state. */
1676 rctx->vgt_state.last_draw_was_indirect = true;
1677 rctx->last_start_instance = -1;
1678
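/* SET_BASE establishes the address that the indirect draw packet offsets are relative to. */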
1679 cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, 0);
1680 cs->buf[cs->cdw++] = EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE;
1681 cs->buf[cs->cdw++] = va;
1682 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
1683
1684 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
1685 cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
1686 (struct r600_resource*)info.indirect,
1687 RADEON_USAGE_READ,
1688 RADEON_PRIO_DRAW_INDIRECT);
1689 }
1690
1691 if (info.indexed) {
1692 cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, 0);
1693 cs->buf[cs->cdw++] = ib.index_size == 4 ?
1694 (VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
1695 (VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0));
1696
1697 if (ib.user_buffer) {
1698 unsigned size_bytes = info.count*ib.index_size;
1699 unsigned size_dw = align(size_bytes, 4) / 4;
1700 cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit);
1701 cs->buf[cs->cdw++] = info.count;
1702 cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_IMMEDIATE;
1703 memcpy(cs->buf+cs->cdw, ib.user_buffer, size_bytes);
1704 cs->cdw += size_dw;
1705 } else {
1706 uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset;
1707
1708 if (likely(!info.indirect)) {
1709 cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit);
1710 cs->buf[cs->cdw++] = va;
1711 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
1712 cs->buf[cs->cdw++] = info.count;
1713 cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
1714 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
1715 cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
1716 (struct r600_resource*)ib.buffer,
1717 RADEON_USAGE_READ,
1718 RADEON_PRIO_INDEX_BUFFER);
1719 }
1720 else {
1721 uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
1722
1723 cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, 0);
1724 cs->buf[cs->cdw++] = va;
1725 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
1726
1727 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
1728 cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
1729 (struct r600_resource*)ib.buffer,
1730 RADEON_USAGE_READ,
1731 RADEON_PRIO_INDEX_BUFFER);
1732
1733 cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0);
1734 cs->buf[cs->cdw++] = max_size;
1735
1736 cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit);
1737 cs->buf[cs->cdw++] = info.indirect_offset;
1738 cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
1739 }
1740 }
1741 } else {
1742 if (unlikely(info.count_from_stream_output)) {
1743 struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
1744 uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
1745
1746 radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
1747
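/* Copy the streamout buffer-filled size from memory into the VGT register;
 * the draw below derives its vertex count from it (USE_OPAQUE). */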
1748 cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
1749 cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
1750 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */
1751 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */
1752 cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
1753 cs->buf[cs->cdw++] = 0; /* unused */
1754
1755 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
1756 cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
1757 t->buf_filled_size, RADEON_USAGE_READ,
1758 RADEON_PRIO_SO_FILLED_SIZE);
1759 }
1760
1761 if (likely(!info.indirect)) {
1762 cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit);
1763 cs->buf[cs->cdw++] = info.count;
1764 }
1765 else {
1766 cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit);
1767 cs->buf[cs->cdw++] = info.indirect_offset;
1768 }
1769 cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
1770 (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
1771 }
1772
1773 if (rctx->screen->b.trace_bo) {
1774 r600_trace_emit(rctx);
1775 }
1776
1777 /* Mark the bound depth/stencil level as dirty so it gets decompressed before it is sampled. */
1778 if (rctx->framebuffer.state.zsbuf) {
1779 struct pipe_surface *surf = rctx->framebuffer.state.zsbuf;
1780 struct r600_texture *rtex = (struct r600_texture *)surf->texture;
1781
1782 rtex->dirty_level_mask |= 1 << surf->u.tex.level;
1783
1784 if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1785 rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
1786 }
1787 if (rctx->framebuffer.compressed_cb_mask) {
1788 struct pipe_surface *surf;
1789 struct r600_texture *rtex;
1790 unsigned mask = rctx->framebuffer.compressed_cb_mask;
1791
1792 do {
1793 unsigned i = u_bit_scan(&mask);
1794 surf = rctx->framebuffer.state.cbufs[i];
1795 rtex = (struct r600_texture*)surf->texture;
1796
1797 rtex->dirty_level_mask |= 1 << surf->u.tex.level;
1798
1799 } while (mask);
1800 }
1801
1802 pipe_resource_reference(&ib.buffer, NULL);
1803 rctx->b.num_draw_calls++;
1804 }
1805
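/* Translate a gallium PIPE_STENCIL_OP_* value into the hardware STENCIL_* field encoding. */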
1806 uint32_t r600_translate_stencil_op(int s_op)
1807 {
1808 switch (s_op) {
1809 case PIPE_STENCIL_OP_KEEP:
1810 return V_028800_STENCIL_KEEP;
1811 case PIPE_STENCIL_OP_ZERO:
1812 return V_028800_STENCIL_ZERO;
1813 case PIPE_STENCIL_OP_REPLACE:
1814 return V_028800_STENCIL_REPLACE;
1815 case PIPE_STENCIL_OP_INCR:
1816 return V_028800_STENCIL_INCR;
1817 case PIPE_STENCIL_OP_DECR:
1818 return V_028800_STENCIL_DECR;
1819 case PIPE_STENCIL_OP_INCR_WRAP:
1820 return V_028800_STENCIL_INCR_WRAP;
1821 case PIPE_STENCIL_OP_DECR_WRAP:
1822 return V_028800_STENCIL_DECR_WRAP;
1823 case PIPE_STENCIL_OP_INVERT:
1824 return V_028800_STENCIL_INVERT;
1825 default:
1826 R600_ERR("Unknown stencil op %d", s_op);
1827 assert(0);
1828 break;
1829 }
1830 return 0;
1831 }
1832
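/* Translate a gallium polygon fill mode into the hardware polygon-mode
 * encoding (2 = filled triangles, 1 = lines, 0 = points). */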
1833 uint32_t r600_translate_fill(uint32_t func)
1834 {
1835 switch(func) {
1836 case PIPE_POLYGON_MODE_FILL:
1837 return 2;
1838 case PIPE_POLYGON_MODE_LINE:
1839 return 1;
1840 case PIPE_POLYGON_MODE_POINT:
1841 return 0;
1842 default:
1843 assert(0);
1844 return 0;
1845 }
1846 }
1847
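/* Translate a gallium texture wrap mode into the SQ_TEX_SAMPLER clamp encoding. */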
1848 unsigned r600_tex_wrap(unsigned wrap)
1849 {
1850 switch (wrap) {
1851 default:
1852 case PIPE_TEX_WRAP_REPEAT:
1853 return V_03C000_SQ_TEX_WRAP;
1854 case PIPE_TEX_WRAP_CLAMP:
1855 return V_03C000_SQ_TEX_CLAMP_HALF_BORDER;
1856 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1857 return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL;
1858 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1859 return V_03C000_SQ_TEX_CLAMP_BORDER;
1860 case PIPE_TEX_WRAP_MIRROR_REPEAT:
1861 return V_03C000_SQ_TEX_MIRROR;
1862 case PIPE_TEX_WRAP_MIRROR_CLAMP:
1863 return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1864 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1865 return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1866 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1867 return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER;
1868 }
1869 }
1870
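/* Translate a gallium min/mag filter into the SQ_TEX_SAMPLER XY filter encoding. */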
1871 unsigned r600_tex_filter(unsigned filter)
1872 {
1873 switch (filter) {
1874 default:
1875 case PIPE_TEX_FILTER_NEAREST:
1876 return V_03C000_SQ_TEX_XY_FILTER_POINT;
1877 case PIPE_TEX_FILTER_LINEAR:
1878 return V_03C000_SQ_TEX_XY_FILTER_BILINEAR;
1879 }
1880 }
1881
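/* Translate a gallium mip filter into the SQ_TEX_SAMPLER Z filter encoding. */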
1882 unsigned r600_tex_mipfilter(unsigned filter)
1883 {
1884 switch (filter) {
1885 case PIPE_TEX_MIPFILTER_NEAREST:
1886 return V_03C000_SQ_TEX_Z_FILTER_POINT;
1887 case PIPE_TEX_MIPFILTER_LINEAR:
1888 return V_03C000_SQ_TEX_Z_FILTER_LINEAR;
1889 default:
1890 case PIPE_TEX_MIPFILTER_NONE:
1891 return V_03C000_SQ_TEX_Z_FILTER_NONE;
1892 }
1893 }
1894
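/* Translate a gallium comparison function into the SQ_TEX_SAMPLER depth-compare encoding. */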
1895 unsigned r600_tex_compare(unsigned compare)
1896 {
1897 switch (compare) {
1898 default:
1899 case PIPE_FUNC_NEVER:
1900 return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER;
1901 case PIPE_FUNC_LESS:
1902 return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS;
1903 case PIPE_FUNC_EQUAL:
1904 return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL;
1905 case PIPE_FUNC_LEQUAL:
1906 return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1907 case PIPE_FUNC_GREATER:
1908 return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER;
1909 case PIPE_FUNC_NOTEQUAL:
1910 return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1911 case PIPE_FUNC_GEQUAL:
1912 return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1913 case PIPE_FUNC_ALWAYS:
1914 return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1915 }
1916 }
1917
1918 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
1919 {
1920 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
1921 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
1922 (linear_filter &&
1923 (wrap == PIPE_TEX_WRAP_CLAMP ||
1924 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
1925 }
1926
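/* The border color only needs to be uploaded when it is non-zero and at least
 * one coordinate uses a wrap mode that can actually sample the border. */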
1927 bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
1928 {
1929 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
1930 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
1931
1932 return (state->border_color.ui[0] || state->border_color.ui[1] ||
1933 state->border_color.ui[2] || state->border_color.ui[3]) &&
1934 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
1935 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
1936 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
1937 }
1938
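/* Emit a shader stage: replay its pre-built register command buffer and add a
 * relocation so the shader code buffer stays resident. */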
1939 void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
1940 {
1941
1942 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
1943 struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;
1944
1945 if (!shader)
1946 return;
1947
1948 r600_emit_command_buffer(cs, &shader->command_buffer);
1949 radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
1950 radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->bo,
1951 RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
1952 }
1953
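/* Compose the format swizzle with the view swizzle (if any) and pack the result
 * into the DST_SEL fields; texture and vertex-fetch resource words use different
 * bit offsets, selected by the 'vtx' flag. */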
1954 unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
1955 const unsigned char *swizzle_view,
1956 boolean vtx)
1957 {
1958 unsigned i;
1959 unsigned char swizzle[4];
1960 unsigned result = 0;
1961 const uint32_t tex_swizzle_shift[4] = {
1962 16, 19, 22, 25,
1963 };
1964 const uint32_t vtx_swizzle_shift[4] = {
1965 3, 6, 9, 12,
1966 };
1967 const uint32_t swizzle_bit[4] = {
1968 0, 1, 2, 3,
1969 };
1970 const uint32_t *swizzle_shift = tex_swizzle_shift;
1971
1972 if (vtx)
1973 swizzle_shift = vtx_swizzle_shift;
1974
1975 if (swizzle_view) {
1976 util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
1977 } else {
1978 memcpy(swizzle, swizzle_format, 4);
1979 }
1980
1981 /* Get swizzle. */
1982 for (i = 0; i < 4; i++) {
1983 switch (swizzle[i]) {
1984 case UTIL_FORMAT_SWIZZLE_Y:
1985 result |= swizzle_bit[1] << swizzle_shift[i];
1986 break;
1987 case UTIL_FORMAT_SWIZZLE_Z:
1988 result |= swizzle_bit[2] << swizzle_shift[i];
1989 break;
1990 case UTIL_FORMAT_SWIZZLE_W:
1991 result |= swizzle_bit[3] << swizzle_shift[i];
1992 break;
1993 case UTIL_FORMAT_SWIZZLE_0:
1994 result |= V_038010_SQ_SEL_0 << swizzle_shift[i];
1995 break;
1996 case UTIL_FORMAT_SWIZZLE_1:
1997 result |= V_038010_SQ_SEL_1 << swizzle_shift[i];
1998 break;
1999 default: /* UTIL_FORMAT_SWIZZLE_X */
2000 result |= swizzle_bit[0] << swizzle_shift[i];
2001 }
2002 }
2003 return result;
2004 }
2005
2006 /* Translate a gallium texture format into the hardware texture DATA_FORMAT value;
2006  * the matching swizzle/number-format bits are returned through word4_p and yuv_format_p. */
2007 uint32_t r600_translate_texformat(struct pipe_screen *screen,
2008 enum pipe_format format,
2009 const unsigned char *swizzle_view,
2010 uint32_t *word4_p, uint32_t *yuv_format_p)
2011 {
2012 struct r600_screen *rscreen = (struct r600_screen *)screen;
2013 uint32_t result = 0, word4 = 0, yuv_format = 0;
2014 const struct util_format_description *desc;
2015 boolean uniform = TRUE;
2016 bool enable_s3tc = rscreen->b.info.drm_minor >= 9;
2017 bool is_srgb_valid = FALSE;
2018 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2019 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2020
2021 int i;
2022 const uint32_t sign_bit[4] = {
2023 S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED),
2024 S_038010_FORMAT_COMP_Y(V_038010_SQ_FORMAT_COMP_SIGNED),
2025 S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED),
2026 S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED)
2027 };
2028 desc = util_format_description(format);
2029
2030 /* Depth and stencil swizzling is handled separately. */
2031 if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
2032 word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
2033 }
2034
2035 /* Colorspace (return non-RGB formats directly). */
2036 switch (desc->colorspace) {
2037 /* Depth stencil formats */
2038 case UTIL_FORMAT_COLORSPACE_ZS:
2039 switch (format) {
2040 /* Depth sampler formats. */
2041 case PIPE_FORMAT_Z16_UNORM:
2042 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE);
2043 result = FMT_16;
2044 goto out_word4;
2045 case PIPE_FORMAT_Z24X8_UNORM:
2046 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2047 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE);
2048 result = FMT_8_24;
2049 goto out_word4;
2050 case PIPE_FORMAT_X8Z24_UNORM:
2051 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2052 if (rscreen->b.chip_class < EVERGREEN)
2053 goto out_unknown;
2054 word4 |= r600_get_swizzle_combined(swizzle_yyyy, swizzle_view, FALSE);
2055 result = FMT_24_8;
2056 goto out_word4;
2057 case PIPE_FORMAT_Z32_FLOAT:
2058 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE);
2059 result = FMT_32_FLOAT;
2060 goto out_word4;
2061 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2062 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE);
2063 result = FMT_X24_8_32_FLOAT;
2064 goto out_word4;
2065 /* Stencil sampler formats. */
2066 case PIPE_FORMAT_S8_UINT:
2067 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
2068 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE);
2069 result = FMT_8;
2070 goto out_word4;
2071 case PIPE_FORMAT_X24S8_UINT:
2072 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
2073 word4 |= r600_get_swizzle_combined(swizzle_yyyy, swizzle_view, FALSE);
2074 result = FMT_8_24;
2075 goto out_word4;
2076 case PIPE_FORMAT_S8X24_UINT:
2077 if (rscreen->b.chip_class < EVERGREEN)
2078 goto out_unknown;
2079 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
2080 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE);
2081 result = FMT_24_8;
2082 goto out_word4;
2083 case PIPE_FORMAT_X32_S8X24_UINT:
2084 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
2085 word4 |= r600_get_swizzle_combined(swizzle_yyyy, swizzle_view, FALSE);
2086 result = FMT_X24_8_32_FLOAT;
2087 goto out_word4;
2088 default:
2089 goto out_unknown;
2090 }
2091
2092 case UTIL_FORMAT_COLORSPACE_YUV:
2093 yuv_format |= (1 << 30);
2094 switch (format) {
2095 case PIPE_FORMAT_UYVY:
2096 case PIPE_FORMAT_YUYV:
2097 default:
2098 break;
2099 }
2100 goto out_unknown; /* XXX */
2101
2102 case UTIL_FORMAT_COLORSPACE_SRGB:
2103 word4 |= S_038010_FORCE_DEGAMMA(1);
2104 break;
2105
2106 default:
2107 break;
2108 }
2109
2110 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
2111 if (!enable_s3tc)
2112 goto out_unknown;
2113
2114 switch (format) {
2115 case PIPE_FORMAT_RGTC1_SNORM:
2116 case PIPE_FORMAT_LATC1_SNORM:
2117 word4 |= sign_bit[0];
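/* fall through */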
2118 case PIPE_FORMAT_RGTC1_UNORM:
2119 case PIPE_FORMAT_LATC1_UNORM:
2120 result = FMT_BC4;
2121 goto out_word4;
2122 case PIPE_FORMAT_RGTC2_SNORM:
2123 case PIPE_FORMAT_LATC2_SNORM:
2124 word4 |= sign_bit[0] | sign_bit[1];
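/* fall through */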
2125 case PIPE_FORMAT_RGTC2_UNORM:
2126 case PIPE_FORMAT_LATC2_UNORM:
2127 result = FMT_BC5;
2128 goto out_word4;
2129 default:
2130 goto out_unknown;
2131 }
2132 }
2133
2134 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2135
2136 if (!enable_s3tc)
2137 goto out_unknown;
2138
2139 if (!util_format_s3tc_enabled) {
2140 goto out_unknown;
2141 }
2142
2143 switch (format) {
2144 case PIPE_FORMAT_DXT1_RGB:
2145 case PIPE_FORMAT_DXT1_RGBA:
2146 case PIPE_FORMAT_DXT1_SRGB:
2147 case PIPE_FORMAT_DXT1_SRGBA:
2148 result = FMT_BC1;
2149 is_srgb_valid = TRUE;
2150 goto out_word4;
2151 case PIPE_FORMAT_DXT3_RGBA:
2152 case PIPE_FORMAT_DXT3_SRGBA:
2153 result = FMT_BC2;
2154 is_srgb_valid = TRUE;
2155 goto out_word4;
2156 case PIPE_FORMAT_DXT5_RGBA:
2157 case PIPE_FORMAT_DXT5_SRGBA:
2158 result = FMT_BC3;
2159 is_srgb_valid = TRUE;
2160 goto out_word4;
2161 default:
2162 goto out_unknown;
2163 }
2164 }
2165
2166 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
2167 if (!enable_s3tc)
2168 goto out_unknown;
2169
2170 if (rscreen->b.chip_class < EVERGREEN)
2171 goto out_unknown;
2172
2173 switch (format) {
2174 case PIPE_FORMAT_BPTC_RGBA_UNORM:
2175 case PIPE_FORMAT_BPTC_SRGBA:
2176 result = FMT_BC7;
2177 is_srgb_valid = TRUE;
2178 goto out_word4;
2179 case PIPE_FORMAT_BPTC_RGB_FLOAT:
2180 word4 |= sign_bit[0] | sign_bit[1] | sign_bit[2];
2181 /* fall through */
2182 case PIPE_FORMAT_BPTC_RGB_UFLOAT:
2183 result = FMT_BC6;
2184 goto out_word4;
2185 default:
2186 goto out_unknown;
2187 }
2188 }
2189
2190 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2191 switch (format) {
2192 case PIPE_FORMAT_R8G8_B8G8_UNORM:
2193 case PIPE_FORMAT_G8R8_B8R8_UNORM:
2194 result = FMT_GB_GR;
2195 goto out_word4;
2196 case PIPE_FORMAT_G8R8_G8B8_UNORM:
2197 case PIPE_FORMAT_R8G8_R8B8_UNORM:
2198 result = FMT_BG_RG;
2199 goto out_word4;
2200 default:
2201 goto out_unknown;
2202 }
2203 }
2204
2205 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
2206 result = FMT_5_9_9_9_SHAREDEXP;
2207 goto out_word4;
2208 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
2209 result = FMT_10_11_11_FLOAT;
2210 goto out_word4;
2211 }
2212
2213
2214 for (i = 0; i < desc->nr_channels; i++) {
2215 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2216 word4 |= sign_bit[i];
2217 }
2218 }
2219
2220 /* R8G8Bx_SNORM - XXX CxV8U8 */
2221
2222 /* See whether the components are of the same size. */
2223 for (i = 1; i < desc->nr_channels; i++) {
2224 uniform = uniform && desc->channel[0].size == desc->channel[i].size;
2225 }
2226
2227 /* Non-uniform formats. */
2228 if (!uniform) {
2229 if (desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
2230 desc->channel[0].pure_integer)
2231 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
2232 switch(desc->nr_channels) {
2233 case 3:
2234 if (desc->channel[0].size == 5 &&
2235 desc->channel[1].size == 6 &&
2236 desc->channel[2].size == 5) {
2237 result = FMT_5_6_5;
2238 goto out_word4;
2239 }
2240 goto out_unknown;
2241 case 4:
2242 if (desc->channel[0].size == 5 &&
2243 desc->channel[1].size == 5 &&
2244 desc->channel[2].size == 5 &&
2245 desc->channel[3].size == 1) {
2246 result = FMT_1_5_5_5;
2247 goto out_word4;
2248 }
2249 if (desc->channel[0].size == 10 &&
2250 desc->channel[1].size == 10 &&
2251 desc->channel[2].size == 10 &&
2252 desc->channel[3].size == 2) {
2253 result = FMT_2_10_10_10;
2254 goto out_word4;
2255 }
2256 goto out_unknown;
2257 }
2258 goto out_unknown;
2259 }
2260
2261 /* Find the first non-VOID channel. */
2262 for (i = 0; i < 4; i++) {
2263 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2264 break;
2265 }
2266 }
2267
2268 if (i == 4)
2269 goto out_unknown;
2270
2271 /* Uniform formats. */
2272 switch (desc->channel[i].type) {
2273 case UTIL_FORMAT_TYPE_UNSIGNED:
2274 case UTIL_FORMAT_TYPE_SIGNED:
2275 #if 0
2276 if (!desc->channel[i].normalized &&
2277 desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
2278 goto out_unknown;
2279 }
2280 #endif
2281 if (desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
2282 desc->channel[i].pure_integer)
2283 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
2284
2285 switch (desc->channel[i].size) {
2286 case 4:
2287 switch (desc->nr_channels) {
2288 case 2:
2289 result = FMT_4_4;
2290 goto out_word4;
2291 case 4:
2292 result = FMT_4_4_4_4;
2293 goto out_word4;
2294 }
2295 goto out_unknown;
2296 case 8:
2297 switch (desc->nr_channels) {
2298 case 1:
2299 result = FMT_8;
2300 goto out_word4;
2301 case 2:
2302 result = FMT_8_8;
2303 goto out_word4;
2304 case 4:
2305 result = FMT_8_8_8_8;
2306 is_srgb_valid = TRUE;
2307 goto out_word4;
2308 }
2309 goto out_unknown;
2310 case 16:
2311 switch (desc->nr_channels) {
2312 case 1:
2313 result = FMT_16;
2314 goto out_word4;
2315 case 2:
2316 result = FMT_16_16;
2317 goto out_word4;
2318 case 4:
2319 result = FMT_16_16_16_16;
2320 goto out_word4;
2321 }
2322 goto out_unknown;
2323 case 32:
2324 switch (desc->nr_channels) {
2325 case 1:
2326 result = FMT_32;
2327 goto out_word4;
2328 case 2:
2329 result = FMT_32_32;
2330 goto out_word4;
2331 case 4:
2332 result = FMT_32_32_32_32;
2333 goto out_word4;
2334 }
2335 }
2336 goto out_unknown;
2337
2338 case UTIL_FORMAT_TYPE_FLOAT:
2339 switch (desc->channel[i].size) {
2340 case 16:
2341 switch (desc->nr_channels) {
2342 case 1:
2343 result = FMT_16_FLOAT;
2344 goto out_word4;
2345 case 2:
2346 result = FMT_16_16_FLOAT;
2347 goto out_word4;
2348 case 4:
2349 result = FMT_16_16_16_16_FLOAT;
2350 goto out_word4;
2351 }
2352 goto out_unknown;
2353 case 32:
2354 switch (desc->nr_channels) {
2355 case 1:
2356 result = FMT_32_FLOAT;
2357 goto out_word4;
2358 case 2:
2359 result = FMT_32_32_FLOAT;
2360 goto out_word4;
2361 case 4:
2362 result = FMT_32_32_32_32_FLOAT;
2363 goto out_word4;
2364 }
2365 }
2366 goto out_unknown;
2367 }
2368
2369 out_word4:
2370
2371 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && !is_srgb_valid)
2372 return ~0;
2373 if (word4_p)
2374 *word4_p = word4;
2375 if (yuv_format_p)
2376 *yuv_format_p = yuv_format;
2377 return result;
2378 out_unknown:
2379 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
2380 return ~0;
2381 }
2382
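/* Translate a gallium format into the hardware color-buffer format encoding
 * (V_0280A0_COLOR_*), or ~0U if the format is not renderable. */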
2383 uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format)
2384 {
2385 const struct util_format_description *desc = util_format_description(format);
2386 int channel = util_format_get_first_non_void_channel(format);
2387 bool is_float;
2388
2389 #define HAS_SIZE(x,y,z,w) \
2390 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
2391 desc->channel[2].size == (z) && desc->channel[3].size == (w))
2392
2393 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
2394 return V_0280A0_COLOR_10_11_11_FLOAT;
2395
2396 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
2397 channel == -1)
2398 return ~0U;
2399
2400 is_float = desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT;
2401
2402 switch (desc->nr_channels) {
2403 case 1:
2404 switch (desc->channel[0].size) {
2405 case 8:
2406 return V_0280A0_COLOR_8;
2407 case 16:
2408 if (is_float)
2409 return V_0280A0_COLOR_16_FLOAT;
2410 else
2411 return V_0280A0_COLOR_16;
2412 case 32:
2413 if (is_float)
2414 return V_0280A0_COLOR_32_FLOAT;
2415 else
2416 return V_0280A0_COLOR_32;
2417 }
2418 break;
2419 case 2:
2420 if (desc->channel[0].size == desc->channel[1].size) {
2421 switch (desc->channel[0].size) {
2422 case 4:
2423 if (chip <= R700)
2424 return V_0280A0_COLOR_4_4;
2425 else
2426 return ~0U; /* removed on Evergreen */
2427 case 8:
2428 return V_0280A0_COLOR_8_8;
2429 case 16:
2430 if (is_float)
2431 return V_0280A0_COLOR_16_16_FLOAT;
2432 else
2433 return V_0280A0_COLOR_16_16;
2434 case 32:
2435 if (is_float)
2436 return V_0280A0_COLOR_32_32_FLOAT;
2437 else
2438 return V_0280A0_COLOR_32_32;
2439 }
2440 } else if (HAS_SIZE(8,24,0,0)) {
2441 return V_0280A0_COLOR_24_8;
2442 } else if (HAS_SIZE(24,8,0,0)) {
2443 return V_0280A0_COLOR_8_24;
2444 }
2445 break;
2446 case 3:
2447 if (HAS_SIZE(5,6,5,0)) {
2448 return V_0280A0_COLOR_5_6_5;
2449 } else if (HAS_SIZE(32,8,24,0)) {
2450 return V_0280A0_COLOR_X24_8_32_FLOAT;
2451 }
2452 break;
2453 case 4:
2454 if (desc->channel[0].size == desc->channel[1].size &&
2455 desc->channel[0].size == desc->channel[2].size &&
2456 desc->channel[0].size == desc->channel[3].size) {
2457 switch (desc->channel[0].size) {
2458 case 4:
2459 return V_0280A0_COLOR_4_4_4_4;
2460 case 8:
2461 return V_0280A0_COLOR_8_8_8_8;
2462 case 16:
2463 if (is_float)
2464 return V_0280A0_COLOR_16_16_16_16_FLOAT;
2465 else
2466 return V_0280A0_COLOR_16_16_16_16;
2467 case 32:
2468 if (is_float)
2469 return V_0280A0_COLOR_32_32_32_32_FLOAT;
2470 else
2471 return V_0280A0_COLOR_32_32_32_32;
2472 }
2473 } else if (HAS_SIZE(5,5,5,1)) {
2474 return V_0280A0_COLOR_1_5_5_5;
2475 } else if (HAS_SIZE(10,10,10,2)) {
2476 return V_0280A0_COLOR_2_10_10_10;
2477 }
2478 break;
2479 }
2480 return ~0U;
2481 }
2482
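/* On big-endian hosts, pick the byte-swap mode that matches the pixel size of
 * the color format; little-endian hosts never swap. */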
2483 uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
2484 {
2485 if (R600_BIG_ENDIAN) {
2486 switch(colorformat) {
2487 /* 8-bit buffers. */
2488 case V_0280A0_COLOR_4_4:
2489 case V_0280A0_COLOR_8:
2490 return ENDIAN_NONE;
2491
2492 /* 16-bit buffers. */
2493 case V_0280A0_COLOR_5_6_5:
2494 case V_0280A0_COLOR_1_5_5_5:
2495 case V_0280A0_COLOR_4_4_4_4:
2496 case V_0280A0_COLOR_16:
2497 case V_0280A0_COLOR_8_8:
2498 return ENDIAN_8IN16;
2499
2500 /* 32-bit buffers. */
2501 case V_0280A0_COLOR_8_8_8_8:
2502 case V_0280A0_COLOR_2_10_10_10:
2503 case V_0280A0_COLOR_8_24:
2504 case V_0280A0_COLOR_24_8:
2505 case V_0280A0_COLOR_32_FLOAT:
2506 case V_0280A0_COLOR_16_16_FLOAT:
2507 case V_0280A0_COLOR_16_16:
2508 return ENDIAN_8IN32;
2509
2510 /* 64-bit buffers. */
2511 case V_0280A0_COLOR_16_16_16_16:
2512 case V_0280A0_COLOR_16_16_16_16_FLOAT:
2513 return ENDIAN_8IN16;
2514
2515 case V_0280A0_COLOR_32_32_FLOAT:
2516 case V_0280A0_COLOR_32_32:
2517 case V_0280A0_COLOR_X24_8_32_FLOAT:
2518 return ENDIAN_8IN32;
2519
2520 /* 128-bit buffers. */
2521 case V_0280A0_COLOR_32_32_32_32_FLOAT:
2522 case V_0280A0_COLOR_32_32_32_32:
2523 return ENDIAN_8IN32;
2524 default:
2525 return ENDIAN_NONE; /* Unsupported. */
2526 }
2527 } else {
2528 return ENDIAN_NONE;
2529 }
2530 }
2531
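/* Invalidate a buffer by reallocating its storage within the same pipe_resource,
 * then mark every binding that uses it dirty so the new GPU address is re-emitted. */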
2532 static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
2533 {
2534 struct r600_context *rctx = (struct r600_context*)ctx;
2535 struct r600_resource *rbuffer = r600_resource(buf);
2536 unsigned i, shader, mask, alignment = rbuffer->buf->alignment;
2537 struct r600_pipe_sampler_view *view;
2538
2539 /* Reallocate the buffer in the same pipe_resource. */
2540 r600_init_resource(&rctx->screen->b, rbuffer, rbuffer->b.b.width0,
2541 alignment, TRUE);
2542
2543 /* The buffer storage changed; mark every binding that references it dirty so the new address is re-emitted. */
2544 /* Vertex buffers. */
2545 mask = rctx->vertex_buffer_state.enabled_mask;
2546 while (mask) {
2547 i = u_bit_scan(&mask);
2548 if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
2549 rctx->vertex_buffer_state.dirty_mask |= 1 << i;
2550 r600_vertex_buffers_dirty(rctx);
2551 }
2552 }
2553 /* Streamout buffers. */
2554 for (i = 0; i < rctx->b.streamout.num_targets; i++) {
2555 if (rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
2556 if (rctx->b.streamout.begin_emitted) {
2557 r600_emit_streamout_end(&rctx->b);
2558 }
2559 rctx->b.streamout.append_bitmask = rctx->b.streamout.enabled_mask;
2560 r600_streamout_buffers_dirty(&rctx->b);
2561 }
2562 }
2563
2564 /* Constant buffers. */
2565 for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
2566 struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
2567 bool found = false;
2568 uint32_t mask = state->enabled_mask;
2569
2570 while (mask) {
2571 unsigned i = u_bit_scan(&mask);
2572 if (state->cb[i].buffer == &rbuffer->b.b) {
2573 found = true;
2574 state->dirty_mask |= 1 << i;
2575 }
2576 }
2577 if (found) {
2578 r600_constant_buffers_dirty(rctx, state);
2579 }
2580 }
2581
2582 /* Texture buffer objects - update the virtual addresses in descriptors. */
2583 LIST_FOR_EACH_ENTRY(view, &rctx->b.texture_buffers, list) {
2584 if (view->base.texture == &rbuffer->b.b) {
2585 unsigned stride = util_format_get_blocksize(view->base.format);
2586 uint64_t offset = (uint64_t)view->base.u.buf.first_element * stride;
2587 uint64_t va = rbuffer->gpu_address + offset;
2588
2589 view->tex_resource_words[0] = va;
2590 view->tex_resource_words[2] &= C_038008_BASE_ADDRESS_HI;
2591 view->tex_resource_words[2] |= S_038008_BASE_ADDRESS_HI(va >> 32);
2592 }
2593 }
2594 /* Texture buffer objects - make bindings dirty if needed. */
2595 for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
2596 struct r600_samplerview_state *state = &rctx->samplers[shader].views;
2597 bool found = false;
2598 uint32_t mask = state->enabled_mask;
2599
2600 while (mask) {
2601 unsigned i = u_bit_scan(&mask);
2602 if (state->views[i]->base.texture == &rbuffer->b.b) {
2603 found = true;
2604 state->dirty_mask |= 1 << i;
2605 }
2606 }
2607 if (found) {
2608 r600_sampler_views_dirty(rctx, state);
2609 }
2610 }
2611 }
2612
2613 static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
2614 {
2615 struct r600_context *rctx = (struct r600_context*)ctx;
2616
2617 if (rctx->db_misc_state.occlusion_query_enabled != enable) {
2618 rctx->db_misc_state.occlusion_query_enabled = enable;
2619 r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
2620 }
2621 }
2622
2623 static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
2624 bool include_draw_vbo)
2625 {
2626 r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
2627 }
2628
2629 /* keep this at the end of this file, please */
2630 void r600_init_common_state_functions(struct r600_context *rctx)
2631 {
2632 rctx->b.b.create_fs_state = r600_create_ps_state;
2633 rctx->b.b.create_vs_state = r600_create_vs_state;
2634 rctx->b.b.create_gs_state = r600_create_gs_state;
2635 rctx->b.b.create_vertex_elements_state = r600_create_vertex_fetch_shader;
2636 rctx->b.b.bind_blend_state = r600_bind_blend_state;
2637 rctx->b.b.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
2638 rctx->b.b.bind_sampler_states = r600_bind_sampler_states;
2639 rctx->b.b.bind_fs_state = r600_bind_ps_state;
2640 rctx->b.b.bind_rasterizer_state = r600_bind_rs_state;
2641 rctx->b.b.bind_vertex_elements_state = r600_bind_vertex_elements;
2642 rctx->b.b.bind_vs_state = r600_bind_vs_state;
2643 rctx->b.b.bind_gs_state = r600_bind_gs_state;
2644 rctx->b.b.delete_blend_state = r600_delete_blend_state;
2645 rctx->b.b.delete_depth_stencil_alpha_state = r600_delete_dsa_state;
2646 rctx->b.b.delete_fs_state = r600_delete_ps_state;
2647 rctx->b.b.delete_rasterizer_state = r600_delete_rs_state;
2648 rctx->b.b.delete_sampler_state = r600_delete_sampler_state;
2649 rctx->b.b.delete_vertex_elements_state = r600_delete_vertex_elements;
2650 rctx->b.b.delete_vs_state = r600_delete_vs_state;
2651 rctx->b.b.delete_gs_state = r600_delete_gs_state;
2652 rctx->b.b.set_blend_color = r600_set_blend_color;
2653 rctx->b.b.set_clip_state = r600_set_clip_state;
2654 rctx->b.b.set_constant_buffer = r600_set_constant_buffer;
2655 rctx->b.b.set_sample_mask = r600_set_sample_mask;
2656 rctx->b.b.set_stencil_ref = r600_set_pipe_stencil_ref;
2657 rctx->b.b.set_viewport_states = r600_set_viewport_states;
2658 rctx->b.b.set_vertex_buffers = r600_set_vertex_buffers;
2659 rctx->b.b.set_index_buffer = r600_set_index_buffer;
2660 rctx->b.b.set_sampler_views = r600_set_sampler_views;
2661 rctx->b.b.sampler_view_destroy = r600_sampler_view_destroy;
2662 rctx->b.b.texture_barrier = r600_texture_barrier;
2663 rctx->b.b.set_stream_output_targets = r600_set_streamout_targets;
2664 rctx->b.b.draw_vbo = r600_draw_vbo;
2665 rctx->b.invalidate_buffer = r600_invalidate_buffer;
2666 rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state;
2667 rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space;
2668 }
2669
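/* Write the current command-stream position and submission count into the trace
 * buffer; used to narrow down where a GPU hang occurred. */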
2670 void r600_trace_emit(struct r600_context *rctx)
2671 {
2672 struct r600_screen *rscreen = rctx->screen;
2673 struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
2674 uint64_t va;
2675 uint32_t reloc;
2676
2677 va = rscreen->b.trace_bo->gpu_address;
2678 reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo,
2679 RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
2680 radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
2681 radeon_emit(cs, va & 0xFFFFFFFFUL);
2682 radeon_emit(cs, (va >> 32UL) & 0xFFUL);
2683 radeon_emit(cs, cs->cdw);
2684 radeon_emit(cs, rscreen->b.cs_count);
2685 radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
2686 radeon_emit(cs, reloc);
2687 }