r300g: Kill dead code for hashing custom state.
[mesa.git] / src / gallium / drivers / r300 / r300_state_derived.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "draw/draw_context.h"
25
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
28
29 #include "r300_context.h"
30 #include "r300_fs.h"
31 #include "r300_screen.h"
32 #include "r300_shader_semantics.h"
33 #include "r300_state_derived.h"
34 #include "r300_state_inlines.h"
35 #include "r300_vs.h"
36
37 /* r300_state_derived: Various bits of state which are dependent upon
38 * currently bound CSO data. */
39
40 static void r300_draw_emit_attrib(struct r300_context* r300,
41 enum attrib_emit emit,
42 enum interp_mode interp,
43 int index)
44 {
45 struct tgsi_shader_info* info = &r300->vs->info;
46 int output;
47
48 output = draw_find_shader_output(r300->draw,
49 info->output_semantic_name[index],
50 info->output_semantic_index[index]);
51 draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
52 }
53
54 static void r300_draw_emit_all_attribs(struct r300_context* r300)
55 {
56 struct r300_shader_semantics* vs_outputs = &r300->vs->outputs;
57 int i, gen_count;
58
59 /* Position. */
60 if (vs_outputs->pos != ATTR_UNUSED) {
61 r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
62 vs_outputs->pos);
63 } else {
64 assert(0);
65 }
66
67 /* Point size. */
68 if (vs_outputs->psize != ATTR_UNUSED) {
69 r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
70 vs_outputs->psize);
71 }
72
73 /* Colors. */
74 for (i = 0; i < ATTR_COLOR_COUNT; i++) {
75 if (vs_outputs->color[i] != ATTR_UNUSED) {
76 r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
77 vs_outputs->color[i]);
78 }
79 }
80
81 /* XXX Back-face colors. */
82
83 /* Texture coordinates. */
84 gen_count = 0;
85 for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
86 if (vs_outputs->generic[i] != ATTR_UNUSED) {
87 r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
88 vs_outputs->generic[i]);
89 gen_count++;
90 }
91 }
92
93 /* Fog coordinates. */
94 if (vs_outputs->fog != ATTR_UNUSED) {
95 r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
96 vs_outputs->fog);
97 gen_count++;
98 }
99
100 /* XXX magic */
101 assert(gen_count <= 8);
102 }
103
104 /* Update the PSC tables. */
105 static void r300_vertex_psc(struct r300_context* r300)
106 {
107 struct r300_vertex_info *vformat = r300->vertex_info;
108 uint16_t type, swizzle;
109 enum pipe_format format;
110 unsigned i;
111 int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
112 int* stream_tab;
113
114 /* If TCL is bypassed, map vertex streams to equivalent VS output
115 * locations. */
116 if (r300->tcl_bypass) {
117 stream_tab = r300->vs->stream_loc_notcl;
118 } else {
119 stream_tab = identity;
120 }
121
122 /* Vertex shaders have no semantics on their inputs,
123 * so PSC should just route stuff based on the vertex elements,
124 * and not on attrib information. */
125 DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
126 " in psc\n",
127 r300->vs->info.num_inputs,
128 r300->vertex_element_count);
129
130 for (i = 0; i < r300->vertex_element_count; i++) {
131 format = r300->vertex_element[i].src_format;
132
133 type = r300_translate_vertex_data_type(format) |
134 (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
135 swizzle = r300_translate_vertex_data_swizzle(format);
136
137 if (i & 1) {
138 vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
139 vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
140 } else {
141 vformat->vap_prog_stream_cntl[i >> 1] |= type;
142 vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
143 }
144 }
145
146 assert(i <= 15);
147
148 /* Set the last vector in the PSC. */
149 if (i) {
150 i -= 1;
151 }
152 vformat->vap_prog_stream_cntl[i >> 1] |=
153 (R300_LAST_VEC << (i & 1 ? 16 : 0));
154 }
155
156 /* Update the PSC tables for SW TCL, using Draw. */
157 static void r300_swtcl_vertex_psc(struct r300_context* r300)
158 {
159 struct r300_vertex_info *vformat = r300->vertex_info;
160 struct vertex_info* vinfo = &vformat->vinfo;
161 uint16_t type, swizzle;
162 enum pipe_format format;
163 unsigned i, attrib_count;
164 int* vs_output_tab = r300->vs->stream_loc_notcl;
165
166 /* For each Draw attribute, route it to the fragment shader according
167 * to the vs_output_tab. */
168 attrib_count = vinfo->num_attribs;
169 DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
170 for (i = 0; i < attrib_count; i++) {
171 DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
172 " vs_output_tab %d\n", vinfo->attrib[i].src_index,
173 vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
174 vs_output_tab[i]);
175 }
176
177 for (i = 0; i < attrib_count; i++) {
178 /* Make sure we have a proper destination for our attribute. */
179 assert(vs_output_tab[i] != -1);
180
181 format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
182
183 /* Obtain the type of data in this attribute. */
184 type = r300_translate_vertex_data_type(format) |
185 vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
186
187 /* Obtain the swizzle for this attribute. Note that the default
188 * swizzle in the hardware is not XYZW! */
189 swizzle = r300_translate_vertex_data_swizzle(format);
190
191 /* Add the attribute to the PSC table. */
192 if (i & 1) {
193 vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
194 vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
195 } else {
196 vformat->vap_prog_stream_cntl[i >> 1] |= type;
197 vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
198 }
199 }
200
201 /* Set the last vector in the PSC. */
202 if (i) {
203 i -= 1;
204 }
205 vformat->vap_prog_stream_cntl[i >> 1] |=
206 (R300_LAST_VEC << (i & 1 ? 16 : 0));
207 }
208
209 static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
210 boolean swizzle_0001)
211 {
212 rs->ip[id] |= R300_RS_COL_PTR(ptr);
213 if (swizzle_0001) {
214 rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
215 } else {
216 rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
217 }
218 rs->inst[id] |= R300_RS_INST_COL_ID(id);
219 }
220
221 static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
222 {
223 rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
224 R300_RS_INST_COL_ADDR(fp_offset);
225 }
226
227 static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
228 boolean swizzle_X001)
229 {
230 if (swizzle_X001) {
231 rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
232 R300_RS_SEL_S(R300_RS_SEL_C0) |
233 R300_RS_SEL_T(R300_RS_SEL_K0) |
234 R300_RS_SEL_R(R300_RS_SEL_K0) |
235 R300_RS_SEL_Q(R300_RS_SEL_K1);
236 } else {
237 rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
238 R300_RS_SEL_S(R300_RS_SEL_C0) |
239 R300_RS_SEL_T(R300_RS_SEL_C1) |
240 R300_RS_SEL_R(R300_RS_SEL_C2) |
241 R300_RS_SEL_Q(R300_RS_SEL_C3);
242 }
243 rs->inst[id] |= R300_RS_INST_TEX_ID(id);
244 }
245
246 static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
247 {
248 rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE |
249 R300_RS_INST_TEX_ADDR(fp_offset);
250 }
251
252 static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
253 boolean swizzle_0001)
254 {
255 rs->ip[id] |= R500_RS_COL_PTR(ptr);
256 if (swizzle_0001) {
257 rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
258 } else {
259 rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
260 }
261 rs->inst[id] |= R500_RS_INST_COL_ID(id);
262 }
263
264 static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
265 {
266 rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
267 R500_RS_INST_COL_ADDR(fp_offset);
268 }
269
270 static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
271 boolean swizzle_X001)
272 {
273 int rs_tex_comp = ptr*4;
274
275 if (swizzle_X001) {
276 rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
277 R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
278 R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
279 R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
280 } else {
281 rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
282 R500_RS_SEL_T(rs_tex_comp + 1) |
283 R500_RS_SEL_R(rs_tex_comp + 2) |
284 R500_RS_SEL_Q(rs_tex_comp + 3);
285 }
286 rs->inst[id] |= R500_RS_INST_TEX_ID(id);
287 }
288
289 static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
290 {
291 rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE |
292 R500_RS_INST_TEX_ADDR(fp_offset);
293 }
294
295 /* Set up the RS block.
296 *
297 * This is the part of the chipset that actually does the rasterization
298 * of vertices into fragments. This is also the part of the chipset that
299 * locks up if any part of it is even slightly wrong. */
300 static void r300_update_rs_block(struct r300_context* r300,
301 struct r300_shader_semantics* vs_outputs,
302 struct r300_shader_semantics* fs_inputs)
303 {
304 struct r300_rs_block* rs = r300->rs_block;
305 int i, col_count = 0, tex_count = 0, fp_offset = 0;
306 void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
307 void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
308 void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
309 void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
310 boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
311 vs_outputs->bcolor[1] != ATTR_UNUSED;
312
313 if (r300_screen(r300->context.screen)->caps->is_r500) {
314 rX00_rs_col = r500_rs_col;
315 rX00_rs_col_write = r500_rs_col_write;
316 rX00_rs_tex = r500_rs_tex;
317 rX00_rs_tex_write = r500_rs_tex_write;
318 } else {
319 rX00_rs_col = r300_rs_col;
320 rX00_rs_col_write = r300_rs_col_write;
321 rX00_rs_tex = r300_rs_tex;
322 rX00_rs_tex_write = r300_rs_tex_write;
323 }
324
325 /* Rasterize colors. */
326 for (i = 0; i < ATTR_COLOR_COUNT; i++) {
327 if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
328 vs_outputs->color[1] != ATTR_UNUSED) {
329 /* Always rasterize if it's written by the VS,
330 * otherwise it locks up. */
331 rX00_rs_col(rs, col_count, i, FALSE);
332
333 /* Write it to the FS input register if it's used by the FS. */
334 if (fs_inputs->color[i] != ATTR_UNUSED) {
335 rX00_rs_col_write(rs, col_count, fp_offset);
336 fp_offset++;
337 }
338 col_count++;
339 } else {
340 /* Skip the FS input register, leave it uninitialized. */
341 /* If we try to set it to (0,0,0,1), it will lock up. */
342 if (fs_inputs->color[i] != ATTR_UNUSED) {
343 fp_offset++;
344 }
345 }
346 }
347
348 /* Rasterize texture coordinates. */
349 for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
350 if (vs_outputs->generic[i] != ATTR_UNUSED) {
351 /* Always rasterize if it's written by the VS,
352 * otherwise it locks up. */
353 rX00_rs_tex(rs, tex_count, tex_count, FALSE);
354
355 /* Write it to the FS input register if it's used by the FS. */
356 if (fs_inputs->generic[i] != ATTR_UNUSED) {
357 rX00_rs_tex_write(rs, tex_count, fp_offset);
358 fp_offset++;
359 }
360 tex_count++;
361 } else {
362 /* Skip the FS input register, leave it uninitialized. */
363 /* If we try to set it to (0,0,0,1), it will lock up. */
364 if (fs_inputs->generic[i] != ATTR_UNUSED) {
365 fp_offset++;
366 }
367 }
368 }
369
370 /* Rasterize fog coordinates. */
371 if (vs_outputs->fog != ATTR_UNUSED) {
372 /* Always rasterize if it's written by the VS,
373 * otherwise it locks up. */
374 rX00_rs_tex(rs, tex_count, tex_count, TRUE);
375
376 /* Write it to the FS input register if it's used by the FS. */
377 if (fs_inputs->fog != ATTR_UNUSED) {
378 rX00_rs_tex_write(rs, tex_count, fp_offset);
379 fp_offset++;
380 }
381 tex_count++;
382 } else {
383 /* Skip the FS input register, leave it uninitialized. */
384 /* If we try to set it to (0,0,0,1), it will lock up. */
385 if (fs_inputs->fog != ATTR_UNUSED) {
386 fp_offset++;
387 }
388 }
389
390 /* Rasterize WPOS. */
391 /* If the FS doesn't need it, it's not written by the VS. */
392 if (fs_inputs->wpos != ATTR_UNUSED) {
393 rX00_rs_tex(rs, tex_count, tex_count, FALSE);
394 rX00_rs_tex_write(rs, tex_count, fp_offset);
395
396 fp_offset++;
397 tex_count++;
398 }
399
400 /* Rasterize at least one color, or bad things happen. */
401 if (col_count == 0 && tex_count == 0) {
402 rX00_rs_col(rs, 0, 0, TRUE);
403 col_count++;
404 }
405
406 rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
407 R300_HIRES_EN;
408
409 rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0);
410 }
411
412 /* Update the vertex format. */
413 static void r300_update_derived_shader_state(struct r300_context* r300)
414 {
415 struct r300_screen* r300screen = r300_screen(r300->context.screen);
416
417 /* Reset structures */
418 memset(r300->rs_block, 0, sizeof(struct r300_rs_block));
419 memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info));
420 memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4);
421
422 r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs);
423
424 if (r300screen->caps->has_tcl) {
425 r300_vertex_psc(r300);
426 } else {
427 r300_draw_emit_all_attribs(r300);
428 draw_compute_vertex_size(&r300->vertex_info->vinfo);
429 r300_swtcl_vertex_psc(r300);
430 }
431
432 r300->dirty_state |= R300_NEW_RS_BLOCK;
433 }
434
435 static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
436 {
437 /* We are interested only in the cases when a new depth or stencil value
438 * can be written and changed. */
439
440 /* We might optionally check for [Z func: never] and inspect the stencil
441 * state in a similar fashion, but it's not terribly important. */
442 return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
443 (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
444 ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
445 (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
446 }
447
448 static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
449 {
450 /* We are interested only in the cases when alpha testing can kill
451 * a fragment. */
452 uint32_t af = dsa->alpha_function;
453
454 return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
455 (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
456 }
457
458 static void r300_update_ztop(struct r300_context* r300)
459 {
460 struct r300_ztop_state* ztop_state =
461 (struct r300_ztop_state*)r300->ztop_state.state;
462
463 /* This is important enough that I felt it warranted a comment.
464 *
465 * According to the docs, these are the conditions where ZTOP must be
466 * disabled:
467 * 1) Alpha testing enabled
468 * 2) Texture kill instructions in fragment shader
469 * 3) Chroma key culling enabled
470 * 4) W-buffering enabled
471 *
472 * The docs claim that for the first three cases, if no ZS writes happen,
473 * then ZTOP can be used.
474 *
475 * (3) will never apply since we do not support chroma-keyed operations.
476 * (4) will need to be re-examined (and this comment updated) if/when
477 * Hyper-Z becomes supported.
478 *
479 * Additionally, the following conditions require disabled ZTOP:
480 * 5) Depth writes in fragment shader
481 * 6) Outstanding occlusion queries
482 *
483 * This register causes stalls all the way from SC to CB when changed,
484 * but it is buffered on-chip so it does not hurt to write it if it has
485 * not changed.
486 *
487 * ~C.
488 */
489
490 /* ZS writes */
491 if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
492 (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */
493 r300->fs->info.uses_kill)) { /* (2) */
494 ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
495 } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */
496 ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
497 } else if (r300->query_current) { /* (6) */
498 ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
499 } else {
500 ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
501 }
502
503 r300->ztop_state.dirty = TRUE;
504 }
505
506 void r300_update_derived_state(struct r300_context* r300)
507 {
508 /* XXX */
509 if (r300->dirty_state &
510 (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER |
511 R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) {
512 r300_update_derived_shader_state(r300);
513 }
514
515 r300_update_ztop(r300);
516 }