r300g: add generating texture coordinates for point sprites (WIP)
[mesa.git] / src / gallium / drivers / r300 / r300_state_derived.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "draw/draw_context.h"
25
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
28
29 #include "r300_context.h"
30 #include "r300_fs.h"
31 #include "r300_screen.h"
32 #include "r300_shader_semantics.h"
33 #include "r300_state_derived.h"
34 #include "r300_state_inlines.h"
35 #include "r300_vs.h"
36
37 /* r300_state_derived: Various bits of state which are dependent upon
38 * currently bound CSO data. */
39
40 static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
41 boolean swizzle_0001)
42 {
43 rs->ip[id] |= R300_RS_COL_PTR(ptr);
44 if (swizzle_0001) {
45 rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
46 } else {
47 rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
48 }
49 rs->inst[id] |= R300_RS_INST_COL_ID(id);
50 }
51
52 static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
53 {
54 rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
55 R300_RS_INST_COL_ADDR(fp_offset);
56 }
57
58 static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
59 boolean swizzle_X001)
60 {
61 if (swizzle_X001) {
62 rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
63 R300_RS_SEL_S(R300_RS_SEL_C0) |
64 R300_RS_SEL_T(R300_RS_SEL_K0) |
65 R300_RS_SEL_R(R300_RS_SEL_K0) |
66 R300_RS_SEL_Q(R300_RS_SEL_K1);
67 } else {
68 rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
69 R300_RS_SEL_S(R300_RS_SEL_C0) |
70 R300_RS_SEL_T(R300_RS_SEL_C1) |
71 R300_RS_SEL_R(R300_RS_SEL_C2) |
72 R300_RS_SEL_Q(R300_RS_SEL_C3);
73 }
74 rs->inst[id] |= R300_RS_INST_TEX_ID(id);
75 }
76
77 static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
78 {
79 rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE |
80 R300_RS_INST_TEX_ADDR(fp_offset);
81 }
82
83 static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
84 boolean swizzle_0001)
85 {
86 rs->ip[id] |= R500_RS_COL_PTR(ptr);
87 if (swizzle_0001) {
88 rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
89 } else {
90 rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
91 }
92 rs->inst[id] |= R500_RS_INST_COL_ID(id);
93 }
94
95 static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
96 {
97 rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
98 R500_RS_INST_COL_ADDR(fp_offset);
99 }
100
101 static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
102 boolean swizzle_X001)
103 {
104 int rs_tex_comp = ptr*4;
105
106 if (swizzle_X001) {
107 rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
108 R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
109 R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
110 R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
111 } else {
112 rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
113 R500_RS_SEL_T(rs_tex_comp + 1) |
114 R500_RS_SEL_R(rs_tex_comp + 2) |
115 R500_RS_SEL_Q(rs_tex_comp + 3);
116 }
117 rs->inst[id] |= R500_RS_INST_TEX_ID(id);
118 }
119
120 static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
121 {
122 rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE |
123 R500_RS_INST_TEX_ADDR(fp_offset);
124 }
125
126 /* Set up the RS block.
127 *
128 * This is the part of the chipset that actually does the rasterization
129 * of vertices into fragments. This is also the part of the chipset that
130 * locks up if any part of it is even slightly wrong. */
131 static void r300_update_rs_block(struct r300_context* r300,
132 struct r300_shader_semantics* vs_outputs,
133 struct r300_shader_semantics* fs_inputs)
134 {
135 struct r300_rs_block rs = { { 0 } };
136 int i, col_count = 0, tex_count = 0, fp_offset = 0, count;
137 void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
138 void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
139 void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
140 void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
141 boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
142 vs_outputs->bcolor[1] != ATTR_UNUSED;
143
144 if (r300_screen(r300->context.screen)->caps->is_r500) {
145 rX00_rs_col = r500_rs_col;
146 rX00_rs_col_write = r500_rs_col_write;
147 rX00_rs_tex = r500_rs_tex;
148 rX00_rs_tex_write = r500_rs_tex_write;
149 } else {
150 rX00_rs_col = r300_rs_col;
151 rX00_rs_col_write = r300_rs_col_write;
152 rX00_rs_tex = r300_rs_tex;
153 rX00_rs_tex_write = r300_rs_tex_write;
154 }
155
156 /* Rasterize colors. */
157 for (i = 0; i < ATTR_COLOR_COUNT; i++) {
158 if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
159 vs_outputs->color[1] != ATTR_UNUSED) {
160 /* Always rasterize if it's written by the VS,
161 * otherwise it locks up. */
162 rX00_rs_col(&rs, col_count, i, FALSE);
163
164 /* Write it to the FS input register if it's used by the FS. */
165 if (fs_inputs->color[i] != ATTR_UNUSED) {
166 rX00_rs_col_write(&rs, col_count, fp_offset);
167 fp_offset++;
168 }
169 col_count++;
170 } else {
171 /* Skip the FS input register, leave it uninitialized. */
172 /* If we try to set it to (0,0,0,1), it will lock up. */
173 if (fs_inputs->color[i] != ATTR_UNUSED) {
174 fp_offset++;
175 }
176 }
177 }
178
179 /* Rasterize texture coordinates. */
180 for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
181 if (vs_outputs->generic[i] != ATTR_UNUSED ||
182 r300->sprite_coord_index == i) {
183 /* Always rasterize if it's written by the VS,
184 * otherwise it locks up. */
185 rX00_rs_tex(&rs, tex_count, tex_count, FALSE);
186
187 /* Write it to the FS input register if it's used by the FS. */
188 if (fs_inputs->generic[i] != ATTR_UNUSED) {
189 rX00_rs_tex_write(&rs, tex_count, fp_offset);
190 if (r300->sprite_coord_index == i)
191 debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset);
192 fp_offset++;
193 }
194 tex_count++;
195 } else {
196 /* Skip the FS input register, leave it uninitialized. */
197 /* If we try to set it to (0,0,0,1), it will lock up. */
198 if (fs_inputs->generic[i] != ATTR_UNUSED) {
199 fp_offset++;
200 }
201 }
202 }
203
204 /* Rasterize fog coordinates. */
205 if (vs_outputs->fog != ATTR_UNUSED) {
206 /* Always rasterize if it's written by the VS,
207 * otherwise it locks up. */
208 rX00_rs_tex(&rs, tex_count, tex_count, TRUE);
209
210 /* Write it to the FS input register if it's used by the FS. */
211 if (fs_inputs->fog != ATTR_UNUSED) {
212 rX00_rs_tex_write(&rs, tex_count, fp_offset);
213 fp_offset++;
214 }
215 tex_count++;
216 } else {
217 /* Skip the FS input register, leave it uninitialized. */
218 /* If we try to set it to (0,0,0,1), it will lock up. */
219 if (fs_inputs->fog != ATTR_UNUSED) {
220 fp_offset++;
221 }
222 }
223
224 /* Rasterize WPOS. */
225 /* If the FS doesn't need it, it's not written by the VS. */
226 if (fs_inputs->wpos != ATTR_UNUSED) {
227 rX00_rs_tex(&rs, tex_count, tex_count, FALSE);
228 rX00_rs_tex_write(&rs, tex_count, fp_offset);
229
230 fp_offset++;
231 tex_count++;
232 }
233
234 /* Rasterize at least one color, or bad things happen. */
235 if (col_count == 0 && tex_count == 0) {
236 rX00_rs_col(&rs, 0, 0, TRUE);
237 col_count++;
238 }
239
240 rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
241 R300_HIRES_EN;
242
243 count = MAX3(col_count, tex_count, 1);
244 rs.inst_count = count - 1;
245
246 /* Now, after all that, see if we actually need to update the state. */
247 if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) {
248 memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block));
249 r300->rs_block_state.size = 5 + count*2;
250 }
251 }
252
253 /* Update the shader-dependant states. */
254 static void r300_update_derived_shader_state(struct r300_context* r300)
255 {
256 struct r300_vertex_shader* vs = r300->vs_state.state;
257
258 r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs);
259 }
260
261 static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
262 {
263 /* We are interested only in the cases when a new depth or stencil value
264 * can be written and changed. */
265
266 /* We might optionally check for [Z func: never] and inspect the stencil
267 * state in a similar fashion, but it's not terribly important. */
268 return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
269 (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
270 ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
271 (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
272 }
273
274 static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
275 {
276 /* We are interested only in the cases when alpha testing can kill
277 * a fragment. */
278 uint32_t af = dsa->alpha_function;
279
280 return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
281 (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
282 }
283
284 static void r300_update_ztop(struct r300_context* r300)
285 {
286 struct r300_ztop_state* ztop_state =
287 (struct r300_ztop_state*)r300->ztop_state.state;
288
289 /* This is important enough that I felt it warranted a comment.
290 *
291 * According to the docs, these are the conditions where ZTOP must be
292 * disabled:
293 * 1) Alpha testing enabled
294 * 2) Texture kill instructions in fragment shader
295 * 3) Chroma key culling enabled
296 * 4) W-buffering enabled
297 *
298 * The docs claim that for the first three cases, if no ZS writes happen,
299 * then ZTOP can be used.
300 *
301 * (3) will never apply since we do not support chroma-keyed operations.
302 * (4) will need to be re-examined (and this comment updated) if/when
303 * Hyper-Z becomes supported.
304 *
305 * Additionally, the following conditions require disabled ZTOP:
306 * 5) Depth writes in fragment shader
307 * 6) Outstanding occlusion queries
308 *
309 * This register causes stalls all the way from SC to CB when changed,
310 * but it is buffered on-chip so it does not hurt to write it if it has
311 * not changed.
312 *
313 * ~C.
314 */
315
316 /* ZS writes */
317 if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
318 (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */
319 r300->fs->info.uses_kill)) { /* (2) */
320 ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
321 } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */
322 ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
323 } else if (r300->query_current) { /* (6) */
324 ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
325 } else {
326 ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
327 }
328
329 r300->ztop_state.dirty = TRUE;
330 }
331
332 static void r300_merge_textures_and_samplers(struct r300_context* r300)
333 {
334 struct r300_textures_state *state =
335 (struct r300_textures_state*)r300->textures_state.state;
336 struct r300_texture_sampler_state *texstate;
337 struct r300_sampler_state *sampler;
338 struct r300_texture *tex;
339 unsigned min_level, max_level, i, size;
340 unsigned count = MIN2(state->texture_count, state->sampler_count);
341
342 state->tx_enable = 0;
343 size = 2;
344
345 for (i = 0; i < count; i++) {
346 if (state->fragment_sampler_views[i] && state->sampler_states[i]) {
347 state->tx_enable |= 1 << i;
348
349 tex = (struct r300_texture *)state->fragment_sampler_views[i]->texture;
350 sampler = state->sampler_states[i];
351
352 texstate = &state->regs[i];
353 memcpy(texstate->format, &tex->state, sizeof(uint32_t)*3);
354 texstate->filter[0] = sampler->filter0;
355 texstate->filter[1] = sampler->filter1;
356 texstate->border_color = sampler->border_color;
357 texstate->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) |
358 R300_TXO_MICRO_TILE(tex->microtile);
359
360 /* to emulate 1D textures through 2D ones correctly */
361 if (tex->tex.target == PIPE_TEXTURE_1D) {
362 texstate->filter[0] &= ~R300_TX_WRAP_T_MASK;
363 texstate->filter[0] |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
364 }
365
366 if (tex->is_npot) {
367 /* NPOT textures don't support mip filter, unfortunately.
368 * This prevents incorrect rendering. */
369 texstate->filter[0] &= ~R300_TX_MIN_FILTER_MIP_MASK;
370 } else {
371 /* determine min/max levels */
372 /* the MAX_MIP level is the largest (finest) one */
373 max_level = MIN2(sampler->max_lod, tex->tex.last_level);
374 min_level = MIN2(sampler->min_lod, max_level);
375 texstate->format[0] |= R300_TX_NUM_LEVELS(max_level);
376 texstate->filter[0] |= R300_TX_MAX_MIP_LEVEL(min_level);
377 }
378
379 texstate->filter[0] |= i << 28;
380
381 size += 16;
382 state->count = i+1;
383 }
384 }
385
386 r300->textures_state.size = size;
387 }
388
389 void r300_update_derived_state(struct r300_context* r300)
390 {
391 if (r300->rs_block_state.dirty) {
392 r300_update_derived_shader_state(r300);
393 }
394
395 if (r300->textures_state.dirty) {
396 r300_merge_textures_and_samplers(r300);
397 }
398
399 r300_update_ztop(r300);
400 }