i915g: Fix the blending for the A8 destination buffer case.
[mesa.git] / src / gallium / drivers / i915 / i915_state_emit.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "i915_reg.h"
30 #include "i915_context.h"
31 #include "i915_batch.h"
32 #include "i915_debug.h"
33 #include "i915_resource.h"
34
35 #include "pipe/p_context.h"
36 #include "pipe/p_defines.h"
37 #include "pipe/p_format.h"
38
39 #include "util/u_format.h"
40 #include "util/u_math.h"
41 #include "util/u_memory.h"
42
/**
 * Record describing one tracked hardware-state atom: a name string, a
 * validate callback that receives a batch-space out-parameter, an emit
 * callback, and two unsigned fields (dirty and batch_space).
 */
struct i915_tracked_hw_state {
   const char *name;
   void (*validate)(struct i915_context *, unsigned *batch_space);
   void (*emit)(struct i915_context *);
   unsigned dirty;
   unsigned batch_space;
};
49
50
51 static void
52 validate_flush(struct i915_context *i915, unsigned *batch_space)
53 {
54 *batch_space = i915->flush_dirty ? 1 : 0;
55 }
56
57 static void
58 emit_flush(struct i915_context *i915)
59 {
60 /* Cache handling is very cheap atm. State handling can request to flushes:
61 * - I915_FLUSH_CACHE which is a flush everything request and
62 * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
63 * Because the cache handling is so dumb, no explicit "invalidate map cache".
64 * Also, the first is a strict superset of the latter, so the following logic
65 * works. */
66 if (i915->flush_dirty & I915_FLUSH_CACHE)
67 OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
68 else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
69 OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
70 }
71
72 uint32_t invariant_state[] = {
73 _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
74 AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
75
76 _3DSTATE_DFLT_DIFFUSE_CMD, 0,
77
78 _3DSTATE_DFLT_SPEC_CMD, 0,
79
80 _3DSTATE_DFLT_Z_CMD, 0,
81
82 _3DSTATE_COORD_SET_BINDINGS |
83 CSB_TCB(0, 0) |
84 CSB_TCB(1, 1) |
85 CSB_TCB(2, 2) |
86 CSB_TCB(3, 3) |
87 CSB_TCB(4, 4) |
88 CSB_TCB(5, 5) |
89 CSB_TCB(6, 6) |
90 CSB_TCB(7, 7),
91
92 _3DSTATE_RASTER_RULES_CMD |
93 ENABLE_POINT_RASTER_RULE |
94 OGL_POINT_RASTER_RULE |
95 ENABLE_LINE_STRIP_PROVOKE_VRTX |
96 ENABLE_TRI_FAN_PROVOKE_VRTX |
97 LINE_STRIP_PROVOKE_VRTX(1) |
98 TRI_FAN_PROVOKE_VRTX(2) |
99 ENABLE_TEXKILL_3D_4D |
100 TEXKILL_4D,
101
102 _3DSTATE_DEPTH_SUBRECT_DISABLE,
103
104 /* disable indirect state for now
105 */
106 _3DSTATE_LOAD_INDIRECT | 0, 0};
107
108 static void
109 emit_invariant(struct i915_context *i915)
110 {
111 i915_winsys_batchbuffer_write(i915->batch, invariant_state,
112 Elements(invariant_state)*sizeof(uint32_t));
113 }
114
115 static void
116 validate_immediate(struct i915_context *i915, unsigned *batch_space)
117 {
118 unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
119 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
120 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
121 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
122 i915->immediate_dirty;
123
124 if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
125 i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
126
127 *batch_space = 1 + util_bitcount(dirty);
128 }
129
130 static void
131 emit_immediate(struct i915_context *i915)
132 {
133 /* remove unwanted bits and S7 */
134 unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
135 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
136 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
137 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
138 i915->immediate_dirty;
139 int i, num = util_bitcount(dirty);
140 assert(num && num <= I915_MAX_IMMEDIATE);
141
142 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
143 dirty << 4 | (num - 1));
144
145 if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
146 if (i915->vbo)
147 OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
148 i915->current.immediate[I915_IMMEDIATE_S0]);
149 else
150 OUT_BATCH(0);
151 }
152
153 for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
154 if (dirty & (1 << i)) {
155 /* Fixup blend function for A8 dst buffers.
156 * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer,
157 * and therefore we need to use the color factor for alphas. */
158 if ((i == I915_IMMEDIATE_S6) &&
159 (i915->current.target_fixup_format == PIPE_FORMAT_A8_UNORM)) {
160 uint32_t imm = i915->current.immediate[i];
161 uint32_t srcRGB = (imm >> S6_CBUF_SRC_BLEND_FACT_SHIFT) & BLENDFACT_MASK;
162 if (srcRGB == BLENDFACT_DST_ALPHA)
163 srcRGB = BLENDFACT_DST_COLR;
164 else if (srcRGB == BLENDFACT_INV_DST_ALPHA)
165 srcRGB = BLENDFACT_INV_DST_COLR;
166 imm &= ~SRC_BLND_FACT(BLENDFACT_MASK);
167 imm |= SRC_BLND_FACT(srcRGB);
168 OUT_BATCH(imm);
169 } else {
170 OUT_BATCH(i915->current.immediate[i]);
171 }
172 }
173 }
174 }
175
176 static void
177 validate_dynamic(struct i915_context *i915, unsigned *batch_space)
178 {
179 *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
180 }
181
182 static void
183 emit_dynamic(struct i915_context *i915)
184 {
185 int i;
186 for (i = 0; i < I915_MAX_DYNAMIC; i++) {
187 if (i915->dynamic_dirty & (1 << i))
188 OUT_BATCH(i915->current.dynamic[i]);
189 }
190 }
191
192 static void
193 validate_static(struct i915_context *i915, unsigned *batch_space)
194 {
195 *batch_space = 0;
196
197 if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
198 i915->validation_buffers[i915->num_validation_buffers++]
199 = i915->current.cbuf_bo;
200 *batch_space += 3;
201 }
202
203 if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
204 i915->validation_buffers[i915->num_validation_buffers++]
205 = i915->current.depth_bo;
206 *batch_space += 3;
207 }
208
209 if (i915->static_dirty & I915_DST_VARS)
210 *batch_space += 2;
211
212 if (i915->static_dirty & I915_DST_RECT)
213 *batch_space += 5;
214 }
215
216 static void
217 emit_static(struct i915_context *i915)
218 {
219 if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
220 OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
221 OUT_BATCH(i915->current.cbuf_flags);
222 OUT_RELOC(i915->current.cbuf_bo,
223 I915_USAGE_RENDER,
224 0);
225 }
226
227 /* What happens if no zbuf??
228 */
229 if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
230 OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
231 OUT_BATCH(i915->current.depth_flags);
232 OUT_RELOC(i915->current.depth_bo,
233 I915_USAGE_RENDER,
234 0);
235 }
236
237 if (i915->static_dirty & I915_DST_VARS) {
238 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
239 OUT_BATCH(i915->current.dst_buf_vars);
240 }
241 }
242
243 static void
244 validate_map(struct i915_context *i915, unsigned *batch_space)
245 {
246 const uint enabled = i915->current.sampler_enable_flags;
247 uint unit;
248 struct i915_texture *tex;
249
250 *batch_space = i915->current.sampler_enable_nr ?
251 2 + 3*i915->current.sampler_enable_nr : 0;
252
253 for (unit = 0; unit < I915_TEX_UNITS; unit++) {
254 if (enabled & (1 << unit)) {
255 tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
256 i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
257 }
258 }
259 }
260
261 static void
262 emit_map(struct i915_context *i915)
263 {
264 const uint nr = i915->current.sampler_enable_nr;
265 if (nr) {
266 const uint enabled = i915->current.sampler_enable_flags;
267 uint unit;
268 uint count = 0;
269 OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
270 OUT_BATCH(enabled);
271 for (unit = 0; unit < I915_TEX_UNITS; unit++) {
272 if (enabled & (1 << unit)) {
273 struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
274 struct i915_winsys_buffer *buf = texture->buffer;
275 assert(buf);
276
277 count++;
278
279 OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
280 OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
281 OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
282 }
283 }
284 assert(count == nr);
285 }
286 }
287
288 static void
289 validate_sampler(struct i915_context *i915, unsigned *batch_space)
290 {
291 *batch_space = i915->current.sampler_enable_nr ?
292 2 + 3*i915->current.sampler_enable_nr : 0;
293 }
294
295 static void
296 emit_sampler(struct i915_context *i915)
297 {
298 if (i915->current.sampler_enable_nr) {
299 int i;
300
301 OUT_BATCH( _3DSTATE_SAMPLER_STATE |
302 (3 * i915->current.sampler_enable_nr) );
303
304 OUT_BATCH( i915->current.sampler_enable_flags );
305
306 for (i = 0; i < I915_TEX_UNITS; i++) {
307 if (i915->current.sampler_enable_flags & (1<<i)) {
308 OUT_BATCH( i915->current.sampler[i][0] );
309 OUT_BATCH( i915->current.sampler[i][1] );
310 OUT_BATCH( i915->current.sampler[i][2] );
311 }
312 }
313 }
314 }
315
316 static void
317 validate_constants(struct i915_context *i915, unsigned *batch_space)
318 {
319 *batch_space = i915->fs->num_constants ?
320 2 + 4*i915->fs->num_constants : 0;
321 }
322
323 static void
324 emit_constants(struct i915_context *i915)
325 {
326 /* Collate the user-defined constants with the fragment shader's
327 * immediates according to the constant_flags[] array.
328 */
329 const uint nr = i915->fs->num_constants;
330 if (nr) {
331 uint i;
332
333 OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
334 OUT_BATCH((1 << nr) - 1);
335
336 for (i = 0; i < nr; i++) {
337 const uint *c;
338 if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
339 /* grab user-defined constant */
340 c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
341 c += 4 * i;
342 }
343 else {
344 /* emit program constant */
345 c = (uint *) i915->fs->constants[i];
346 }
347 #if 0 /* debug */
348 {
349 float *f = (float *) c;
350 printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
351 (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
352 ? "user" : "immediate"));
353 }
354 #endif
355 OUT_BATCH(*c++);
356 OUT_BATCH(*c++);
357 OUT_BATCH(*c++);
358 OUT_BATCH(*c++);
359 }
360 }
361 }
362
363 static void
364 validate_program(struct i915_context *i915, unsigned *batch_space)
365 {
366 uint additional_size = i915->current.target_fixup_format ? 1 : 0;
367
368 /* we need more batch space if we want to emulate rgba framebuffers */
369 *batch_space = i915->fs->program_len + 3 * additional_size;
370 }
371
372 static void
373 emit_program(struct i915_context *i915)
374 {
375 uint need_target_fixup = i915->current.target_fixup_format ? 1 : 0;
376 uint i;
377
378 /* we should always have, at least, a pass-through program */
379 assert(i915->fs->program_len > 0);
380
381 {
382 /* first word has the size, we have to adjust that */
383 uint size = (i915->fs->program[0]);
384 size += need_target_fixup * 3;
385 OUT_BATCH(size);
386 }
387
388 /* output the declarations of the program */
389 for (i=1 ; i < i915->fs->program_len; i++)
390 OUT_BATCH(i915->fs->program[i]);
391
392 /* we emit an additional mov with swizzle to fake RGBA framebuffers */
393 if (need_target_fixup) {
394 /* mov out_color, out_color.zyxw */
395 OUT_BATCH(A0_MOV |
396 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
397 A0_DEST_CHANNEL_ALL |
398 (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
399 (T_DIFFUSE << A0_SRC0_NR_SHIFT));
400 OUT_BATCH(i915->current.fixup_swizzle);
401 OUT_BATCH(0);
402 }
403 }
404
405 static void
406 emit_draw_rect(struct i915_context *i915)
407 {
408 if (i915->static_dirty & I915_DST_RECT) {
409 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
410 OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
411 OUT_BATCH(i915->current.draw_offset);
412 OUT_BATCH(i915->current.draw_size);
413 OUT_BATCH(i915->current.draw_offset);
414 }
415 }
416
417 static boolean
418 i915_validate_state(struct i915_context *i915, unsigned *batch_space)
419 {
420 unsigned tmp;
421
422 i915->num_validation_buffers = 0;
423 if (i915->hardware_dirty & I915_HW_INVARIANT)
424 *batch_space = Elements(invariant_state);
425 else
426 *batch_space = 0;
427
428 #define VALIDATE_ATOM(atom, hw_dirty) \
429 if (i915->hardware_dirty & hw_dirty) { \
430 validate_##atom(i915, &tmp); \
431 *batch_space += tmp; }
432 VALIDATE_ATOM(flush, I915_HW_FLUSH);
433 VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
434 VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
435 VALIDATE_ATOM(static, I915_HW_STATIC);
436 VALIDATE_ATOM(map, I915_HW_MAP);
437 VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
438 VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
439 VALIDATE_ATOM(program, I915_HW_PROGRAM);
440 #undef VALIDATE_ATOM
441
442 if (i915->num_validation_buffers == 0)
443 return TRUE;
444
445 if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
446 i915->num_validation_buffers))
447 return FALSE;
448
449 return TRUE;
450 }
451
452 /* Push the state into the sarea and/or texture memory.
453 */
454 void
455 i915_emit_hardware_state(struct i915_context *i915 )
456 {
457 unsigned batch_space;
458 uintptr_t save_ptr;
459
460 assert(i915->dirty == 0);
461
462 if (I915_DBG_ON(DBG_ATOMS))
463 i915_dump_hardware_dirty(i915, __FUNCTION__);
464
465 if (!i915_validate_state(i915, &batch_space)) {
466 FLUSH_BATCH(NULL);
467 assert(i915_validate_state(i915, &batch_space));
468 }
469
470 if(!BEGIN_BATCH(batch_space)) {
471 FLUSH_BATCH(NULL);
472 assert(i915_validate_state(i915, &batch_space));
473 assert(BEGIN_BATCH(batch_space));
474 }
475
476 save_ptr = (uintptr_t)i915->batch->ptr;
477
478 #define EMIT_ATOM(atom, hw_dirty) \
479 if (i915->hardware_dirty & hw_dirty) \
480 emit_##atom(i915);
481 EMIT_ATOM(flush, I915_HW_FLUSH);
482 EMIT_ATOM(invariant, I915_HW_INVARIANT);
483 EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
484 EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
485 EMIT_ATOM(static, I915_HW_STATIC);
486 EMIT_ATOM(map, I915_HW_MAP);
487 EMIT_ATOM(sampler, I915_HW_SAMPLER);
488 EMIT_ATOM(constants, I915_HW_CONSTANTS);
489 EMIT_ATOM(program, I915_HW_PROGRAM);
490 EMIT_ATOM(draw_rect, I915_HW_STATIC);
491 #undef EMIT_ATOM
492
493 I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
494 ((uintptr_t)i915->batch->ptr - save_ptr) / 4,
495 batch_space);
496 assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
497
498 i915->hardware_dirty = 0;
499 i915->immediate_dirty = 0;
500 i915->dynamic_dirty = 0;
501 i915->static_dirty = 0;
502 i915->flush_dirty = 0;
503 }