i915g: Use PIPE_FLUSH_END_OF_FRAME to trigger throttling
[mesa.git] / src / gallium / drivers / i915 / i915_state_emit.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "i915_reg.h"
30 #include "i915_context.h"
31 #include "i915_batch.h"
32 #include "i915_debug.h"
33 #include "i915_fpc.h"
34 #include "i915_resource.h"
35
36 #include "pipe/p_context.h"
37 #include "pipe/p_defines.h"
38 #include "pipe/p_format.h"
39
40 #include "util/u_format.h"
41 #include "util/u_math.h"
42 #include "util/u_memory.h"
43
/**
 * Descriptor for one piece of tracked hardware state: a name, a pair of
 * callbacks (one that validates and reports batch space, one that emits),
 * and two unsigned bookkeeping fields.
 */
struct i915_tracked_hw_state {
   /* Name of the state atom. */
   const char *name;

   /* Validation callback; writes a value through batch_space. */
   void (*validate)(struct i915_context *, unsigned *batch_space);

   /* Emission callback. */
   void (*emit)(struct i915_context *);

   unsigned dirty;
   unsigned batch_space;
};
50
51
52 static void
53 validate_flush(struct i915_context *i915, unsigned *batch_space)
54 {
55 *batch_space = i915->flush_dirty ? 1 : 0;
56 }
57
58 static void
59 emit_flush(struct i915_context *i915)
60 {
61 /* Cache handling is very cheap atm. State handling can request to flushes:
62 * - I915_FLUSH_CACHE which is a flush everything request and
63 * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
64 * Because the cache handling is so dumb, no explicit "invalidate map cache".
65 * Also, the first is a strict superset of the latter, so the following logic
66 * works. */
67 if (i915->flush_dirty & I915_FLUSH_CACHE)
68 OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
69 else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
70 OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
71 }
72
73 uint32_t invariant_state[] = {
74 _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
75 AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
76
77 _3DSTATE_DFLT_DIFFUSE_CMD, 0,
78
79 _3DSTATE_DFLT_SPEC_CMD, 0,
80
81 _3DSTATE_DFLT_Z_CMD, 0,
82
83 _3DSTATE_COORD_SET_BINDINGS |
84 CSB_TCB(0, 0) |
85 CSB_TCB(1, 1) |
86 CSB_TCB(2, 2) |
87 CSB_TCB(3, 3) |
88 CSB_TCB(4, 4) |
89 CSB_TCB(5, 5) |
90 CSB_TCB(6, 6) |
91 CSB_TCB(7, 7),
92
93 _3DSTATE_RASTER_RULES_CMD |
94 ENABLE_POINT_RASTER_RULE |
95 OGL_POINT_RASTER_RULE |
96 ENABLE_LINE_STRIP_PROVOKE_VRTX |
97 ENABLE_TRI_FAN_PROVOKE_VRTX |
98 LINE_STRIP_PROVOKE_VRTX(1) |
99 TRI_FAN_PROVOKE_VRTX(2) |
100 ENABLE_TEXKILL_3D_4D |
101 TEXKILL_4D,
102
103 _3DSTATE_DEPTH_SUBRECT_DISABLE,
104
105 /* disable indirect state for now
106 */
107 _3DSTATE_LOAD_INDIRECT | 0, 0};
108
109 static void
110 emit_invariant(struct i915_context *i915)
111 {
112 i915_winsys_batchbuffer_write(i915->batch, invariant_state,
113 Elements(invariant_state)*sizeof(uint32_t));
114 }
115
116 static void
117 validate_immediate(struct i915_context *i915, unsigned *batch_space)
118 {
119 unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
120 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
121 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
122 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
123 i915->immediate_dirty;
124
125 if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
126 i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
127
128 *batch_space = 1 + util_bitcount(dirty);
129 }
130
131 static void
132 emit_immediate(struct i915_context *i915)
133 {
134 /* remove unwanted bits and S7 */
135 unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
136 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
137 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
138 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
139 i915->immediate_dirty;
140 int i, num = util_bitcount(dirty);
141 assert(num && num <= I915_MAX_IMMEDIATE);
142
143 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
144 dirty << 4 | (num - 1));
145
146 if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
147 if (i915->vbo)
148 OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
149 i915->current.immediate[I915_IMMEDIATE_S0]);
150 else
151 OUT_BATCH(0);
152 }
153
154 for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
155 if (dirty & (1 << i)) {
156 /* Fixup blend function for A8 dst buffers.
157 * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer,
158 * and therefore we need to use the color factor for alphas. */
159 if ((i == I915_IMMEDIATE_S6) &&
160 (i915->current.target_fixup_format == PIPE_FORMAT_A8_UNORM)) {
161 uint32_t imm = i915->current.immediate[i];
162 uint32_t srcRGB = (imm >> S6_CBUF_SRC_BLEND_FACT_SHIFT) & BLENDFACT_MASK;
163 if (srcRGB == BLENDFACT_DST_ALPHA)
164 srcRGB = BLENDFACT_DST_COLR;
165 else if (srcRGB == BLENDFACT_INV_DST_ALPHA)
166 srcRGB = BLENDFACT_INV_DST_COLR;
167 imm &= ~SRC_BLND_FACT(BLENDFACT_MASK);
168 imm |= SRC_BLND_FACT(srcRGB);
169 OUT_BATCH(imm);
170 } else {
171 OUT_BATCH(i915->current.immediate[i]);
172 }
173 }
174 }
175 }
176
177 static void
178 validate_dynamic(struct i915_context *i915, unsigned *batch_space)
179 {
180 *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
181 }
182
183 static void
184 emit_dynamic(struct i915_context *i915)
185 {
186 int i;
187 for (i = 0; i < I915_MAX_DYNAMIC; i++) {
188 if (i915->dynamic_dirty & (1 << i))
189 OUT_BATCH(i915->current.dynamic[i]);
190 }
191 }
192
193 static void
194 validate_static(struct i915_context *i915, unsigned *batch_space)
195 {
196 *batch_space = 0;
197
198 if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
199 i915->validation_buffers[i915->num_validation_buffers++]
200 = i915->current.cbuf_bo;
201 *batch_space += 3;
202 }
203
204 if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
205 i915->validation_buffers[i915->num_validation_buffers++]
206 = i915->current.depth_bo;
207 *batch_space += 3;
208 }
209
210 if (i915->static_dirty & I915_DST_VARS)
211 *batch_space += 2;
212
213 if (i915->static_dirty & I915_DST_RECT)
214 *batch_space += 5;
215 }
216
217 static void
218 emit_static(struct i915_context *i915)
219 {
220 if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
221 OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
222 OUT_BATCH(i915->current.cbuf_flags);
223 OUT_RELOC(i915->current.cbuf_bo,
224 I915_USAGE_RENDER,
225 0);
226 }
227
228 /* What happens if no zbuf??
229 */
230 if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
231 OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
232 OUT_BATCH(i915->current.depth_flags);
233 OUT_RELOC(i915->current.depth_bo,
234 I915_USAGE_RENDER,
235 0);
236 }
237
238 if (i915->static_dirty & I915_DST_VARS) {
239 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
240 OUT_BATCH(i915->current.dst_buf_vars);
241 }
242 }
243
244 static void
245 validate_map(struct i915_context *i915, unsigned *batch_space)
246 {
247 const uint enabled = i915->current.sampler_enable_flags;
248 uint unit;
249 struct i915_texture *tex;
250
251 *batch_space = i915->current.sampler_enable_nr ?
252 2 + 3*i915->current.sampler_enable_nr : 0;
253
254 for (unit = 0; unit < I915_TEX_UNITS; unit++) {
255 if (enabled & (1 << unit)) {
256 tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
257 i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
258 }
259 }
260 }
261
262 static void
263 emit_map(struct i915_context *i915)
264 {
265 const uint nr = i915->current.sampler_enable_nr;
266 if (nr) {
267 const uint enabled = i915->current.sampler_enable_flags;
268 uint unit;
269 uint count = 0;
270 OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
271 OUT_BATCH(enabled);
272 for (unit = 0; unit < I915_TEX_UNITS; unit++) {
273 if (enabled & (1 << unit)) {
274 struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
275 struct i915_winsys_buffer *buf = texture->buffer;
276 assert(buf);
277
278 count++;
279
280 OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
281 OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
282 OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
283 }
284 }
285 assert(count == nr);
286 }
287 }
288
289 static void
290 validate_sampler(struct i915_context *i915, unsigned *batch_space)
291 {
292 *batch_space = i915->current.sampler_enable_nr ?
293 2 + 3*i915->current.sampler_enable_nr : 0;
294 }
295
296 static void
297 emit_sampler(struct i915_context *i915)
298 {
299 if (i915->current.sampler_enable_nr) {
300 int i;
301
302 OUT_BATCH( _3DSTATE_SAMPLER_STATE |
303 (3 * i915->current.sampler_enable_nr) );
304
305 OUT_BATCH( i915->current.sampler_enable_flags );
306
307 for (i = 0; i < I915_TEX_UNITS; i++) {
308 if (i915->current.sampler_enable_flags & (1<<i)) {
309 OUT_BATCH( i915->current.sampler[i][0] );
310 OUT_BATCH( i915->current.sampler[i][1] );
311 OUT_BATCH( i915->current.sampler[i][2] );
312 }
313 }
314 }
315 }
316
317 static void
318 validate_constants(struct i915_context *i915, unsigned *batch_space)
319 {
320 int nr = i915->fs->num_constants ?
321 2 + 4*i915->fs->num_constants : 0;
322
323 *batch_space = nr;
324 }
325
326 static void
327 emit_constants(struct i915_context *i915)
328 {
329 /* Collate the user-defined constants with the fragment shader's
330 * immediates according to the constant_flags[] array.
331 */
332 const uint nr = i915->fs->num_constants;
333
334 assert(nr < I915_MAX_CONSTANT);
335 if (nr) {
336 uint i;
337
338 OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
339 OUT_BATCH((1 << nr) - 1);
340
341 for (i = 0; i < nr; i++) {
342 const uint *c;
343 if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
344 /* grab user-defined constant */
345 c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
346 c += 4 * i;
347 }
348 else {
349 /* emit program constant */
350 c = (uint *) i915->fs->constants[i];
351 }
352 #if 0 /* debug */
353 {
354 float *f = (float *) c;
355 printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
356 (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
357 ? "user" : "immediate"));
358 }
359 #endif
360 OUT_BATCH(*c++);
361 OUT_BATCH(*c++);
362 OUT_BATCH(*c++);
363 OUT_BATCH(*c++);
364 }
365 }
366 }
367
368 static void
369 validate_program(struct i915_context *i915, unsigned *batch_space)
370 {
371 uint additional_size = 0;
372
373 additional_size += i915->current.target_fixup_format ? 3 : 0;
374
375 /* we need more batch space if we want to emulate rgba framebuffers */
376 *batch_space = i915->fs->decl_len + i915->fs->program_len + additional_size;
377 }
378
379 static void
380 emit_program(struct i915_context *i915)
381 {
382 uint additional_size = 0;
383 uint i;
384
385 /* count how much additional space we'll need */
386 validate_program(i915, &additional_size);
387 additional_size -= i915->fs->decl_len + i915->fs->program_len;
388
389 /* we should always have, at least, a pass-through program */
390 assert(i915->fs->program_len > 0);
391
392 /* output the declarations */
393 {
394 /* first word has the size, we have to adjust that */
395 uint size = (i915->fs->decl[0]);
396 size += additional_size;
397 OUT_BATCH(size);
398 }
399
400 for (i = 1 ; i < i915->fs->decl_len; i++)
401 OUT_BATCH(i915->fs->decl[i]);
402
403 /* output the program */
404 assert(i915->fs->program_len % 3 == 0);
405 for (i = 0 ; i < i915->fs->program_len; i+=3) {
406 OUT_BATCH(i915->fs->program[i]);
407 OUT_BATCH(i915->fs->program[i+1]);
408 OUT_BATCH(i915->fs->program[i+2]);
409 }
410
411 /* we emit an additional mov with swizzle to fake RGBA framebuffers */
412 if (i915->current.target_fixup_format) {
413 /* mov out_color, out_color.zyxw */
414 OUT_BATCH(A0_MOV |
415 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
416 A0_DEST_CHANNEL_ALL |
417 (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
418 (T_DIFFUSE << A0_SRC0_NR_SHIFT));
419 OUT_BATCH(i915->current.fixup_swizzle);
420 OUT_BATCH(0);
421 }
422 }
423
424 static void
425 emit_draw_rect(struct i915_context *i915)
426 {
427 if (i915->static_dirty & I915_DST_RECT) {
428 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
429 OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
430 OUT_BATCH(i915->current.draw_offset);
431 OUT_BATCH(i915->current.draw_size);
432 OUT_BATCH(i915->current.draw_offset);
433 }
434 }
435
436 static boolean
437 i915_validate_state(struct i915_context *i915, unsigned *batch_space)
438 {
439 unsigned tmp;
440
441 i915->num_validation_buffers = 0;
442 if (i915->hardware_dirty & I915_HW_INVARIANT)
443 *batch_space = Elements(invariant_state);
444 else
445 *batch_space = 0;
446
447 #if 0
448 static int counter_total = 0;
449 #define VALIDATE_ATOM(atom, hw_dirty) \
450 if (i915->hardware_dirty & hw_dirty) { \
451 static int counter_##atom = 0;\
452 validate_##atom(i915, &tmp); \
453 *batch_space += tmp;\
454 counter_##atom += tmp;\
455 counter_total += tmp;\
456 printf("%s: \t%d/%d \t%2.2f\n",#atom, counter_##atom, counter_total, counter_##atom*100.f/counter_total);}
457 #else
458 #define VALIDATE_ATOM(atom, hw_dirty) \
459 if (i915->hardware_dirty & hw_dirty) { \
460 validate_##atom(i915, &tmp); \
461 *batch_space += tmp; }
462 #endif
463 VALIDATE_ATOM(flush, I915_HW_FLUSH);
464 VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
465 VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
466 VALIDATE_ATOM(static, I915_HW_STATIC);
467 VALIDATE_ATOM(map, I915_HW_MAP);
468 VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
469 VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
470 VALIDATE_ATOM(program, I915_HW_PROGRAM);
471 #undef VALIDATE_ATOM
472
473 if (i915->num_validation_buffers == 0)
474 return TRUE;
475
476 if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
477 i915->num_validation_buffers))
478 return FALSE;
479
480 return TRUE;
481 }
482
483 /* Push the state into the sarea and/or texture memory.
484 */
485 void
486 i915_emit_hardware_state(struct i915_context *i915 )
487 {
488 unsigned batch_space;
489 uintptr_t save_ptr;
490
491 assert(i915->dirty == 0);
492
493 if (I915_DBG_ON(DBG_ATOMS))
494 i915_dump_hardware_dirty(i915, __FUNCTION__);
495
496 if (!i915_validate_state(i915, &batch_space)) {
497 FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
498 assert(i915_validate_state(i915, &batch_space));
499 }
500
501 if(!BEGIN_BATCH(batch_space)) {
502 FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
503 assert(i915_validate_state(i915, &batch_space));
504 assert(BEGIN_BATCH(batch_space));
505 }
506
507 save_ptr = (uintptr_t)i915->batch->ptr;
508
509 #define EMIT_ATOM(atom, hw_dirty) \
510 if (i915->hardware_dirty & hw_dirty) \
511 emit_##atom(i915);
512 EMIT_ATOM(flush, I915_HW_FLUSH);
513 EMIT_ATOM(invariant, I915_HW_INVARIANT);
514 EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
515 EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
516 EMIT_ATOM(static, I915_HW_STATIC);
517 EMIT_ATOM(map, I915_HW_MAP);
518 EMIT_ATOM(sampler, I915_HW_SAMPLER);
519 EMIT_ATOM(constants, I915_HW_CONSTANTS);
520 EMIT_ATOM(program, I915_HW_PROGRAM);
521 EMIT_ATOM(draw_rect, I915_HW_STATIC);
522 #undef EMIT_ATOM
523
524 I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
525 ((uintptr_t)i915->batch->ptr - save_ptr) / 4,
526 batch_space);
527 assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
528
529 i915->hardware_dirty = 0;
530 i915->immediate_dirty = 0;
531 i915->dynamic_dirty = 0;
532 i915->static_dirty = 0;
533 i915->flush_dirty = 0;
534 }